integration testing of adapted ai workflow with fixes

This commit is contained in:
ValueOn AG 2025-12-01 19:15:50 +01:00
parent aff37fd2e2
commit b401be703f
18 changed files with 648 additions and 293 deletions

View file

@ -400,6 +400,10 @@ class ActionDocument(BaseModel):
None, None,
description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)" description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)"
) )
validationMetadata: Optional[Dict[str, Any]] = Field(
None,
description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)"
)
registerModelLabels( registerModelLabels(

View file

@ -262,11 +262,17 @@ class AiObjects:
logger.info(f"✅ Image content part processed successfully with model: {model.name}") logger.info(f"✅ Image content part processed successfully with model: {model.name}")
# Convert to AiCallResponse format # Convert to AiCallResponse format
# Note: AiModelResponse doesn't have priceUsd, and processingTime can be None
# Calculate processing time if not provided (fallback to 0.0)
processingTime = getattr(modelResponse, 'processingTime', None)
if processingTime is None:
processingTime = 0.0
return AiCallResponse( return AiCallResponse(
content=modelResponse.content, content=modelResponse.content,
modelName=model.name, modelName=model.name,
priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0, priceUsd=0.0, # Price will be calculated elsewhere if needed
processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0, processingTime=processingTime,
bytesSent=0, # Will be calculated elsewhere bytesSent=0, # Will be calculated elsewhere
bytesReceived=0, # Will be calculated elsewhere bytesReceived=0, # Will be calculated elsewhere
errorCount=0 errorCount=0

View file

@ -944,32 +944,17 @@ If no trackable items can be identified, return: {{"kpis": []}}
) )
try: try:
# Default outputFormat to "txt" if not specified (unified path - all formats handled the same way)
if not outputFormat:
outputFormat = "txt"
# Extraction is now separate - contentParts must be extracted before calling # Extraction is now separate - contentParts must be extracted before calling
# Require operationType to be set before calling # Require operationType to be set before calling
opType = getattr(options, "operationType", None) opType = getattr(options, "operationType", None)
if not opType: if not opType:
# If outputFormat is specified, default to DATA_GENERATE # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE
if outputFormat: options.operationType = OperationTypeEnum.DATA_GENERATE
options.operationType = OperationTypeEnum.DATA_GENERATE opType = OperationTypeEnum.DATA_GENERATE
opType = OperationTypeEnum.DATA_GENERATE
else:
self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
analyzedOptions = await self._analyzePromptAndCreateOptions(prompt)
if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType:
options.operationType = analyzedOptions.operationType
# Merge other analyzed options
if hasattr(analyzedOptions, "priority"):
options.priority = analyzedOptions.priority
if hasattr(analyzedOptions, "processingMode"):
options.processingMode = analyzedOptions.processingMode
if hasattr(analyzedOptions, "compressPrompt"):
options.compressPrompt = analyzedOptions.compressPrompt
if hasattr(analyzedOptions, "compressContext"):
options.compressContext = analyzedOptions.compressContext
else:
# Default to DATA_ANALYSE if analysis fails
options.operationType = OperationTypeEnum.DATA_ANALYSE
opType = options.operationType
# Handle IMAGE_GENERATE operations # Handle IMAGE_GENERATE operations
if opType == OperationTypeEnum.IMAGE_GENERATE: if opType == OperationTypeEnum.IMAGE_GENERATE:
@ -1052,171 +1037,232 @@ If no trackable items can be identified, return: {{"kpis": []}}
self.services.chat.progressLogFinish(aiOperationId, False) self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(errorMsg) raise ValueError(errorMsg)
# Handle document generation (outputFormat specified) # Handle document generation (outputFormat always set, defaults to "txt")
if outputFormat: # Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt # outputFormat is always set now (defaults to "txt" if not specified)
options.compressPrompt = False
options.compressContext = False
# Convert contentParts to text for generation prompt (if provided) # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
if contentParts: options.compressPrompt = False
# Convert contentParts to text for generation prompt options.compressContext = False
content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
# Process contentParts for generation prompt (if provided)
# Use generic _callWithContentParts() which handles all content types (images, text, etc.)
# This automatically processes images with vision models and merges all results
if contentParts:
# Filter out binary/other parts that shouldn't be processed
processableParts = []
skippedParts = []
for p in contentParts:
if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
processableParts.append(p)
else:
skippedParts.append(p)
if skippedParts:
logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")
if processableParts:
# Count images for progress update
imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
if imageCount > 0:
self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")
# Build proper extraction prompt using buildExtractionPrompt
# This creates a focused extraction prompt, not the user's generation prompt
from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
# Determine renderer for format-specific guidelines
renderer = None
if outputFormat:
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
renderer = generationService.getRendererForFormat(outputFormat)
except Exception as e:
logger.debug(f"Could not get renderer for format {outputFormat}: {e}")
extractionPrompt = await buildExtractionPrompt(
outputFormat=outputFormat or "txt",
userPrompt=prompt, # User's prompt as context for what to extract
title=title or "Document",
aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None,
services=self.services,
renderer=renderer
)
logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")
# Use DATA_EXTRACT operation type for extraction
extractionOptions = AiCallOptions(
operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction
compressPrompt=options.compressPrompt,
compressContext=options.compressContext
)
extractionRequest = AiCallRequest(
prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt
context="",
options=extractionOptions,
contentParts=processableParts
)
# Write debug file for extraction prompt (all parts)
self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
# Call generic content parts processor - handles images, text, chunking, merging
extractionResponse = await self.aiObjects.call(extractionRequest)
# Write debug file for extraction response
if extractionResponse.content:
self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
else:
self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")
# Use extracted content directly for generation prompt
if extractionResponse.errorCount == 0 and extractionResponse.content:
# The extracted content is already merged and ready to use
content_for_generation = extractionResponse.content
logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation")
else:
# Extraction failed - use placeholders
logger.warning(f"Content extraction failed, using placeholders")
placeholderParts = []
for p in processableParts:
placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]")
content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None
else: else:
content_for_generation = None content_for_generation = None
logger.debug("No processable parts found in contentParts")
else:
content_for_generation = None
self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
generation_prompt = await buildGenerationPrompt( generation_prompt = await buildGenerationPrompt(
outputFormat, prompt, title, content_for_generation, None outputFormat, prompt, title, content_for_generation, None
)
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
"extracted_content": content_for_generation
}
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
# Extract user prompt from promptArgs for task completion analysis
userPrompt = None
if promptArgs:
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId,
userPrompt=userPrompt
)
self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
try:
extracted_json = self.services.utils.jsonExtractString(generated_json)
generated_data = json.loads(extracted_json)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse generated JSON: {str(e)}")
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(f"Generated content is not valid JSON: {str(e)}")
# Extract title and filename from generated document structure
extractedTitle = title
extractedFilename = None
if isinstance(generated_data, dict) and "documents" in generated_data:
docs = generated_data["documents"]
if isinstance(docs, list) and len(docs) > 0:
firstDoc = docs[0]
if isinstance(firstDoc, dict):
if firstDoc.get("title"):
extractedTitle = firstDoc["title"]
if firstDoc.get("filename"):
extractedFilename = firstDoc["filename"]
# Ensure metadata contains the extracted title
if "metadata" not in generated_data:
generated_data["metadata"] = {}
if extractedTitle:
generated_data["metadata"]["title"] = extractedTitle
# Create separate operation for content rendering
renderOperationId = f"{aiOperationId}_render"
renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
self.services.chat.progressLogStart(
renderOperationId,
"Content Rendering",
"Rendering",
f"Format: {outputFormat}",
parentId=renderParentLogId
)
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
rendered_content, mime_type = await generationService.renderReport(
generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
) )
self.services.chat.progressLogFinish(renderOperationId, True)
promptArgs = { # Determine document name
"outputFormat": outputFormat, if extractedFilename:
"userPrompt": prompt, documentName = extractedFilename
"title": title, elif extractedTitle and extractedTitle != "Generated Document":
"extracted_content": content_for_generation sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
} sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") if not sanitized.lower().endswith(f".{outputFormat}"):
# Extract user prompt from promptArgs for task completion analysis documentName = f"{sanitized}.{outputFormat}"
userPrompt = None
if promptArgs:
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId,
userPrompt=userPrompt
)
self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
try:
extracted_json = self.services.utils.jsonExtractString(generated_json)
generated_data = json.loads(extracted_json)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse generated JSON: {str(e)}")
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(f"Generated content is not valid JSON: {str(e)}")
# Extract title and filename from generated document structure
extractedTitle = title
extractedFilename = None
if isinstance(generated_data, dict) and "documents" in generated_data:
docs = generated_data["documents"]
if isinstance(docs, list) and len(docs) > 0:
firstDoc = docs[0]
if isinstance(firstDoc, dict):
if firstDoc.get("title"):
extractedTitle = firstDoc["title"]
if firstDoc.get("filename"):
extractedFilename = firstDoc["filename"]
# Ensure metadata contains the extracted title
if "metadata" not in generated_data:
generated_data["metadata"] = {}
if extractedTitle:
generated_data["metadata"]["title"] = extractedTitle
# Create separate operation for content rendering
renderOperationId = f"{aiOperationId}_render"
renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
self.services.chat.progressLogStart(
renderOperationId,
"Content Rendering",
"Rendering",
f"Format: {outputFormat}",
parentId=renderParentLogId
)
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
rendered_content, mime_type = await generationService.renderReport(
generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
)
self.services.chat.progressLogFinish(renderOperationId, True)
# Determine document name
if extractedFilename:
documentName = extractedFilename
elif extractedTitle and extractedTitle != "Generated Document":
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
if not sanitized.lower().endswith(f".{outputFormat}"):
documentName = f"{sanitized}.{outputFormat}"
else:
documentName = sanitized
else: else:
documentName = f"generated.{outputFormat}" documentName = sanitized
else: else:
documentName = f"generated.{outputFormat}" documentName = f"generated.{outputFormat}"
else:
documentName = f"generated.{outputFormat}"
# Build document data # Build document data
docData = DocumentData( docData = DocumentData(
documentName=documentName, documentName=documentName,
documentData=rendered_content, documentData=rendered_content,
mimeType=mime_type, mimeType=mime_type,
sourceJson=generated_data # Preserve source JSON for structure validation sourceJson=generated_data # Preserve source JSON for structure validation
)
metadata = AiResponseMetadata(
title=extractedTitle or title or "Generated Document",
filename=extractedFilename,
operationType=opType.value if opType else None
)
self.services.utils.writeDebugFile(str(generated_data), "document_generation_response")
self.services.chat.progressLogFinish(aiOperationId, True)
return AiResponse(
content=json.dumps(generated_data),
metadata=metadata,
documents=[docData]
)
except Exception as e:
logger.error(f"Error rendering document: {str(e)}")
if renderOperationId:
self.services.chat.progressLogFinish(renderOperationId, False)
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(f"Rendering failed: {str(e)}")
# Handle text processing (no outputFormat)
self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
if contentParts:
# Process contentParts through AI
# Convert contentParts to text for prompt
contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt
result_content = await self._callAiWithLooping(
fullPrompt, options, "text", None, None, aiOperationId
)
else:
# Direct text call (no documents to process)
result_content = await self._callAiWithLooping(
prompt, options, "text", None, None, aiOperationId
) )
metadata = AiResponseMetadata( metadata = AiResponseMetadata(
operationType=opType.value if opType else None title=extractedTitle or title or "Generated Document",
) filename=extractedFilename,
operationType=opType.value if opType else None
)
self.services.chat.progressLogFinish(aiOperationId, True) # Write JSON with proper formatting (not str() which can truncate)
jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False)
self.services.utils.writeDebugFile(jsonStr, "document_generation_response")
self.services.chat.progressLogFinish(aiOperationId, True)
return AiResponse( return AiResponse(
content=result_content, content=json.dumps(generated_data),
metadata=metadata metadata=metadata,
) documents=[docData]
)
except Exception as e:
logger.error(f"Error rendering document: {str(e)}")
if renderOperationId:
self.services.chat.progressLogFinish(renderOperationId, False)
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(f"Rendering failed: {str(e)}")
except Exception as e: except Exception as e:
logger.error(f"Error in callAiContent: {str(e)}") logger.error(f"Error in callAiContent: {str(e)}")

View file

@ -1236,8 +1236,12 @@ class JsonResponseHandler:
# Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows" # Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath) value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
# Handle None (path doesn't exist - incomplete JSON)
if value is None:
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}")
# Count items/rows/elements based on type # Count items/rows/elements based on type
if isinstance(value, list): elif isinstance(value, list):
updatedKpi["currentValue"] = len(value) updatedKpi["currentValue"] = len(value)
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items") logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
elif isinstance(value, (int, float)): elif isinstance(value, (int, float)):
@ -1296,8 +1300,12 @@ class JsonResponseHandler:
# Extract value using path # Extract value using path
value = JsonResponseHandler._extractValueByPath(parsed, jsonPath) value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
# Handle None (path doesn't exist - incomplete JSON)
if value is None:
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}")
# Count items/rows/elements based on type # Count items/rows/elements based on type
if isinstance(value, list): elif isinstance(value, list):
updatedKpi["currentValue"] = len(value) updatedKpi["currentValue"] = len(value)
logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items") logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
elif isinstance(value, (int, float)): elif isinstance(value, (int, float)):
@ -1321,6 +1329,7 @@ class JsonResponseHandler:
Extract value from object using dot-notation path with array indices. Extract value from object using dot-notation path with array indices.
Example: "sections[0].elements[0].items" Example: "sections[0].elements[0].items"
Returns None if path doesn't exist (for incomplete JSON handling).
""" """
parts = path.split('.') parts = path.split('.')
current = obj current = obj
@ -1332,20 +1341,30 @@ class JsonResponseHandler:
index = int(part[part.index('[') + 1:part.index(']')]) index = int(part[part.index('[') + 1:part.index(']')])
if key: if key:
current = current.get(key, []) if isinstance(current, dict):
if isinstance(current, list) and 0 <= index < len(current): current = current.get(key)
current = current[index] if current is None:
return None # Key doesn't exist
else:
return None # Can't access key on non-dict
if isinstance(current, list):
if 0 <= index < len(current):
current = current[index]
else:
# Index out of range - return None for incomplete JSON
return None
else: else:
raise KeyError(f"Invalid index {index} for {key}") # Not a list, can't index
return None
else: else:
# Handle dict access # Handle dict access
if isinstance(current, dict): if isinstance(current, dict):
current = current.get(part) current = current.get(part)
if current is None:
return None # Key doesn't exist
else: else:
raise KeyError(f"Cannot access {part} on {type(current)}") return None # Can't access key on non-dict
if current is None:
raise KeyError(f"Path {path} returned None at {part}")
return current return current

View file

@ -92,13 +92,16 @@ class ChatService:
if docRef.startswith("docItem:"): if docRef.startswith("docItem:"):
# docItem:<id>:<filename> or docItem:<id> (filename is optional) # docItem:<id>:<filename> or docItem:<id> (filename is optional)
# ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct) # ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct)
# Both formats are supported: docItem:<documentId> and docItem:<documentId>:<filename>
parts = docRef.split(':') parts = docRef.split(':')
if len(parts) >= 2: if len(parts) >= 2:
docId = parts[1] # This should be the documentId (UUID) docId = parts[1] # This should be the documentId (UUID)
docFound = False docFound = False
# ALWAYS try to match by documentId first (regardless of number of parts) # ALWAYS try to match by documentId first (regardless of number of parts)
# This handles: docItem:documentId and docItem:documentId:filename # This handles both formats:
# - docItem:<documentId> (without filename - still works)
# - docItem:<documentId>:<filename> (with filename - preferred)
for message in workflow.messages: for message in workflow.messages:
# Validate message belongs to this workflow # Validate message belongs to this workflow
msgWorkflowId = getattr(message, 'workflowId', None) msgWorkflowId = getattr(message, 'workflowId', None)

View file

@ -138,6 +138,36 @@ class ExtractionService:
f"extraction.process.{doc.mimeType}" f"extraction.process.{doc.mimeType}"
) )
# Write extraction results to debug file
try:
from modules.shared.debugLogger import writeDebugFile
import json
# Create summary of extraction results for debug
extractionSummary = {
"documentName": doc.fileName,
"documentMimeType": doc.mimeType,
"partsCount": len(ec.parts),
"parts": []
}
for part in ec.parts:
partSummary = {
"typeGroup": part.typeGroup,
"mimeType": part.mimeType,
"label": part.label,
"dataLength": len(part.data) if part.data else 0,
"metadata": part.metadata
}
# Include data preview for small parts (first 500 chars)
if part.data and len(part.data) <= 500:
partSummary["dataPreview"] = part.data[:500]
elif part.data:
partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
extractionSummary["parts"].append(partSummary)
writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
except Exception as e:
logger.debug(f"Failed to write extraction debug file: {str(e)}")
results.append(ec) results.append(ec)
return results return results

View file

@ -99,9 +99,16 @@ async def buildExtractionPrompt(
# Parse extraction intent if AI service is available # Parse extraction intent if AI service is available
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
# Build base prompt # Build base prompt with clear user prompt markers
sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt
adaptive_prompt = f""" adaptive_prompt = f"""
{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt} {'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{sanitized_user_prompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output. You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.

View file

@ -479,25 +479,11 @@ class RendererXlsx(BaseRenderer):
sheetNames.append(sectionTitle[:31]) # Excel sheet name limit sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
else: else:
# Single table or mixed content - create main sheet # Single table or mixed content - create only main sheet
documentTitle = jsonContent.get("metadata", {}).get("title", "Document") documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(documentTitle[:31]) # Excel sheet name limit sheetNames.append(documentTitle[:31]) # Excel sheet name limit
# Add additional sheets for other content types return sheetNames
contentTypes = set()
for section in sections:
contentType = section.get("content_type", "paragraph")
contentTypes.add(contentType)
if "table" in contentTypes and len(tableSections) == 1:
sheetNames.append("Table Data")
if "list" in contentTypes:
sheetNames.append("Lists")
if "paragraph" in contentTypes or "heading" in contentTypes:
sheetNames.append("Text")
# Limit to 4 sheets maximum
return sheetNames[:4]
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None: def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets with content from JSON based on actual sheet names.""" """Populate Excel sheets with content from JSON based on actual sheet names."""
@ -527,14 +513,10 @@ class RendererXlsx(BaseRenderer):
sheetTitle = caption sheetTitle = caption
self._populateTableSheet(sheet, section, styles, sheetTitle) self._populateTableSheet(sheet, section, styles, sheetTitle)
else: else:
# Single table or mixed content - use original logic # Single table or mixed content - populate only main sheet
firstSheetName = sheetNames[0] firstSheetName = sheetNames[0]
self._populateMainSheet(sheets[firstSheetName], jsonContent, styles) self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
# If we have multiple sheets, distribute content by type
if len(sheetNames) > 1:
self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
except Exception as e: except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}") self.logger.warning(f"Could not populate Excel sheets: {str(e)}")

View file

@ -72,7 +72,13 @@ async def buildGenerationPrompt(
continuationText += "Start directly with the next element/section that should follow.\n\n" continuationText += "Start directly with the next element/section that should follow.\n\n"
# PROMPT FOR CONTINUATION # PROMPT FOR CONTINUATION
generationPrompt = f"""User request: "{userPrompt}" generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}
CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content. CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.
@ -93,8 +99,57 @@ Continue generating the remaining content now.
else: else:
# PROMPT FOR FIRST CALL # PROMPT FOR FIRST CALL
# Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
generationPrompt = f"""User request: "{userPrompt}" if extracted_content:
# If we have extracted content, put it FIRST and make it very clear it's the source data
generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}
{'='*80}
CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE
{'='*80}
The content below contains the ACTUAL DATA extracted from the source documents.
You MUST use this data - DO NOT generate fake or example data.
{'='*80}
EXTRACTED CONTENT FROM DOCUMENTS:
{'='*80}
{extracted_content}
{'='*80}
END OF EXTRACTED CONTENT
{'='*80}
Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
JSON structure template (structure only - use data from EXTRACTED CONTENT above):
{jsonTemplate}
Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
Generate your complete response using the extracted content data.
"""
else:
# No extracted content - generate from scratch
generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content. Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
@ -111,12 +166,5 @@ Instructions:
Generate your complete response. Generate your complete response.
""" """
# If we have extracted content, prepend it to the prompt
if extracted_content:
generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
{generationPrompt}"""
return generationPrompt.strip() return generationPrompt.strip()

View file

@ -102,12 +102,30 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
Attempt to repair broken JSON using multiple strategies. Attempt to repair broken JSON using multiple strategies.
Generic solution that works for any content type. Generic solution that works for any content type.
Returns the best repair attempt or None if all fail. Returns the best repair attempt or None if all fail.
IMPORTANT: This function tries to preserve ALL data by avoiding truncation.
Only uses truncation as a last resort when structure closing fails.
""" """
if not text: if not text:
return None return None
# Strategy 1: Try to extract sections from the entire text first # Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
# This preserves all data and should be tried first
closedStr = closeJsonStructures(text)
obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict):
sections = extractSectionsFromDocument(obj)
if sections:
logger.info(f"Repaired JSON using structure closing (preserved all data, found {len(sections)} sections)")
return obj
else:
# Structure closing worked but no sections found - still return it
logger.info("Repaired JSON using structure closing (preserved all data, but no sections found)")
return obj
# Strategy 2: Try to extract sections from the entire text using regex
# This handles cases where the JSON structure is broken but content is intact # This handles cases where the JSON structure is broken but content is intact
# NOTE: _extractSectionsRegex may truncate, but we try it before progressive parsing
extractedSections = _extractSectionsRegex(text) extractedSections = _extractSectionsRegex(text)
if extractedSections: if extractedSections:
logger.info(f"Extracted {len(extractedSections)} sections using regex") logger.info(f"Extracted {len(extractedSections)} sections using regex")
@ -120,7 +138,10 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
"documents": [{"sections": extractedSections}] "documents": [{"sections": extractedSections}]
} }
# Strategy 2: Progressive parsing - try to find longest valid prefix # Strategy 3: Progressive parsing - try to find longest valid prefix (TRUNCATES DATA)
# WARNING: This strategy truncates the input and loses data after the truncation point
# Only use as last resort when other strategies fail
logger.warning("Structure closing and regex extraction failed, trying progressive parsing (WILL TRUNCATE DATA)")
bestResult = None bestResult = None
bestValidLength = 0 bestValidLength = 0
@ -133,13 +154,13 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
if err is None and isinstance(obj, dict): if err is None and isinstance(obj, dict):
bestResult = obj bestResult = obj
bestValidLength = i bestValidLength = i
logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})") logger.debug(f"Progressive parsing success at length {i} (step: {stepSize}) - DATA TRUNCATED AT POSITION {i}")
break break
if bestResult: if bestResult:
break break
if bestResult: if bestResult:
logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})") logger.warning(f"Repaired JSON using progressive parsing (valid length: {bestValidLength}, DATA LOST AFTER THIS POINT)")
# Check if we have sections in the result # Check if we have sections in the result
sections = extractSectionsFromDocument(bestResult) sections = extractSectionsFromDocument(bestResult)
@ -160,13 +181,6 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
bestResult["documents"][0]["sections"].extend(extractedSections) bestResult["documents"][0]["sections"].extend(extractedSections)
return bestResult return bestResult
# Strategy 3: Structure closing - close incomplete structures
closedStr = closeJsonStructures(text)
obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict):
logger.info("Repaired JSON using structure closing")
return obj
logger.warning("All repair strategies failed") logger.warning("All repair strategies failed")
return None return None
@ -174,18 +188,43 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
def closeJsonStructures(text: str) -> str: def closeJsonStructures(text: str) -> str:
""" """
Close incomplete JSON structures by adding missing closing brackets. Close incomplete JSON structures by adding missing closing brackets.
Also handles unterminated strings by closing them.
""" """
if not text: if not text:
return text return text
result = text
# Handle unterminated strings: find the last unclosed string
# Look for patterns like: "value" or "value\n (unterminated)
# Simple heuristic: if we end with an unterminated string (odd number of quotes at end)
# Try to close it by finding the last opening quote and closing it
if result.strip():
# Count quotes - if odd number, we have an unterminated string
quoteCount = result.count('"')
if quoteCount % 2 == 1:
# Find the last opening quote that's not escaped
lastQuotePos = result.rfind('"')
if lastQuotePos >= 0:
# Check if it's escaped
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
escapeCount += 1
i -= 1
# If not escaped (even number of backslashes), close the string
if escapeCount % 2 == 0:
# Find where the string should end (before next comma, bracket, or brace)
# For now, just close it at the end
result += '"'
# Count open/close brackets and braces # Count open/close brackets and braces
openBraces = text.count('{') openBraces = result.count('{')
closeBraces = text.count('}') closeBraces = result.count('}')
openBrackets = text.count('[') openBrackets = result.count('[')
closeBrackets = text.count(']') closeBrackets = result.count(']')
# Close incomplete structures # Close incomplete structures
result = text
for _ in range(openBraces - closeBraces): for _ in range(openBraces - closeBraces):
result += '}' result += '}'
for _ in range(openBrackets - closeBrackets): for _ in range(openBrackets - closeBrackets):
@ -202,11 +241,24 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
NOTE: This function is called FROM repairBrokenJson, so it must NOT call repairBrokenJson NOTE: This function is called FROM repairBrokenJson, so it must NOT call repairBrokenJson
to avoid circular dependency. Instead, it implements its own repair strategies. to avoid circular dependency. Instead, it implements its own repair strategies.
IMPORTANT: Tries to preserve data by using structure closing first before truncation.
""" """
sections = [] sections = []
# Strategy 1: Try progressive parsing to find longest valid JSON prefix # Strategy 1: Try structure closing WITHOUT truncation first (preserves all data)
# Find the longest valid JSON prefix that contains sections closed_str = closeJsonStructures(text)
obj, err, _ = tryParseJson(closed_str)
if err is None and isinstance(obj, dict):
extracted_sections = extractSectionsFromDocument(obj)
if extracted_sections:
logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using structure closing (preserved all data)")
return extracted_sections
# Strategy 2: Try progressive parsing to find longest valid JSON prefix (TRUNCATES DATA)
# WARNING: This truncates the input and loses data
# Only use if structure closing failed
logger.debug("_extractSectionsRegex: Structure closing failed, trying progressive parsing (WILL TRUNCATE)")
best_result = None best_result = None
best_valid_length = 0 best_valid_length = 0
for step_size in [1000, 500, 100, 50, 10]: for step_size in [1000, 500, 100, 50, 10]:
@ -217,7 +269,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
if err is None and isinstance(obj, dict): if err is None and isinstance(obj, dict):
extracted_sections = extractSectionsFromDocument(obj) extracted_sections = extractSectionsFromDocument(obj)
if extracted_sections: if extracted_sections:
logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i}") logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i} (DATA TRUNCATED)")
return extracted_sections return extracted_sections
# Store best result even if no sections found # Store best result even if no sections found
if not best_result: if not best_result:

View file

@ -1183,11 +1183,13 @@ Max length: {maxLength} characters
Based on the context, decide which documents to attach. Based on the context, decide which documents to attach.
CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
Return JSON: Return JSON:
{{ {{
"subject": "subject line", "subject": "subject line",
"body": "email body (HTML allowed)", "body": "email body (HTML allowed)",
"attachments": ["doc_ref1", "doc_ref2"] "attachments": ["docItem:<documentId>:<filename>"]
}} }}
""" """
@ -1237,6 +1239,9 @@ Return JSON:
elif isinstance(ai_attachments, list): elif isinstance(ai_attachments, list):
ai_attachments = [a for a in ai_attachments if isinstance(a, str)] ai_attachments = [a for a in ai_attachments if isinstance(a, str)]
# Initialize normalized_ai_attachments
normalized_ai_attachments = []
if ai_attachments: if ai_attachments:
try: try:
ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments
@ -1250,16 +1255,20 @@ Return JSON:
selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids] selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids]
if selected_docs: if selected_docs:
# Map selected ChatDocuments back to docItem references # Map selected ChatDocuments back to docItem references (with full filename)
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs] documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs]
# Normalize ai_attachments to full format for storage
normalized_ai_attachments = documentList.copy()
logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)") logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)")
else: else:
# No intersection; use all available documents # No intersection; use all available documents
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs] documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
normalized_ai_attachments = documentList.copy()
logger.warning("AI selected attachments not found in available documents, using all documents") logger.warning("AI selected attachments not found in available documents, using all documents")
else: else:
# No AI selection; use all available documents # No AI selection; use all available documents
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs] documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
normalized_ai_attachments = documentList.copy()
logger.warning("AI did not specify attachments, using all available documents") logger.warning("AI did not specify attachments, using all available documents")
else: else:
logger.info("No documents provided in documentList; skipping attachment processing") logger.info("No documents provided in documentList; skipping attachment processing")
@ -1363,7 +1372,7 @@ Return JSON:
"cc": cc, "cc": cc,
"bcc": bcc, "bcc": bcc,
"attachments": len(documentList), "attachments": len(documentList),
"aiSelectedAttachments": ai_attachments if ai_attachments else "all documents", "aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
"aiGenerated": True, "aiGenerated": True,
"context": context, "context": context,
"emailStyle": emailStyle, "emailStyle": emailStyle,
@ -1371,12 +1380,40 @@ Return JSON:
"draftData": draft_data "draftData": draft_data
} }
# Extract attachment filenames for validation metadata
attachmentFilenames = []
attachmentReferences = []
if documentList:
try:
from modules.datamodels.datamodelDocref import DocumentReferenceList
attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
# Store normalized document references (with filenames) - use normalized_ai_attachments if available
attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
except Exception:
pass
# Create validation metadata for content validator
validationMetadata = {
"actionType": "outlook.composeAndDraftEmailWithContext",
"emailRecipients": to,
"emailCc": cc,
"emailBcc": bcc,
"emailSubject": subject,
"emailAttachments": attachmentFilenames,
"emailAttachmentReferences": attachmentReferences,
"emailAttachmentCount": len(attachmentFilenames),
"emailStyle": emailStyle,
"hasAttachments": len(attachmentFilenames) > 0
}
return ActionResult( return ActionResult(
success=True, success=True,
documents=[ActionDocument( documents=[ActionDocument(
documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json", documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(draftResultData, indent=2), documentData=json.dumps(draftResultData, indent=2),
mimeType="application/json" mimeType="application/json",
validationMetadata=validationMetadata
)] )]
) )
else: else:

View file

@ -1154,6 +1154,53 @@ class MethodSharepoint(MethodBase):
resultData = json.loads(fileData) resultData = json.loads(fileData)
foundDocuments = resultData.get("foundDocuments", []) foundDocuments = resultData.get("foundDocuments", [])
# If no foundDocuments, check if it's a listDocuments result (has listResults)
if not foundDocuments and "listResults" in resultData:
logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format")
listResults = resultData.get("listResults", [])
foundDocuments = []
siteIdFromList = None
siteNameFromList = None
for listResult in listResults:
siteResults = listResult.get("siteResults", [])
for siteResult in siteResults:
items = siteResult.get("items", [])
# Extract site info from first item if available
if items and not siteIdFromList:
# Try to get site info from the siteResult structure
# We need to discover sites to get the siteId
siteNameFromList = items[0].get("siteName")
for item in items:
# Convert listDocuments item format to foundDocuments format
if item.get("type") == "file":
foundDoc = {
"id": item.get("id"),
"name": item.get("name"),
"type": "file",
"siteName": item.get("siteName"),
"siteId": None, # Will be determined from site discovery
"webUrl": item.get("webUrl"),
"fullPath": item.get("webUrl", ""),
"parentPath": item.get("parentPath", "")
}
foundDocuments.append(foundDoc)
# Discover sites to get siteId if we have siteName
if foundDocuments and siteNameFromList and not siteIdFromList:
logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
allSites = await self._discoverSharePointSites()
matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
if matchingSites:
siteIdFromList = matchingSites[0].get("id")
# Update all foundDocuments with siteId
for doc in foundDocuments:
doc["siteId"] = siteIdFromList
logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
logger.info(f"Converted {len(foundDocuments)} files from listResults format")
if foundDocuments: if foundDocuments:
# Extract SharePoint file IDs from foundDocuments # Extract SharePoint file IDs from foundDocuments
sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"] sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
@ -1167,6 +1214,15 @@ class MethodSharepoint(MethodBase):
siteName = firstDoc.get("siteName") siteName = firstDoc.get("siteName")
siteId = firstDoc.get("siteId") siteId = firstDoc.get("siteId")
# If siteId is missing (from listDocuments conversion), discover sites to find it
if siteName and not siteId:
logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
allSites = await self._discoverSharePointSites()
matchingSites = self._filterSitesByHint(allSites, siteName)
if matchingSites:
siteId = matchingSites[0].get("id")
logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
if siteName and siteId: if siteName and siteId:
sites = [{ sites = [{
"id": siteId, "id": siteId,
@ -1174,6 +1230,19 @@ class MethodSharepoint(MethodBase):
"webUrl": firstDoc.get("webUrl", "") "webUrl": firstDoc.get("webUrl", "")
}] }]
logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})") logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
elif siteName:
# Try to get site by name
allSites = await self._discoverSharePointSites()
matchingSites = self._filterSitesByHint(allSites, siteName)
if matchingSites:
sites = [{
"id": matchingSites[0].get("id"),
"displayName": siteName,
"webUrl": matchingSites[0].get("webUrl", "")
}]
logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
else:
return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.")
else: else:
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.") return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
else: else:

View file

@ -421,14 +421,6 @@ class ContentValidator:
if actionName: if actionName:
# Convert action name to human-readable format # Convert action name to human-readable format
actionDescription = actionName.replace("ai.", "").replace(".", " ").title() actionDescription = actionName.replace("ai.", "").replace(".", " ").title()
if "convert" in actionName.lower():
actionDescription = "Document format conversion"
elif "generate" in actionName.lower() or "create" in actionName.lower():
actionDescription = "Document generation"
elif "extract" in actionName.lower():
actionDescription = "Content extraction"
elif "process" in actionName.lower():
actionDescription = "Content processing"
actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})" actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
# Build action parameters context # Build action parameters context
@ -441,6 +433,25 @@ class ContentValidator:
paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2) paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}" actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
# Extract validation metadata from documents (action-specific context)
validationMetadataContext = ""
if documents:
metadataList = []
for doc in documents:
metadata = getattr(doc, 'validationMetadata', None)
if metadata and isinstance(metadata, dict):
metadataList.append(metadata)
if metadataList:
# Combine all metadata (usually just one document)
combinedMetadata = {}
for meta in metadataList:
combinedMetadata.update(meta)
if combinedMetadata:
metadataJson = json.dumps(combinedMetadata, ensure_ascii=False, indent=2)
validationMetadataContext = f"\nACTION VALIDATION METADATA: {metadataJson}"
# Format success criteria for display with index numbers # Format success criteria for display with index numbers
if successCriteria: if successCriteria:
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)]) criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
@ -452,7 +463,7 @@ class ContentValidator:
=== TASK INFORMATION === === TASK INFORMATION ===
{objectiveLabel}: '{objectiveText}' {objectiveLabel}: '{objectiveText}'
EXPECTED DATA TYPE: {dataType} EXPECTED DATA TYPE: {dataType}
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext} EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}
=== VALIDATION INSTRUCTIONS === === VALIDATION INSTRUCTIONS ===
@ -466,6 +477,7 @@ VALIDATION RULES:
5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help. 5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
VALIDATION STEPS: VALIDATION STEPS:
- Check ACTION VALIDATION METADATA first (if present) - this contains action-specific context
- Check structure summary for quantities, counts, statistics - Check structure summary for quantities, counts, statistics
- Compare found values with required values from criteria - Compare found values with required values from criteria
- If structure unavailable, use metadata only (format, filename, size) - If structure unavailable, use metadata only (format, filename, size)

View file

@ -169,6 +169,10 @@ class AutomationMode(BaseMode):
Execute task using Automation mode - executes predefined actions directly. Execute task using Automation mode - executes predefined actions directly.
No AI planning or review phases - actions are executed sequentially as defined. No AI planning or review phases - actions are executed sequentially as defined.
""" """
# Get task index from workflow state for consistency
if taskIndex is None:
taskIndex = workflow.getTaskIndex()
logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===") logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
try: try:
@ -178,7 +182,6 @@ class AutomationMode(BaseMode):
# Update workflow before executing task # Update workflow before executing task
if taskIndex is not None: if taskIndex is not None:
self._updateWorkflowBeforeExecutingTask(taskIndex) self._updateWorkflowBeforeExecutingTask(taskIndex)
self.services.chat.setWorkflowContext(taskNumber=taskIndex)
# Create task start message # Create task start message
await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks) await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
@ -241,7 +244,7 @@ class AutomationMode(BaseMode):
# Execute action # Execute action
result = await self.actionExecutor.executeSingleAction( result = await self.actionExecutor.executeSingleAction(
action, workflow, taskStep, taskIndex, actionNumber, totalActions action, workflow, taskStep
) )
actionResults.append(result) actionResults.append(result)

View file

@ -561,6 +561,11 @@ class DynamicMode(BaseMode):
# Use connectionReference from selection (required) # Use connectionReference from selection (required)
connectionRef = selection.get('connectionReference') connectionRef = selection.get('connectionReference')
# If not found at top level, check in selection['parameters'] (guided action case)
if not connectionRef and isinstance(selection, dict) and 'parameters' in selection:
connectionRef = selection['parameters'].get('connectionReference')
if connectionRef: if connectionRef:
# Check if action actually has connectionReference parameter # Check if action actually has connectionReference parameter
methodName, actionName = compoundActionName.split('.', 1) methodName, actionName = compoundActionName.split('.', 1)

View file

@ -58,9 +58,10 @@ CONTEXT: {{KEY:OVERALL_TASK_CONTEXT}}
OBJECTIVE: {{KEY:TASK_OBJECTIVE}} OBJECTIVE: {{KEY:TASK_OBJECTIVE}}
=== AVAILABLE RESOURCES === === AVAILABLE RESOURCES ===
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}} AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
{{KEY:AVAILABLE_DOCUMENTS_INDEX}} {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS_INDEX}} AVAILABLE_CONNECTIONS_INDEX:
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
=== AVAILABLE ACTIONS === === AVAILABLE ACTIONS ===
{{KEY:AVAILABLE_METHODS}} {{KEY:AVAILABLE_METHODS}}
@ -82,6 +83,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
- Be the next logical incremental step (not complete entire objective in one step) - Be the next logical incremental step (not complete entire objective in one step)
- Target exactly one output format if producing files - Target exactly one output format if producing files
- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...) - Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
- ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
- Learn from previous validation feedback and avoid repeated mistakes - Learn from previous validation feedback and avoid repeated mistakes
- Include intent analysis fields (dataType, expectedFormats, qualityRequirements, successCriteria) - Include intent analysis fields (dataType, expectedFormats, qualityRequirements, successCriteria)
@ -97,7 +99,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
"successCriteria": ["specific criterion 1", "specific criterion 2"], "successCriteria": ["specific criterion 1", "specific criterion 2"],
"userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining what this action will do (1 sentence, first person, friendly tone)", "userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining what this action will do (1 sentence, first person, friendly tone)",
"learnings": ["..."], "learnings": ["..."],
"requiredInputDocuments": ["docList:..."], "requiredInputDocuments": ["docItem:<documentId>:<filename>", "docList:<label>"],
"requiredConnection": "connection:..." | null, "requiredConnection": "connection:..." | null,
"parametersContext": "concise text that Stage 2 will use to set business parameters" "parametersContext": "concise text that Stage 2 will use to set business parameters"
}} }}
@ -115,6 +117,9 @@ Analyze actionObjective to determine:
3. parametersContext: short, sufficient for Stage 2 3. parametersContext: short, sufficient for Stage 2
4. Return ONLY JSON - no markdown, no explanations 4. Return ONLY JSON - no markdown, no explanations
5. requiredInputDocuments: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify) 5. requiredInputDocuments: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
- For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
- For document lists: use docList:<label> format
- Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
6. requiredConnection: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX 6. requiredConnection: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX
7. Plan incrementally: one output format per step 7. Plan incrementally: one output format per step
8. Learn from validation feedback - avoid repeating mistakes 8. Learn from validation feedback - avoid repeating mistakes
@ -307,6 +312,7 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True), PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True),
PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False), PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True), PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True),
PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
] ]
template = """TASK DECISION template = """TASK DECISION
@ -321,7 +327,9 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
=== AVAILABLE RESOURCES === === AVAILABLE RESOURCES ===
ACTIONS: {{KEY:AVAILABLE_METHODS}} ACTIONS: {{KEY:AVAILABLE_METHODS}}
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}} AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
AVAILABLE_CONNECTIONS_INDEX:
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
{{KEY:REVIEW_CONTENT}} {{KEY:REVIEW_CONTENT}}
@ -334,12 +342,20 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
- Next action should ONLY generate the MISSING part, NOT repeat what's already delivered - Next action should ONLY generate the MISSING part, NOT repeat what's already delivered
=== OUTPUT FORMAT === === OUTPUT FORMAT ===
Return ONLY JSON (no markdown, no explanations). The decision MUST:
- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
- ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
- Use ONLY exact labels from AVAILABLE_CONNECTIONS_INDEX (connection:...)
- Provide concrete parameter values in nextActionParameters (not placeholders)
- Match parameter names exactly as defined in AVAILABLE_METHODS
{{ {{
"status": "continue", "status": "continue",
"reason": "Brief reason explaining why continuing", "reason": "Brief reason explaining why continuing",
"nextAction": "Selected_action_from_ACTIONS", "nextAction": "Selected_action_from_ACTIONS",
"nextActionParameters": {{ "nextActionParameters": {{
"documentList": ["docItem:reference_from_DOCUMENTS"], "documentList": ["docItem:<documentId>:<filename>", "docList:<label>"],
"connectionReference": "connection:reference_from_AVAILABLE_CONNECTIONS_INDEX",
"parameter1": "value1", "parameter1": "value1",
"parameter2": "value2" "parameter2": "value2"
}}, }},
@ -347,16 +363,21 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
}} }}
=== RULES === === RULES ===
- If "continue": MUST provide nextAction and nextActionParameters 1. Return ONLY JSON - no markdown, no explanations
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent) 2. If "continue": MUST provide nextAction and nextActionParameters
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names) 3. nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent) 4. nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
- nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION 5. documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
- CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters - For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
- Do NOT repeat failed actions - suggest DIFFERENT approach - For document lists: use docList:<label> format
- If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach - Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
- nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION 6. connectionReference: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX (required if action needs connection)
- If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart 7. nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
8. CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
9. Do NOT repeat failed actions - suggest DIFFERENT approach
10. If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
11. nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
12. If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
""" """

View file

@ -428,7 +428,7 @@ class WorkflowProcessor:
) )
# Prepare AI call options for fast path (balanced, fast processing) # Prepare AI call options for fast path (balanced, fast processing)
from modules.datamodels.datamodelAi import AiCallOptions from modules.datamodels.datamodelAi import AiCallOptions, AiCallRequest
options = AiCallOptions( options = AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE, operationType=OperationTypeEnum.DATA_ANALYSE,
@ -438,16 +438,19 @@ class WorkflowProcessor:
maxProcessingTime=15 # Fast path should complete in 15s maxProcessingTime=15 # Fast path should complete in 15s
) )
# Call AI (content call - no documents needed for fast path) # Call AI directly (no document generation - just plain text response)
aiResponse = await self.services.ai.callAiContent( # Use aiObjects.call() instead of callAiContent() to avoid document generation path
aiRequest = AiCallRequest(
prompt=fastPathPrompt, prompt=fastPathPrompt,
contentParts=None, # Fast path doesn't process documents context="",
options=options, options=options,
outputFormat=None # Text response, not document generation contentParts=None # Fast path doesn't process documents
) )
# Extract response content (AiResponse.content is a string) aiCallResponse = await self.services.ai.aiObjects.call(aiRequest)
responseText = aiResponse.content if isinstance(aiResponse, str) else (aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse))
# Extract response content (AiCallResponse.content is a string)
responseText = aiCallResponse.content if aiCallResponse.content else ""
# Create ActionResult with response # Create ActionResult with response
# For fast path, we create a simple text document with the response # For fast path, we create a simple text document with the response

View file

@ -162,30 +162,38 @@ class WorkflowManager:
self.workflowProcessor = WorkflowProcessor(self.services) self.workflowProcessor = WorkflowProcessor(self.services)
# Process user-uploaded documents from userInput for complexity detection # Get workflow mode to determine if complexity detection is needed
# This is the correct way: use the input data directly, not workflow state workflowMode = getattr(self.services.workflow, 'workflowMode', None)
documents = [] skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
if userInput.listFileId:
try:
documents = await self._processFileIds(userInput.listFileId, None)
except Exception as e:
logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
# Detect complexity (AI-based semantic understanding) using user input documents if skipComplexityDetection:
complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents) logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan")
logger.info(f"Request complexity detected: {complexity}") complexity = "moderate" # Default for automation workflows
else:
# Process user-uploaded documents from userInput for complexity detection
# This is the correct way: use the input data directly, not workflow state
documents = []
if userInput.listFileId:
try:
documents = await self._processFileIds(userInput.listFileId, None)
except Exception as e:
logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
# Detect complexity (AI-based semantic understanding) using user input documents
complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
logger.info(f"Request complexity detected: {complexity}")
# Now send the first message (which will also process the documents again, but that's fine) # Now send the first message (which will also process the documents again, but that's fine)
await self._sendFirstMessage(userInput) await self._sendFirstMessage(userInput)
# Route to fast path for simple requests # Route to fast path for simple requests (skip for automation mode)
if complexity == "simple": if not skipComplexityDetection and complexity == "simple":
logger.info("Routing to fast path for simple request") logger.info("Routing to fast path for simple request")
await self._executeFastPath(userInput, documents) await self._executeFastPath(userInput, documents)
return # Fast path completes the workflow return # Fast path completes the workflow
# Route to full workflow for moderate/complex requests # Route to full workflow for moderate/complex requests or automation mode
logger.info(f"Routing to full workflow for {complexity} request") logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else ""))
taskPlan = await self._planTasks(userInput) taskPlan = await self._planTasks(userInput)
await self._executeTasks(taskPlan) await self._executeTasks(taskPlan)
await self._processWorkflowResults() await self._processWorkflowResults()