Integration testing of the adapted AI workflow, with fixes

This commit is contained in:
ValueOn AG 2025-12-01 19:15:50 +01:00
parent aff37fd2e2
commit b401be703f
18 changed files with 648 additions and 293 deletions

View file

@ -400,6 +400,10 @@ class ActionDocument(BaseModel):
None,
description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)"
)
validationMetadata: Optional[Dict[str, Any]] = Field(
None,
description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)"
)
registerModelLabels(

View file

@ -262,11 +262,17 @@ class AiObjects:
logger.info(f"✅ Image content part processed successfully with model: {model.name}")
# Convert to AiCallResponse format
# Note: AiModelResponse doesn't have priceUsd, and processingTime can be None
# Calculate processing time if not provided (fallback to 0.0)
processingTime = getattr(modelResponse, 'processingTime', None)
if processingTime is None:
processingTime = 0.0
return AiCallResponse(
content=modelResponse.content,
modelName=model.name,
priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0,
processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0,
priceUsd=0.0, # Price will be calculated elsewhere if needed
processingTime=processingTime,
bytesSent=0, # Will be calculated elsewhere
bytesReceived=0, # Will be calculated elsewhere
errorCount=0

View file

@ -944,32 +944,17 @@ If no trackable items can be identified, return: {{"kpis": []}}
)
try:
# Default outputFormat to "txt" if not specified (unified path - all formats handled the same way)
if not outputFormat:
outputFormat = "txt"
# Extraction is now separate - contentParts must be extracted before calling
# Require operationType to be set before calling
opType = getattr(options, "operationType", None)
if not opType:
# If outputFormat is specified, default to DATA_GENERATE
if outputFormat:
options.operationType = OperationTypeEnum.DATA_GENERATE
opType = OperationTypeEnum.DATA_GENERATE
else:
self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
analyzedOptions = await self._analyzePromptAndCreateOptions(prompt)
if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType:
options.operationType = analyzedOptions.operationType
# Merge other analyzed options
if hasattr(analyzedOptions, "priority"):
options.priority = analyzedOptions.priority
if hasattr(analyzedOptions, "processingMode"):
options.processingMode = analyzedOptions.processingMode
if hasattr(analyzedOptions, "compressPrompt"):
options.compressPrompt = analyzedOptions.compressPrompt
if hasattr(analyzedOptions, "compressContext"):
options.compressContext = analyzedOptions.compressContext
else:
# Default to DATA_ANALYSE if analysis fails
options.operationType = OperationTypeEnum.DATA_ANALYSE
opType = options.operationType
# outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE
options.operationType = OperationTypeEnum.DATA_GENERATE
opType = OperationTypeEnum.DATA_GENERATE
# Handle IMAGE_GENERATE operations
if opType == OperationTypeEnum.IMAGE_GENERATE:
@ -1052,171 +1037,232 @@ If no trackable items can be identified, return: {{"kpis": []}}
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(errorMsg)
# Handle document generation (outputFormat specified)
if outputFormat:
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt
options.compressPrompt = False
options.compressContext = False
# Handle document generation (outputFormat always set, defaults to "txt")
# Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way
# outputFormat is always set now (defaults to "txt" if not specified)
# CRITICAL: For document generation with JSON templates, NEVER compress the prompt
options.compressPrompt = False
options.compressContext = False
# Process contentParts for generation prompt (if provided)
# Use generic _callWithContentParts() which handles all content types (images, text, etc.)
# This automatically processes images with vision models and merges all results
if contentParts:
# Filter out binary/other parts that shouldn't be processed
processableParts = []
skippedParts = []
for p in contentParts:
if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
processableParts.append(p)
else:
skippedParts.append(p)
# Convert contentParts to text for generation prompt (if provided)
if contentParts:
# Convert contentParts to text for generation prompt
content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
if skippedParts:
logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")
if processableParts:
# Count images for progress update
imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
if imageCount > 0:
self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")
# Build proper extraction prompt using buildExtractionPrompt
# This creates a focused extraction prompt, not the user's generation prompt
from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
# Determine renderer for format-specific guidelines
renderer = None
if outputFormat:
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
renderer = generationService.getRendererForFormat(outputFormat)
except Exception as e:
logger.debug(f"Could not get renderer for format {outputFormat}: {e}")
extractionPrompt = await buildExtractionPrompt(
outputFormat=outputFormat or "txt",
userPrompt=prompt, # User's prompt as context for what to extract
title=title or "Document",
aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None,
services=self.services,
renderer=renderer
)
logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")
# Use DATA_EXTRACT operation type for extraction
extractionOptions = AiCallOptions(
operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction
compressPrompt=options.compressPrompt,
compressContext=options.compressContext
)
extractionRequest = AiCallRequest(
prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt
context="",
options=extractionOptions,
contentParts=processableParts
)
# Write debug file for extraction prompt (all parts)
self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
# Call generic content parts processor - handles images, text, chunking, merging
extractionResponse = await self.aiObjects.call(extractionRequest)
# Write debug file for extraction response
if extractionResponse.content:
self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
else:
self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")
# Use extracted content directly for generation prompt
if extractionResponse.errorCount == 0 and extractionResponse.content:
# The extracted content is already merged and ready to use
content_for_generation = extractionResponse.content
logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation")
else:
# Extraction failed - use placeholders
logger.warning(f"Content extraction failed, using placeholders")
placeholderParts = []
for p in processableParts:
placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]")
content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None
else:
content_for_generation = None
self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
generation_prompt = await buildGenerationPrompt(
outputFormat, prompt, title, content_for_generation, None
logger.debug("No processable parts found in contentParts")
else:
content_for_generation = None
self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
generation_prompt = await buildGenerationPrompt(
outputFormat, prompt, title, content_for_generation, None
)
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
"extracted_content": content_for_generation
}
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
# Extract user prompt from promptArgs for task completion analysis
userPrompt = None
if promptArgs:
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId,
userPrompt=userPrompt
)
self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
try:
extracted_json = self.services.utils.jsonExtractString(generated_json)
generated_data = json.loads(extracted_json)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse generated JSON: {str(e)}")
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(f"Generated content is not valid JSON: {str(e)}")
# Extract title and filename from generated document structure
extractedTitle = title
extractedFilename = None
if isinstance(generated_data, dict) and "documents" in generated_data:
docs = generated_data["documents"]
if isinstance(docs, list) and len(docs) > 0:
firstDoc = docs[0]
if isinstance(firstDoc, dict):
if firstDoc.get("title"):
extractedTitle = firstDoc["title"]
if firstDoc.get("filename"):
extractedFilename = firstDoc["filename"]
# Ensure metadata contains the extracted title
if "metadata" not in generated_data:
generated_data["metadata"] = {}
if extractedTitle:
generated_data["metadata"]["title"] = extractedTitle
# Create separate operation for content rendering
renderOperationId = f"{aiOperationId}_render"
renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
self.services.chat.progressLogStart(
renderOperationId,
"Content Rendering",
"Rendering",
f"Format: {outputFormat}",
parentId=renderParentLogId
)
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
rendered_content, mime_type = await generationService.renderReport(
generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
)
self.services.chat.progressLogFinish(renderOperationId, True)
promptArgs = {
"outputFormat": outputFormat,
"userPrompt": prompt,
"title": title,
"extracted_content": content_for_generation
}
self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
# Extract user prompt from promptArgs for task completion analysis
userPrompt = None
if promptArgs:
userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
generated_json = await self._callAiWithLooping(
generation_prompt,
options,
"document_generation",
buildGenerationPrompt,
promptArgs,
aiOperationId,
userPrompt=userPrompt
)
self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
try:
extracted_json = self.services.utils.jsonExtractString(generated_json)
generated_data = json.loads(extracted_json)
except json.JSONDecodeError as e:
logger.error(f"Failed to parse generated JSON: {str(e)}")
self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(f"Generated content is not valid JSON: {str(e)}")
# Extract title and filename from generated document structure
extractedTitle = title
extractedFilename = None
if isinstance(generated_data, dict) and "documents" in generated_data:
docs = generated_data["documents"]
if isinstance(docs, list) and len(docs) > 0:
firstDoc = docs[0]
if isinstance(firstDoc, dict):
if firstDoc.get("title"):
extractedTitle = firstDoc["title"]
if firstDoc.get("filename"):
extractedFilename = firstDoc["filename"]
# Ensure metadata contains the extracted title
if "metadata" not in generated_data:
generated_data["metadata"] = {}
if extractedTitle:
generated_data["metadata"]["title"] = extractedTitle
# Create separate operation for content rendering
renderOperationId = f"{aiOperationId}_render"
renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
self.services.chat.progressLogStart(
renderOperationId,
"Content Rendering",
"Rendering",
f"Format: {outputFormat}",
parentId=renderParentLogId
)
try:
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
generationService = GenerationService(self.services)
self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
rendered_content, mime_type = await generationService.renderReport(
generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
)
self.services.chat.progressLogFinish(renderOperationId, True)
# Determine document name
if extractedFilename:
documentName = extractedFilename
elif extractedTitle and extractedTitle != "Generated Document":
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
if not sanitized.lower().endswith(f".{outputFormat}"):
documentName = f"{sanitized}.{outputFormat}"
else:
documentName = sanitized
# Determine document name
if extractedFilename:
documentName = extractedFilename
elif extractedTitle and extractedTitle != "Generated Document":
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
if sanitized:
if not sanitized.lower().endswith(f".{outputFormat}"):
documentName = f"{sanitized}.{outputFormat}"
else:
documentName = f"generated.{outputFormat}"
documentName = sanitized
else:
documentName = f"generated.{outputFormat}"
# Build document data
docData = DocumentData(
documentName=documentName,
documentData=rendered_content,
mimeType=mime_type,
sourceJson=generated_data # Preserve source JSON for structure validation
)
metadata = AiResponseMetadata(
title=extractedTitle or title or "Generated Document",
filename=extractedFilename,
operationType=opType.value if opType else None
)
self.services.utils.writeDebugFile(str(generated_data), "document_generation_response")
self.services.chat.progressLogFinish(aiOperationId, True)
return AiResponse(
content=json.dumps(generated_data),
metadata=metadata,
documents=[docData]
)
except Exception as e:
logger.error(f"Error rendering document: {str(e)}")
if renderOperationId:
self.services.chat.progressLogFinish(renderOperationId, False)
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(f"Rendering failed: {str(e)}")
# Handle text processing (no outputFormat)
self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
if contentParts:
# Process contentParts through AI
# Convert contentParts to text for prompt
contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt
result_content = await self._callAiWithLooping(
fullPrompt, options, "text", None, None, aiOperationId
else:
documentName = f"generated.{outputFormat}"
# Build document data
docData = DocumentData(
documentName=documentName,
documentData=rendered_content,
mimeType=mime_type,
sourceJson=generated_data # Preserve source JSON for structure validation
)
else:
# Direct text call (no documents to process)
result_content = await self._callAiWithLooping(
prompt, options, "text", None, None, aiOperationId
metadata = AiResponseMetadata(
title=extractedTitle or title or "Generated Document",
filename=extractedFilename,
operationType=opType.value if opType else None
)
metadata = AiResponseMetadata(
operationType=opType.value if opType else None
)
self.services.chat.progressLogFinish(aiOperationId, True)
return AiResponse(
content=result_content,
metadata=metadata
)
# Write JSON with proper formatting (not str(), which produces a Python repr rather than valid JSON)
jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False)
self.services.utils.writeDebugFile(jsonStr, "document_generation_response")
self.services.chat.progressLogFinish(aiOperationId, True)
return AiResponse(
content=json.dumps(generated_data),
metadata=metadata,
documents=[docData]
)
except Exception as e:
logger.error(f"Error rendering document: {str(e)}")
if renderOperationId:
self.services.chat.progressLogFinish(renderOperationId, False)
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(f"Rendering failed: {str(e)}")
except Exception as e:
logger.error(f"Error in callAiContent: {str(e)}")

View file

@ -1236,8 +1236,12 @@ class JsonResponseHandler:
# Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
# Handle None (path doesn't exist - incomplete JSON)
if value is None:
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}")
# Count items/rows/elements based on type
if isinstance(value, list):
elif isinstance(value, list):
updatedKpi["currentValue"] = len(value)
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
elif isinstance(value, (int, float)):
@ -1296,8 +1300,12 @@ class JsonResponseHandler:
# Extract value using path
value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
# Handle None (path doesn't exist - incomplete JSON)
if value is None:
updatedKpi["currentValue"] = kpi.get("currentValue", 0)
logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}")
# Count items/rows/elements based on type
if isinstance(value, list):
elif isinstance(value, list):
updatedKpi["currentValue"] = len(value)
logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
elif isinstance(value, (int, float)):
@ -1321,6 +1329,7 @@ class JsonResponseHandler:
Extract value from object using dot-notation path with array indices.
Example: "sections[0].elements[0].items"
Returns None if path doesn't exist (for incomplete JSON handling).
"""
parts = path.split('.')
current = obj
@ -1332,20 +1341,30 @@ class JsonResponseHandler:
index = int(part[part.index('[') + 1:part.index(']')])
if key:
current = current.get(key, [])
if isinstance(current, list) and 0 <= index < len(current):
current = current[index]
if isinstance(current, dict):
current = current.get(key)
if current is None:
return None # Key doesn't exist
else:
return None # Can't access key on non-dict
if isinstance(current, list):
if 0 <= index < len(current):
current = current[index]
else:
# Index out of range - return None for incomplete JSON
return None
else:
raise KeyError(f"Invalid index {index} for {key}")
# Not a list, can't index
return None
else:
# Handle dict access
if isinstance(current, dict):
current = current.get(part)
if current is None:
return None # Key doesn't exist
else:
raise KeyError(f"Cannot access {part} on {type(current)}")
if current is None:
raise KeyError(f"Path {path} returned None at {part}")
return None # Can't access key on non-dict
return current

View file

@ -92,13 +92,16 @@ class ChatService:
if docRef.startswith("docItem:"):
# docItem:<id>:<filename> or docItem:<id> (filename is optional)
# ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct)
# Both formats are supported: docItem:<documentId> and docItem:<documentId>:<filename>
parts = docRef.split(':')
if len(parts) >= 2:
docId = parts[1] # This should be the documentId (UUID)
docFound = False
# ALWAYS try to match by documentId first (regardless of number of parts)
# This handles: docItem:documentId and docItem:documentId:filename
# This handles both formats:
# - docItem:<documentId> (without filename - still works)
# - docItem:<documentId>:<filename> (with filename - preferred)
for message in workflow.messages:
# Validate message belongs to this workflow
msgWorkflowId = getattr(message, 'workflowId', None)

View file

@ -138,6 +138,36 @@ class ExtractionService:
f"extraction.process.{doc.mimeType}"
)
# Write extraction results to debug file
try:
from modules.shared.debugLogger import writeDebugFile
import json
# Create summary of extraction results for debug
extractionSummary = {
"documentName": doc.fileName,
"documentMimeType": doc.mimeType,
"partsCount": len(ec.parts),
"parts": []
}
for part in ec.parts:
partSummary = {
"typeGroup": part.typeGroup,
"mimeType": part.mimeType,
"label": part.label,
"dataLength": len(part.data) if part.data else 0,
"metadata": part.metadata
}
# Include data preview for small parts (first 500 chars)
if part.data and len(part.data) <= 500:
partSummary["dataPreview"] = part.data[:500]
elif part.data:
partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
extractionSummary["parts"].append(partSummary)
writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
except Exception as e:
logger.debug(f"Failed to write extraction debug file: {str(e)}")
results.append(ec)
return results

View file

@ -99,9 +99,16 @@ async def buildExtractionPrompt(
# Parse extraction intent if AI service is available
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
# Build base prompt
# Build base prompt with clear user prompt markers
sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt
adaptive_prompt = f"""
{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{sanitized_user_prompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.

View file

@ -479,25 +479,11 @@ class RendererXlsx(BaseRenderer):
sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
else:
# Single table or mixed content - create main sheet
# Single table or mixed content - create only main sheet
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(documentTitle[:31]) # Excel sheet name limit
# Add additional sheets for other content types
contentTypes = set()
for section in sections:
contentType = section.get("content_type", "paragraph")
contentTypes.add(contentType)
if "table" in contentTypes and len(tableSections) == 1:
sheetNames.append("Table Data")
if "list" in contentTypes:
sheetNames.append("Lists")
if "paragraph" in contentTypes or "heading" in contentTypes:
sheetNames.append("Text")
# Limit to 4 sheets maximum
return sheetNames[:4]
return sheetNames
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets with content from JSON based on actual sheet names."""
@ -527,14 +513,10 @@ class RendererXlsx(BaseRenderer):
sheetTitle = caption
self._populateTableSheet(sheet, section, styles, sheetTitle)
else:
# Single table or mixed content - use original logic
# Single table or mixed content - populate only main sheet
firstSheetName = sheetNames[0]
self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
# If we have multiple sheets, distribute content by type
if len(sheetNames) > 1:
self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")

View file

@ -72,7 +72,13 @@ async def buildGenerationPrompt(
continuationText += "Start directly with the next element/section that should follow.\n\n"
# PROMPT FOR CONTINUATION
generationPrompt = f"""User request: "{userPrompt}"
generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}
CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.
@ -93,8 +99,57 @@ Continue generating the remaining content now.
else:
# PROMPT FOR FIRST CALL
# Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
if extracted_content:
# If we have extracted content, put it FIRST and make it very clear it's the source data
generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}
generationPrompt = f"""User request: "{userPrompt}"
{'='*80}
CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE
{'='*80}
The content below contains the ACTUAL DATA extracted from the source documents.
You MUST use this data - DO NOT generate fake or example data.
{'='*80}
EXTRACTED CONTENT FROM DOCUMENTS:
{'='*80}
{extracted_content}
{'='*80}
END OF EXTRACTED CONTENT
{'='*80}
Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
JSON structure template (structure only - use data from EXTRACTED CONTENT above):
{jsonTemplate}
Instructions:
- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
- Do NOT reuse example section IDs; create your own.
- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
- Output JSON only; no markdown fences or extra text.
Generate your complete response using the extracted content data.
"""
else:
# No extracted content - generate from scratch
generationPrompt = f"""{'='*80}
USER REQUEST / USER PROMPT:
{'='*80}
{userPrompt}
{'='*80}
END OF USER REQUEST / USER PROMPT
{'='*80}
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
@ -111,12 +166,5 @@ Instructions:
Generate your complete response.
"""
# If we have extracted content, prepend it to the prompt
if extracted_content:
generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
{extracted_content}
{generationPrompt}"""
return generationPrompt.strip()

View file

@ -102,12 +102,30 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
Attempt to repair broken JSON using multiple strategies.
Generic solution that works for any content type.
Returns the best repair attempt or None if all fail.
IMPORTANT: This function tries to preserve ALL data by avoiding truncation.
Only uses truncation as a last resort when structure closing fails.
"""
if not text:
return None
# Strategy 1: Try to extract sections from the entire text first
# Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
# This preserves all data and should be tried first
closedStr = closeJsonStructures(text)
obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict):
sections = extractSectionsFromDocument(obj)
if sections:
logger.info(f"Repaired JSON using structure closing (preserved all data, found {len(sections)} sections)")
return obj
else:
# Structure closing worked but no sections found - still return it
logger.info("Repaired JSON using structure closing (preserved all data, but no sections found)")
return obj
# Strategy 2: Try to extract sections from the entire text using regex
# This handles cases where the JSON structure is broken but content is intact
# NOTE: _extractSectionsRegex may truncate, but we try it before progressive parsing
extractedSections = _extractSectionsRegex(text)
if extractedSections:
logger.info(f"Extracted {len(extractedSections)} sections using regex")
@ -120,7 +138,10 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
"documents": [{"sections": extractedSections}]
}
# Strategy 2: Progressive parsing - try to find longest valid prefix
# Strategy 3: Progressive parsing - try to find longest valid prefix (TRUNCATES DATA)
# WARNING: This strategy truncates the input and loses data after the truncation point
# Only use as last resort when other strategies fail
logger.warning("Structure closing and regex extraction failed, trying progressive parsing (WILL TRUNCATE DATA)")
bestResult = None
bestValidLength = 0
@ -133,13 +154,13 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
if err is None and isinstance(obj, dict):
bestResult = obj
bestValidLength = i
logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
logger.debug(f"Progressive parsing success at length {i} (step: {stepSize}) - DATA TRUNCATED AT POSITION {i}")
break
if bestResult:
break
if bestResult:
logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
logger.warning(f"Repaired JSON using progressive parsing (valid length: {bestValidLength}, DATA LOST AFTER THIS POINT)")
# Check if we have sections in the result
sections = extractSectionsFromDocument(bestResult)
@ -160,13 +181,6 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
bestResult["documents"][0]["sections"].extend(extractedSections)
return bestResult
# Strategy 3: Structure closing - close incomplete structures
closedStr = closeJsonStructures(text)
obj, err, _ = tryParseJson(closedStr)
if err is None and isinstance(obj, dict):
logger.info("Repaired JSON using structure closing")
return obj
logger.warning("All repair strategies failed")
return None
@ -174,18 +188,43 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
def closeJsonStructures(text: str) -> str:
"""
Close incomplete JSON structures by adding missing closing brackets.
Also handles unterminated strings by closing them.
"""
if not text:
return text
result = text
# Handle unterminated strings: find the last unclosed string
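# Look for patterns like: "value (closing quote missing) or "value\n (cut off after a line break)
# Simple heuristic: an odd total number of '"' characters in the text (escaped quotes are counted too) is taken to mean it ends inside an unterminated string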
# Try to close it by finding the last opening quote and closing it
if result.strip():
# Count quotes - if odd number, we have an unterminated string
quoteCount = result.count('"')
if quoteCount % 2 == 1:
# Find the last opening quote that's not escaped
lastQuotePos = result.rfind('"')
if lastQuotePos >= 0:
# Check if it's escaped
escapeCount = 0
i = lastQuotePos - 1
while i >= 0 and result[i] == '\\':
escapeCount += 1
i -= 1
# If not escaped (even number of backslashes), close the string
if escapeCount % 2 == 0:
# Find where the string should end (before next comma, bracket, or brace)
# For now, just close it at the end
result += '"'
# Count open/close brackets and braces
openBraces = text.count('{')
closeBraces = text.count('}')
openBrackets = text.count('[')
closeBrackets = text.count(']')
openBraces = result.count('{')
closeBraces = result.count('}')
openBrackets = result.count('[')
closeBrackets = result.count(']')
# Close incomplete structures
result = text
for _ in range(openBraces - closeBraces):
result += '}'
for _ in range(openBrackets - closeBrackets):
@ -202,11 +241,24 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
NOTE: This function is called FROM repairBrokenJson, so it must NOT call repairBrokenJson
to avoid circular dependency. Instead, it implements its own repair strategies.
IMPORTANT: Tries to preserve data by using structure closing first before truncation.
"""
sections = []
# Strategy 1: Try progressive parsing to find longest valid JSON prefix
# Find the longest valid JSON prefix that contains sections
# Strategy 1: Try structure closing WITHOUT truncation first (preserves all data)
closed_str = closeJsonStructures(text)
obj, err, _ = tryParseJson(closed_str)
if err is None and isinstance(obj, dict):
extracted_sections = extractSectionsFromDocument(obj)
if extracted_sections:
logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using structure closing (preserved all data)")
return extracted_sections
# Strategy 2: Try progressive parsing to find longest valid JSON prefix (TRUNCATES DATA)
# WARNING: This truncates the input and loses data
# Only use if structure closing failed
logger.debug("_extractSectionsRegex: Structure closing failed, trying progressive parsing (WILL TRUNCATE)")
best_result = None
best_valid_length = 0
for step_size in [1000, 500, 100, 50, 10]:
@ -217,7 +269,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
if err is None and isinstance(obj, dict):
extracted_sections = extractSectionsFromDocument(obj)
if extracted_sections:
logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i}")
logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i} (DATA TRUNCATED)")
return extracted_sections
# Store best result even if no sections found
if not best_result:

View file

@ -1183,11 +1183,13 @@ Max length: {maxLength} characters
Based on the context, decide which documents to attach.
CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
Return JSON:
{{
"subject": "subject line",
"body": "email body (HTML allowed)",
"attachments": ["doc_ref1", "doc_ref2"]
"attachments": ["docItem:<documentId>:<filename>"]
}}
"""
@ -1237,6 +1239,9 @@ Return JSON:
elif isinstance(ai_attachments, list):
ai_attachments = [a for a in ai_attachments if isinstance(a, str)]
# Initialize normalized_ai_attachments
normalized_ai_attachments = []
if ai_attachments:
try:
ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments
@ -1250,16 +1255,20 @@ Return JSON:
selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids]
if selected_docs:
# Map selected ChatDocuments back to docItem references
# Map selected ChatDocuments back to docItem references (with full filename)
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs]
# Normalize ai_attachments to full format for storage
normalized_ai_attachments = documentList.copy()
logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)")
else:
# No intersection; use all available documents
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
normalized_ai_attachments = documentList.copy()
logger.warning("AI selected attachments not found in available documents, using all documents")
else:
# No AI selection; use all available documents
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
normalized_ai_attachments = documentList.copy()
logger.warning("AI did not specify attachments, using all available documents")
else:
logger.info("No documents provided in documentList; skipping attachment processing")
@ -1363,7 +1372,7 @@ Return JSON:
"cc": cc,
"bcc": bcc,
"attachments": len(documentList),
"aiSelectedAttachments": ai_attachments if ai_attachments else "all documents",
"aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
"aiGenerated": True,
"context": context,
"emailStyle": emailStyle,
@ -1371,12 +1380,40 @@ Return JSON:
"draftData": draft_data
}
# Extract attachment filenames for validation metadata
attachmentFilenames = []
attachmentReferences = []
if documentList:
try:
from modules.datamodels.datamodelDocref import DocumentReferenceList
attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
# Store normalized document references (with filenames) - use normalized_ai_attachments if available
attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
except Exception:
pass
# Create validation metadata for content validator
validationMetadata = {
"actionType": "outlook.composeAndDraftEmailWithContext",
"emailRecipients": to,
"emailCc": cc,
"emailBcc": bcc,
"emailSubject": subject,
"emailAttachments": attachmentFilenames,
"emailAttachmentReferences": attachmentReferences,
"emailAttachmentCount": len(attachmentFilenames),
"emailStyle": emailStyle,
"hasAttachments": len(attachmentFilenames) > 0
}
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(draftResultData, indent=2),
mimeType="application/json"
mimeType="application/json",
validationMetadata=validationMetadata
)]
)
else:

View file

@ -1154,6 +1154,53 @@ class MethodSharepoint(MethodBase):
resultData = json.loads(fileData)
foundDocuments = resultData.get("foundDocuments", [])
# If no foundDocuments, check if it's a listDocuments result (has listResults)
if not foundDocuments and "listResults" in resultData:
logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format")
listResults = resultData.get("listResults", [])
foundDocuments = []
siteIdFromList = None
siteNameFromList = None
for listResult in listResults:
siteResults = listResult.get("siteResults", [])
for siteResult in siteResults:
items = siteResult.get("items", [])
# Extract site info from first item if available
if items and not siteIdFromList:
# Try to get site info from the siteResult structure
# We need to discover sites to get the siteId
siteNameFromList = items[0].get("siteName")
for item in items:
# Convert listDocuments item format to foundDocuments format
if item.get("type") == "file":
foundDoc = {
"id": item.get("id"),
"name": item.get("name"),
"type": "file",
"siteName": item.get("siteName"),
"siteId": None, # Will be determined from site discovery
"webUrl": item.get("webUrl"),
"fullPath": item.get("webUrl", ""),
"parentPath": item.get("parentPath", "")
}
foundDocuments.append(foundDoc)
# Discover sites to get siteId if we have siteName
if foundDocuments and siteNameFromList and not siteIdFromList:
logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
allSites = await self._discoverSharePointSites()
matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
if matchingSites:
siteIdFromList = matchingSites[0].get("id")
# Update all foundDocuments with siteId
for doc in foundDocuments:
doc["siteId"] = siteIdFromList
logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
logger.info(f"Converted {len(foundDocuments)} files from listResults format")
if foundDocuments:
# Extract SharePoint file IDs from foundDocuments
sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
@ -1167,6 +1214,15 @@ class MethodSharepoint(MethodBase):
siteName = firstDoc.get("siteName")
siteId = firstDoc.get("siteId")
# If siteId is missing (from listDocuments conversion), discover sites to find it
if siteName and not siteId:
logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
allSites = await self._discoverSharePointSites()
matchingSites = self._filterSitesByHint(allSites, siteName)
if matchingSites:
siteId = matchingSites[0].get("id")
logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
if siteName and siteId:
sites = [{
"id": siteId,
@ -1174,6 +1230,19 @@ class MethodSharepoint(MethodBase):
"webUrl": firstDoc.get("webUrl", "")
}]
logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
elif siteName:
# Try to get site by name
allSites = await self._discoverSharePointSites()
matchingSites = self._filterSitesByHint(allSites, siteName)
if matchingSites:
sites = [{
"id": matchingSites[0].get("id"),
"displayName": siteName,
"webUrl": matchingSites[0].get("webUrl", "")
}]
logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
else:
return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.")
else:
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
else:

View file

@ -421,14 +421,6 @@ class ContentValidator:
if actionName:
# Convert action name to human-readable format
actionDescription = actionName.replace("ai.", "").replace(".", " ").title()
if "convert" in actionName.lower():
actionDescription = "Document format conversion"
elif "generate" in actionName.lower() or "create" in actionName.lower():
actionDescription = "Document generation"
elif "extract" in actionName.lower():
actionDescription = "Content extraction"
elif "process" in actionName.lower():
actionDescription = "Content processing"
actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
# Build action parameters context
@ -441,6 +433,25 @@ class ContentValidator:
paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
# Extract validation metadata from documents (action-specific context)
validationMetadataContext = ""
if documents:
metadataList = []
for doc in documents:
metadata = getattr(doc, 'validationMetadata', None)
if metadata and isinstance(metadata, dict):
metadataList.append(metadata)
if metadataList:
# Combine all metadata (usually just one document)
combinedMetadata = {}
for meta in metadataList:
combinedMetadata.update(meta)
if combinedMetadata:
metadataJson = json.dumps(combinedMetadata, ensure_ascii=False, indent=2)
validationMetadataContext = f"\nACTION VALIDATION METADATA: {metadataJson}"
# Format success criteria for display with index numbers
if successCriteria:
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
@ -452,7 +463,7 @@ class ContentValidator:
=== TASK INFORMATION ===
{objectiveLabel}: '{objectiveText}'
EXPECTED DATA TYPE: {dataType}
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}
EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}
=== VALIDATION INSTRUCTIONS ===
@ -466,6 +477,7 @@ VALIDATION RULES:
5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
VALIDATION STEPS:
- Check ACTION VALIDATION METADATA first (if present) - this contains action-specific context
- Check structure summary for quantities, counts, statistics
- Compare found values with required values from criteria
- If structure unavailable, use metadata only (format, filename, size)

View file

@ -169,6 +169,10 @@ class AutomationMode(BaseMode):
Execute task using Automation mode - executes predefined actions directly.
No AI planning or review phases - actions are executed sequentially as defined.
"""
# Get task index from workflow state for consistency
if taskIndex is None:
taskIndex = workflow.getTaskIndex()
logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
try:
@ -178,7 +182,6 @@ class AutomationMode(BaseMode):
# Update workflow before executing task
if taskIndex is not None:
self._updateWorkflowBeforeExecutingTask(taskIndex)
self.services.chat.setWorkflowContext(taskNumber=taskIndex)
# Create task start message
await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
@ -241,7 +244,7 @@ class AutomationMode(BaseMode):
# Execute action
result = await self.actionExecutor.executeSingleAction(
action, workflow, taskStep, taskIndex, actionNumber, totalActions
action, workflow, taskStep
)
actionResults.append(result)

View file

@ -561,6 +561,11 @@ class DynamicMode(BaseMode):
# Use connectionReference from selection (required)
connectionRef = selection.get('connectionReference')
# If not found at top level, check in selection['parameters'] (guided action case)
if not connectionRef and isinstance(selection, dict) and 'parameters' in selection:
connectionRef = selection['parameters'].get('connectionReference')
if connectionRef:
# Check if action actually has connectionReference parameter
methodName, actionName = compoundActionName.split('.', 1)

View file

@ -58,9 +58,10 @@ CONTEXT: {{KEY:OVERALL_TASK_CONTEXT}}
OBJECTIVE: {{KEY:TASK_OBJECTIVE}}
=== AVAILABLE RESOURCES ===
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
{{KEY:AVAILABLE_DOCUMENTS_INDEX}}
CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS_INDEX}}
AVAILABLE_CONNECTIONS_INDEX:
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
=== AVAILABLE ACTIONS ===
{{KEY:AVAILABLE_METHODS}}
@ -82,6 +83,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
- Be the next logical incremental step (not complete entire objective in one step)
- Target exactly one output format if producing files
- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
- ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
- Learn from previous validation feedback and avoid repeated mistakes
- Include intent analysis fields (dataType, expectedFormats, qualityRequirements, successCriteria)
@ -97,7 +99,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
"successCriteria": ["specific criterion 1", "specific criterion 2"],
"userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining what this action will do (1 sentence, first person, friendly tone)",
"learnings": ["..."],
"requiredInputDocuments": ["docList:..."],
"requiredInputDocuments": ["docItem:<documentId>:<filename>", "docList:<label>"],
"requiredConnection": "connection:..." | null,
"parametersContext": "concise text that Stage 2 will use to set business parameters"
}}
@ -115,6 +117,9 @@ Analyze actionObjective to determine:
3. parametersContext: short, sufficient for Stage 2
4. Return ONLY JSON - no markdown, no explanations
5. requiredInputDocuments: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
- For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
- For document lists: use docList:<label> format
- Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
6. requiredConnection: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX
7. Plan incrementally: one output format per step
8. Learn from validation feedback - avoid repeating mistakes
@ -307,6 +312,7 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True),
PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True),
PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
]
template = """TASK DECISION
@ -321,7 +327,9 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
=== AVAILABLE RESOURCES ===
ACTIONS: {{KEY:AVAILABLE_METHODS}}
DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
AVAILABLE_CONNECTIONS_INDEX:
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
{{KEY:REVIEW_CONTENT}}
@ -334,12 +342,20 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
- Next action should ONLY generate the MISSING part, NOT repeat what's already delivered
=== OUTPUT FORMAT ===
Return ONLY JSON (no markdown, no explanations). The decision MUST:
- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
- ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
- Use ONLY exact labels from AVAILABLE_CONNECTIONS_INDEX (connection:...)
- Provide concrete parameter values in nextActionParameters (not placeholders)
- Match parameter names exactly as defined in AVAILABLE_METHODS
{{
"status": "continue",
"reason": "Brief reason explaining why continuing",
"nextAction": "Selected_action_from_ACTIONS",
"nextActionParameters": {{
"documentList": ["docItem:reference_from_DOCUMENTS"],
"documentList": ["docItem:<documentId>:<filename>", "docList:<label>"],
"connectionReference": "connection:reference_from_AVAILABLE_CONNECTIONS_INDEX",
"parameter1": "value1",
"parameter2": "value2"
}},
@ -347,16 +363,21 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
}}
=== RULES ===
- If "continue": MUST provide nextAction and nextActionParameters
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent)
- nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
- CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
- Do NOT repeat failed actions - suggest DIFFERENT approach
- If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
- nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
- If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
1. Return ONLY JSON - no markdown, no explanations
2. If "continue": MUST provide nextAction and nextActionParameters
3. nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
4. nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
5. documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
- For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
- For document lists: use docList:<label> format
- Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
6. connectionReference: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX (required if action needs connection)
7. nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
8. CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
9. Do NOT repeat failed actions - suggest DIFFERENT approach
10. If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
11. nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
12. If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
"""

View file

@ -428,7 +428,7 @@ class WorkflowProcessor:
)
# Prepare AI call options for fast path (balanced, fast processing)
from modules.datamodels.datamodelAi import AiCallOptions
from modules.datamodels.datamodelAi import AiCallOptions, AiCallRequest
options = AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
@ -438,16 +438,19 @@ class WorkflowProcessor:
maxProcessingTime=15 # Fast path should complete in 15s
)
# Call AI (content call - no documents needed for fast path)
aiResponse = await self.services.ai.callAiContent(
# Call AI directly (no document generation - just plain text response)
# Use aiObjects.call() instead of callAiContent() to avoid document generation path
aiRequest = AiCallRequest(
prompt=fastPathPrompt,
contentParts=None, # Fast path doesn't process documents
context="",
options=options,
outputFormat=None # Text response, not document generation
contentParts=None # Fast path doesn't process documents
)
# Extract response content (AiResponse.content is a string)
responseText = aiResponse.content if isinstance(aiResponse, str) else (aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse))
aiCallResponse = await self.services.ai.aiObjects.call(aiRequest)
# Extract response content (AiCallResponse.content is a string)
responseText = aiCallResponse.content if aiCallResponse.content else ""
# Create ActionResult with response
# For fast path, we create a simple text document with the response

View file

@ -162,30 +162,38 @@ class WorkflowManager:
self.workflowProcessor = WorkflowProcessor(self.services)
# Process user-uploaded documents from userInput for complexity detection
# This is the correct way: use the input data directly, not workflow state
documents = []
if userInput.listFileId:
try:
documents = await self._processFileIds(userInput.listFileId, None)
except Exception as e:
logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
# Get workflow mode to determine if complexity detection is needed
workflowMode = getattr(self.services.workflow, 'workflowMode', None)
skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
# Detect complexity (AI-based semantic understanding) using user input documents
complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
logger.info(f"Request complexity detected: {complexity}")
if skipComplexityDetection:
logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan")
complexity = "moderate" # Default for automation workflows
else:
# Process user-uploaded documents from userInput for complexity detection
# This is the correct way: use the input data directly, not workflow state
documents = []
if userInput.listFileId:
try:
documents = await self._processFileIds(userInput.listFileId, None)
except Exception as e:
logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
# Detect complexity (AI-based semantic understanding) using user input documents
complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
logger.info(f"Request complexity detected: {complexity}")
# Now send the first message (which will also process the documents again, but that's fine)
await self._sendFirstMessage(userInput)
# Route to fast path for simple requests
if complexity == "simple":
# Route to fast path for simple requests (skip for automation mode)
if not skipComplexityDetection and complexity == "simple":
logger.info("Routing to fast path for simple request")
await self._executeFastPath(userInput, documents)
return # Fast path completes the workflow
# Route to full workflow for moderate/complex requests
logger.info(f"Routing to full workflow for {complexity} request")
# Route to full workflow for moderate/complex requests or automation mode
logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else ""))
taskPlan = await self._planTasks(userInput)
await self._executeTasks(taskPlan)
await self._processWorkflowResults()