From b401be703f37797a63fa32192ce9e9b3fdc2ccd5 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Mon, 1 Dec 2025 19:15:50 +0100
Subject: [PATCH] integration testing of adapted ai workflow with fixes
---
modules/datamodels/datamodelChat.py | 4 +
modules/interfaces/interfaceAiObjects.py | 10 +-
modules/services/serviceAi/mainServiceAi.py | 400 ++++++++++--------
.../serviceAi/subJsonResponseHandling.py | 39 +-
.../services/serviceChat/mainServiceChat.py | 5 +-
.../mainServiceExtraction.py | 30 ++
.../subPromptBuilderExtraction.py | 11 +-
.../renderers/rendererXlsx.py | 24 +-
.../subPromptBuilderGeneration.py | 66 ++-
modules/shared/jsonUtils.py | 90 +++-
modules/workflows/methods/methodOutlook.py | 45 +-
modules/workflows/methods/methodSharepoint.py | 69 +++
.../processing/adaptive/contentValidator.py | 30 +-
.../processing/modes/modeAutomation.py | 7 +-
.../workflows/processing/modes/modeDynamic.py | 5 +
.../shared/promptGenerationActionsDynamic.py | 51 ++-
.../workflows/processing/workflowProcessor.py | 17 +-
modules/workflows/workflowManager.py | 38 +-
18 files changed, 648 insertions(+), 293 deletions(-)
diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py
index 9caf11f8..4a678c8b 100644
--- a/modules/datamodels/datamodelChat.py
+++ b/modules/datamodels/datamodelChat.py
@@ -400,6 +400,10 @@ class ActionDocument(BaseModel):
None,
description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)"
)
+ validationMetadata: Optional[Dict[str, Any]] = Field(
+ None,
+ description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)"
+ )
registerModelLabels(
diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py
index 18673987..7dc7db6b 100644
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -262,11 +262,17 @@ class AiObjects:
logger.info(f"✅ Image content part processed successfully with model: {model.name}")
# Convert to AiCallResponse format
+ # Note: AiModelResponse doesn't have priceUsd, and processingTime can be None
+ # Calculate processing time if not provided (fallback to 0.0)
+ processingTime = getattr(modelResponse, 'processingTime', None)
+ if processingTime is None:
+ processingTime = 0.0
+
return AiCallResponse(
content=modelResponse.content,
modelName=model.name,
- priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0,
- processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0,
+ priceUsd=0.0, # Price will be calculated elsewhere if needed
+ processingTime=processingTime,
bytesSent=0, # Will be calculated elsewhere
bytesReceived=0, # Will be calculated elsewhere
errorCount=0
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 2a4f304c..57f81aa7 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -944,32 +944,17 @@ If no trackable items can be identified, return: {{"kpis": []}}
)
try:
+ # Default outputFormat to "txt" if not specified (unified path - all formats handled the same way)
+ if not outputFormat:
+ outputFormat = "txt"
+
# Extraction is now separate - contentParts must be extracted before calling
# Require operationType to be set before calling
opType = getattr(options, "operationType", None)
if not opType:
- # If outputFormat is specified, default to DATA_GENERATE
- if outputFormat:
- options.operationType = OperationTypeEnum.DATA_GENERATE
- opType = OperationTypeEnum.DATA_GENERATE
- else:
- self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
- analyzedOptions = await self._analyzePromptAndCreateOptions(prompt)
- if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType:
- options.operationType = analyzedOptions.operationType
- # Merge other analyzed options
- if hasattr(analyzedOptions, "priority"):
- options.priority = analyzedOptions.priority
- if hasattr(analyzedOptions, "processingMode"):
- options.processingMode = analyzedOptions.processingMode
- if hasattr(analyzedOptions, "compressPrompt"):
- options.compressPrompt = analyzedOptions.compressPrompt
- if hasattr(analyzedOptions, "compressContext"):
- options.compressContext = analyzedOptions.compressContext
- else:
- # Default to DATA_ANALYSE if analysis fails
- options.operationType = OperationTypeEnum.DATA_ANALYSE
- opType = options.operationType
+ # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE
+ options.operationType = OperationTypeEnum.DATA_GENERATE
+ opType = OperationTypeEnum.DATA_GENERATE
# Handle IMAGE_GENERATE operations
if opType == OperationTypeEnum.IMAGE_GENERATE:
@@ -1052,171 +1037,232 @@ If no trackable items can be identified, return: {{"kpis": []}}
self.services.chat.progressLogFinish(aiOperationId, False)
raise ValueError(errorMsg)
- # Handle document generation (outputFormat specified)
- if outputFormat:
- # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
- options.compressPrompt = False
- options.compressContext = False
+ # Handle document generation (outputFormat always set, defaults to "txt")
+ # Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way
+ # outputFormat is always set now (defaults to "txt" if not specified)
+
+ # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
+ options.compressPrompt = False
+ options.compressContext = False
+
+ # Process contentParts for generation prompt (if provided)
+ # Use generic _callWithContentParts() which handles all content types (images, text, etc.)
+ # This automatically processes images with vision models and merges all results
+ if contentParts:
+ # Filter out binary/other parts that shouldn't be processed
+ processableParts = []
+ skippedParts = []
+ for p in contentParts:
+ if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
+ processableParts.append(p)
+ else:
+ skippedParts.append(p)
- # Convert contentParts to text for generation prompt (if provided)
- if contentParts:
- # Convert contentParts to text for generation prompt
- content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
+ if skippedParts:
+ logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")
+
+ if processableParts:
+ # Count images for progress update
+ imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
+ if imageCount > 0:
+ self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")
+
+ # Build proper extraction prompt using buildExtractionPrompt
+ # This creates a focused extraction prompt, not the user's generation prompt
+ from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
+
+ # Determine renderer for format-specific guidelines
+ renderer = None
+ if outputFormat:
+ try:
+ from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+ generationService = GenerationService(self.services)
+ renderer = generationService.getRendererForFormat(outputFormat)
+ except Exception as e:
+ logger.debug(f"Could not get renderer for format {outputFormat}: {e}")
+
+ extractionPrompt = await buildExtractionPrompt(
+ outputFormat=outputFormat or "txt",
+ userPrompt=prompt, # User's prompt as context for what to extract
+ title=title or "Document",
+ aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None,
+ services=self.services,
+ renderer=renderer
+ )
+
+ logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")
+
+ # Use DATA_EXTRACT operation type for extraction
+ extractionOptions = AiCallOptions(
+ operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction
+ compressPrompt=options.compressPrompt,
+ compressContext=options.compressContext
+ )
+
+ extractionRequest = AiCallRequest(
+ prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt
+ context="",
+ options=extractionOptions,
+ contentParts=processableParts
+ )
+
+ # Write debug file for extraction prompt (all parts)
+ self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
+
+ # Call generic content parts processor - handles images, text, chunking, merging
+ extractionResponse = await self.aiObjects.call(extractionRequest)
+
+ # Write debug file for extraction response
+ if extractionResponse.content:
+ self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
+ else:
+ self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
+ logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")
+
+ # Use extracted content directly for generation prompt
+ if extractionResponse.errorCount == 0 and extractionResponse.content:
+ # The extracted content is already merged and ready to use
+ content_for_generation = extractionResponse.content
+ logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation")
+ else:
+ # Extraction failed - use placeholders
+ logger.warning(f"Content extraction failed, using placeholders")
+ placeholderParts = []
+ for p in processableParts:
+ placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]")
+ content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None
else:
content_for_generation = None
-
- self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
- from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
-
- generation_prompt = await buildGenerationPrompt(
- outputFormat, prompt, title, content_for_generation, None
+ logger.debug("No processable parts found in contentParts")
+ else:
+ content_for_generation = None
+
+ self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
+ from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
+
+ generation_prompt = await buildGenerationPrompt(
+ outputFormat, prompt, title, content_for_generation, None
+ )
+
+ promptArgs = {
+ "outputFormat": outputFormat,
+ "userPrompt": prompt,
+ "title": title,
+ "extracted_content": content_for_generation
+ }
+
+ self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
+ # Extract user prompt from promptArgs for task completion analysis
+ userPrompt = None
+ if promptArgs:
+ userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
+
+ generated_json = await self._callAiWithLooping(
+ generation_prompt,
+ options,
+ "document_generation",
+ buildGenerationPrompt,
+ promptArgs,
+ aiOperationId,
+ userPrompt=userPrompt
+ )
+
+ self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
+ try:
+ extracted_json = self.services.utils.jsonExtractString(generated_json)
+ generated_data = json.loads(extracted_json)
+ except json.JSONDecodeError as e:
+ logger.error(f"Failed to parse generated JSON: {str(e)}")
+ self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
+ self.services.chat.progressLogFinish(aiOperationId, False)
+ raise ValueError(f"Generated content is not valid JSON: {str(e)}")
+
+ # Extract title and filename from generated document structure
+ extractedTitle = title
+ extractedFilename = None
+ if isinstance(generated_data, dict) and "documents" in generated_data:
+ docs = generated_data["documents"]
+ if isinstance(docs, list) and len(docs) > 0:
+ firstDoc = docs[0]
+ if isinstance(firstDoc, dict):
+ if firstDoc.get("title"):
+ extractedTitle = firstDoc["title"]
+ if firstDoc.get("filename"):
+ extractedFilename = firstDoc["filename"]
+
+ # Ensure metadata contains the extracted title
+ if "metadata" not in generated_data:
+ generated_data["metadata"] = {}
+ if extractedTitle:
+ generated_data["metadata"]["title"] = extractedTitle
+
+ # Create separate operation for content rendering
+ renderOperationId = f"{aiOperationId}_render"
+ renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
+ self.services.chat.progressLogStart(
+ renderOperationId,
+ "Content Rendering",
+ "Rendering",
+ f"Format: {outputFormat}",
+ parentId=renderParentLogId
+ )
+
+ try:
+ from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+ generationService = GenerationService(self.services)
+ self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
+ rendered_content, mime_type = await generationService.renderReport(
+ generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
)
+ self.services.chat.progressLogFinish(renderOperationId, True)
- promptArgs = {
- "outputFormat": outputFormat,
- "userPrompt": prompt,
- "title": title,
- "extracted_content": content_for_generation
- }
-
- self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
- # Extract user prompt from promptArgs for task completion analysis
- userPrompt = None
- if promptArgs:
- userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
-
- generated_json = await self._callAiWithLooping(
- generation_prompt,
- options,
- "document_generation",
- buildGenerationPrompt,
- promptArgs,
- aiOperationId,
- userPrompt=userPrompt
- )
-
- self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
- try:
- extracted_json = self.services.utils.jsonExtractString(generated_json)
- generated_data = json.loads(extracted_json)
- except json.JSONDecodeError as e:
- logger.error(f"Failed to parse generated JSON: {str(e)}")
- self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
- self.services.chat.progressLogFinish(aiOperationId, False)
- raise ValueError(f"Generated content is not valid JSON: {str(e)}")
-
- # Extract title and filename from generated document structure
- extractedTitle = title
- extractedFilename = None
- if isinstance(generated_data, dict) and "documents" in generated_data:
- docs = generated_data["documents"]
- if isinstance(docs, list) and len(docs) > 0:
- firstDoc = docs[0]
- if isinstance(firstDoc, dict):
- if firstDoc.get("title"):
- extractedTitle = firstDoc["title"]
- if firstDoc.get("filename"):
- extractedFilename = firstDoc["filename"]
-
- # Ensure metadata contains the extracted title
- if "metadata" not in generated_data:
- generated_data["metadata"] = {}
- if extractedTitle:
- generated_data["metadata"]["title"] = extractedTitle
-
- # Create separate operation for content rendering
- renderOperationId = f"{aiOperationId}_render"
- renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
- self.services.chat.progressLogStart(
- renderOperationId,
- "Content Rendering",
- "Rendering",
- f"Format: {outputFormat}",
- parentId=renderParentLogId
- )
-
- try:
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
- generationService = GenerationService(self.services)
- self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
- rendered_content, mime_type = await generationService.renderReport(
- generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
- )
- self.services.chat.progressLogFinish(renderOperationId, True)
-
- # Determine document name
- if extractedFilename:
- documentName = extractedFilename
- elif extractedTitle and extractedTitle != "Generated Document":
- sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
- sanitized = re.sub(r"_+", "_", sanitized).strip("_")
- if sanitized:
- if not sanitized.lower().endswith(f".{outputFormat}"):
- documentName = f"{sanitized}.{outputFormat}"
- else:
- documentName = sanitized
+ # Determine document name
+ if extractedFilename:
+ documentName = extractedFilename
+ elif extractedTitle and extractedTitle != "Generated Document":
+ sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
+ sanitized = re.sub(r"_+", "_", sanitized).strip("_")
+ if sanitized:
+ if not sanitized.lower().endswith(f".{outputFormat}"):
+ documentName = f"{sanitized}.{outputFormat}"
else:
- documentName = f"generated.{outputFormat}"
+ documentName = sanitized
else:
documentName = f"generated.{outputFormat}"
-
- # Build document data
- docData = DocumentData(
- documentName=documentName,
- documentData=rendered_content,
- mimeType=mime_type,
- sourceJson=generated_data # Preserve source JSON for structure validation
- )
-
- metadata = AiResponseMetadata(
- title=extractedTitle or title or "Generated Document",
- filename=extractedFilename,
- operationType=opType.value if opType else None
- )
-
- self.services.utils.writeDebugFile(str(generated_data), "document_generation_response")
- self.services.chat.progressLogFinish(aiOperationId, True)
-
- return AiResponse(
- content=json.dumps(generated_data),
- metadata=metadata,
- documents=[docData]
- )
-
- except Exception as e:
- logger.error(f"Error rendering document: {str(e)}")
- if renderOperationId:
- self.services.chat.progressLogFinish(renderOperationId, False)
- self.services.chat.progressLogFinish(aiOperationId, False)
- raise ValueError(f"Rendering failed: {str(e)}")
-
- # Handle text processing (no outputFormat)
- self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
-
- if contentParts:
- # Process contentParts through AI
- # Convert contentParts to text for prompt
- contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
- fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt
- result_content = await self._callAiWithLooping(
- fullPrompt, options, "text", None, None, aiOperationId
+ else:
+ documentName = f"generated.{outputFormat}"
+
+ # Build document data
+ docData = DocumentData(
+ documentName=documentName,
+ documentData=rendered_content,
+ mimeType=mime_type,
+ sourceJson=generated_data # Preserve source JSON for structure validation
)
- else:
- # Direct text call (no documents to process)
- result_content = await self._callAiWithLooping(
- prompt, options, "text", None, None, aiOperationId
+
+ metadata = AiResponseMetadata(
+ title=extractedTitle or title or "Generated Document",
+ filename=extractedFilename,
+ operationType=opType.value if opType else None
)
-
- metadata = AiResponseMetadata(
- operationType=opType.value if opType else None
- )
-
- self.services.chat.progressLogFinish(aiOperationId, True)
-
- return AiResponse(
- content=result_content,
- metadata=metadata
- )
+
+ # Write JSON with proper formatting (not str() which can truncate)
+ jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False)
+ self.services.utils.writeDebugFile(jsonStr, "document_generation_response")
+ self.services.chat.progressLogFinish(aiOperationId, True)
+
+ return AiResponse(
+ content=json.dumps(generated_data),
+ metadata=metadata,
+ documents=[docData]
+ )
+
+ except Exception as e:
+ logger.error(f"Error rendering document: {str(e)}")
+ if renderOperationId:
+ self.services.chat.progressLogFinish(renderOperationId, False)
+ self.services.chat.progressLogFinish(aiOperationId, False)
+ raise ValueError(f"Rendering failed: {str(e)}")
except Exception as e:
logger.error(f"Error in callAiContent: {str(e)}")
diff --git a/modules/services/serviceAi/subJsonResponseHandling.py b/modules/services/serviceAi/subJsonResponseHandling.py
index 558536b1..f04484d7 100644
--- a/modules/services/serviceAi/subJsonResponseHandling.py
+++ b/modules/services/serviceAi/subJsonResponseHandling.py
@@ -1236,8 +1236,12 @@ class JsonResponseHandler:
# Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
+ # Handle None (path doesn't exist - incomplete JSON)
+ if value is None:
+ updatedKpi["currentValue"] = kpi.get("currentValue", 0)
+ logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}")
# Count items/rows/elements based on type
- if isinstance(value, list):
+ elif isinstance(value, list):
updatedKpi["currentValue"] = len(value)
logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
elif isinstance(value, (int, float)):
@@ -1296,8 +1300,12 @@ class JsonResponseHandler:
# Extract value using path
value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
+ # Handle None (path doesn't exist - incomplete JSON)
+ if value is None:
+ updatedKpi["currentValue"] = kpi.get("currentValue", 0)
+ logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}")
# Count items/rows/elements based on type
- if isinstance(value, list):
+ elif isinstance(value, list):
updatedKpi["currentValue"] = len(value)
logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
elif isinstance(value, (int, float)):
@@ -1321,6 +1329,7 @@ class JsonResponseHandler:
Extract value from object using dot-notation path with array indices.
Example: "sections[0].elements[0].items"
+ Returns None if path doesn't exist (for incomplete JSON handling).
"""
parts = path.split('.')
current = obj
@@ -1332,20 +1341,30 @@ class JsonResponseHandler:
index = int(part[part.index('[') + 1:part.index(']')])
if key:
- current = current.get(key, [])
- if isinstance(current, list) and 0 <= index < len(current):
- current = current[index]
+ if isinstance(current, dict):
+ current = current.get(key)
+ if current is None:
+ return None # Key doesn't exist
+ else:
+ return None # Can't access key on non-dict
+
+ if isinstance(current, list):
+ if 0 <= index < len(current):
+ current = current[index]
+ else:
+ # Index out of range - return None for incomplete JSON
+ return None
else:
- raise KeyError(f"Invalid index {index} for {key}")
+ # Not a list, can't index
+ return None
else:
# Handle dict access
if isinstance(current, dict):
current = current.get(part)
+ if current is None:
+ return None # Key doesn't exist
else:
- raise KeyError(f"Cannot access {part} on {type(current)}")
-
- if current is None:
- raise KeyError(f"Path {path} returned None at {part}")
+ return None # Can't access key on non-dict
return current
diff --git a/modules/services/serviceChat/mainServiceChat.py b/modules/services/serviceChat/mainServiceChat.py
index b1c4d879..9ff148a8 100644
--- a/modules/services/serviceChat/mainServiceChat.py
+++ b/modules/services/serviceChat/mainServiceChat.py
@@ -92,13 +92,16 @@ class ChatService:
if docRef.startswith("docItem:"):
# docItem:: or docItem: (filename is optional)
# ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct)
+ # Both formats are supported: docItem:<documentId> and docItem:<documentId>:<filename>
parts = docRef.split(':')
if len(parts) >= 2:
docId = parts[1] # This should be the documentId (UUID)
docFound = False
# ALWAYS try to match by documentId first (regardless of number of parts)
- # This handles: docItem:documentId and docItem:documentId:filename
+ # This handles both formats:
+ # - docItem:<documentId> (without filename - still works)
+ # - docItem:<documentId>:<filename> (with filename - preferred)
for message in workflow.messages:
# Validate message belongs to this workflow
msgWorkflowId = getattr(message, 'workflowId', None)
diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py
index c35e6156..d8db9922 100644
--- a/modules/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/services/serviceExtraction/mainServiceExtraction.py
@@ -138,6 +138,36 @@ class ExtractionService:
f"extraction.process.{doc.mimeType}"
)
+ # Write extraction results to debug file
+ try:
+ from modules.shared.debugLogger import writeDebugFile
+ import json
+ # Create summary of extraction results for debug
+ extractionSummary = {
+ "documentName": doc.fileName,
+ "documentMimeType": doc.mimeType,
+ "partsCount": len(ec.parts),
+ "parts": []
+ }
+ for part in ec.parts:
+ partSummary = {
+ "typeGroup": part.typeGroup,
+ "mimeType": part.mimeType,
+ "label": part.label,
+ "dataLength": len(part.data) if part.data else 0,
+ "metadata": part.metadata
+ }
+ # Include data preview for small parts (first 500 chars)
+ if part.data and len(part.data) <= 500:
+ partSummary["dataPreview"] = part.data[:500]
+ elif part.data:
+ partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
+ extractionSummary["parts"].append(partSummary)
+
+ writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
+ except Exception as e:
+ logger.debug(f"Failed to write extraction debug file: {str(e)}")
+
results.append(ec)
return results
diff --git a/modules/services/serviceExtraction/subPromptBuilderExtraction.py b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
index f6329a5c..7b91579a 100644
--- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py
+++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
@@ -99,9 +99,16 @@ async def buildExtractionPrompt(
# Parse extraction intent if AI service is available
extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
- # Build base prompt
+ # Build base prompt with clear user prompt markers
+ sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt
adaptive_prompt = f"""
-{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
+{'='*80}
+USER REQUEST / USER PROMPT:
+{'='*80}
+{sanitized_user_prompt}
+{'='*80}
+END OF USER REQUEST / USER PROMPT
+{'='*80}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py
index 9fca82e9..b797aba3 100644
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@@ -479,25 +479,11 @@ class RendererXlsx(BaseRenderer):
sheetNames.append(sectionTitle[:31]) # Excel sheet name limit
else:
- # Single table or mixed content - create main sheet
+ # Single table or mixed content - create only main sheet
documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
sheetNames.append(documentTitle[:31]) # Excel sheet name limit
-
- # Add additional sheets for other content types
- contentTypes = set()
- for section in sections:
- contentType = section.get("content_type", "paragraph")
- contentTypes.add(contentType)
-
- if "table" in contentTypes and len(tableSections) == 1:
- sheetNames.append("Table Data")
- if "list" in contentTypes:
- sheetNames.append("Lists")
- if "paragraph" in contentTypes or "heading" in contentTypes:
- sheetNames.append("Text")
- # Limit to 4 sheets maximum
- return sheetNames[:4]
+ return sheetNames
def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Populate Excel sheets with content from JSON based on actual sheet names."""
@@ -527,14 +513,10 @@ class RendererXlsx(BaseRenderer):
sheetTitle = caption
self._populateTableSheet(sheet, section, styles, sheetTitle)
else:
- # Single table or mixed content - use original logic
+ # Single table or mixed content - populate only main sheet
firstSheetName = sheetNames[0]
self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
- # If we have multiple sheets, distribute content by type
- if len(sheetNames) > 1:
- self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
-
except Exception as e:
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
index 92ab8664..91011539 100644
--- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py
+++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
@@ -72,7 +72,13 @@ async def buildGenerationPrompt(
continuationText += "Start directly with the next element/section that should follow.\n\n"
# PROMPT FOR CONTINUATION
- generationPrompt = f"""User request: "{userPrompt}"
+ generationPrompt = f"""{'='*80}
+USER REQUEST / USER PROMPT:
+{'='*80}
+{userPrompt}
+{'='*80}
+END OF USER REQUEST / USER PROMPT
+{'='*80}
⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.
@@ -93,8 +99,57 @@ Continue generating the remaining content now.
else:
# PROMPT FOR FIRST CALL
+ # Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
+
+ if extracted_content:
+ # If we have extracted content, put it FIRST and make it very clear it's the source data
+ generationPrompt = f"""{'='*80}
+USER REQUEST / USER PROMPT:
+{'='*80}
+{userPrompt}
+{'='*80}
+END OF USER REQUEST / USER PROMPT
+{'='*80}
- generationPrompt = f"""User request: "{userPrompt}"
+{'='*80}
+⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️
+{'='*80}
+The content below contains the ACTUAL DATA extracted from the source documents.
+You MUST use this data - DO NOT generate fake or example data.
+{'='*80}
+EXTRACTED CONTENT FROM DOCUMENTS:
+{'='*80}
+{extracted_content}
+{'='*80}
+END OF EXTRACTED CONTENT
+{'='*80}
+
+Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
+The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
+You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
+
+JSON structure template (structure only - use data from EXTRACTED CONTENT above):
+{jsonTemplate}
+
+Instructions:
+- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
+- Do NOT reuse example section IDs; create your own.
+- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
+- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
+- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
+- Output JSON only; no markdown fences or extra text.
+
+Generate your complete response using the extracted content data.
+"""
+ else:
+ # No extracted content - generate from scratch
+ generationPrompt = f"""{'='*80}
+USER REQUEST / USER PROMPT:
+{'='*80}
+{userPrompt}
+{'='*80}
+END OF USER REQUEST / USER PROMPT
+{'='*80}
Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
@@ -111,12 +166,5 @@ Instructions:
Generate your complete response.
"""
- # If we have extracted content, prepend it to the prompt
- if extracted_content:
- generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
-{extracted_content}
-
-{generationPrompt}"""
-
return generationPrompt.strip()
diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py
index d2805ecb..46eb4380 100644
--- a/modules/shared/jsonUtils.py
+++ b/modules/shared/jsonUtils.py
@@ -102,12 +102,30 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
Attempt to repair broken JSON using multiple strategies.
Generic solution that works for any content type.
Returns the best repair attempt or None if all fail.
+
+ IMPORTANT: This function tries to preserve ALL data by avoiding truncation.
+ Only uses truncation as a last resort when structure closing fails.
"""
if not text:
return None
- # Strategy 1: Try to extract sections from the entire text first
+ # Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
+ # This preserves all data and should be tried first
+ closedStr = closeJsonStructures(text)
+ obj, err, _ = tryParseJson(closedStr)
+ if err is None and isinstance(obj, dict):
+ sections = extractSectionsFromDocument(obj)
+ if sections:
+ logger.info(f"Repaired JSON using structure closing (preserved all data, found {len(sections)} sections)")
+ return obj
+ else:
+ # Structure closing worked but no sections found - still return it
+ logger.info("Repaired JSON using structure closing (preserved all data, but no sections found)")
+ return obj
+
+ # Strategy 2: Try to extract sections from the entire text using regex
# This handles cases where the JSON structure is broken but content is intact
+ # NOTE: _extractSectionsRegex may truncate, but we try it before progressive parsing
extractedSections = _extractSectionsRegex(text)
if extractedSections:
logger.info(f"Extracted {len(extractedSections)} sections using regex")
@@ -120,7 +138,10 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
"documents": [{"sections": extractedSections}]
}
- # Strategy 2: Progressive parsing - try to find longest valid prefix
+ # Strategy 3: Progressive parsing - try to find longest valid prefix (TRUNCATES DATA)
+ # WARNING: This strategy truncates the input and loses data after the truncation point
+ # Only use as last resort when other strategies fail
+ logger.warning("Structure closing and regex extraction failed, trying progressive parsing (WILL TRUNCATE DATA)")
bestResult = None
bestValidLength = 0
@@ -133,13 +154,13 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
if err is None and isinstance(obj, dict):
bestResult = obj
bestValidLength = i
- logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
+ logger.debug(f"Progressive parsing success at length {i} (step: {stepSize}) - DATA TRUNCATED AT POSITION {i}")
break
if bestResult:
break
if bestResult:
- logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
+ logger.warning(f"Repaired JSON using progressive parsing (valid length: {bestValidLength}, DATA LOST AFTER THIS POINT)")
# Check if we have sections in the result
sections = extractSectionsFromDocument(bestResult)
@@ -160,13 +181,6 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
bestResult["documents"][0]["sections"].extend(extractedSections)
return bestResult
- # Strategy 3: Structure closing - close incomplete structures
- closedStr = closeJsonStructures(text)
- obj, err, _ = tryParseJson(closedStr)
- if err is None and isinstance(obj, dict):
- logger.info("Repaired JSON using structure closing")
- return obj
-
logger.warning("All repair strategies failed")
return None
@@ -174,18 +188,43 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
def closeJsonStructures(text: str) -> str:
"""
Close incomplete JSON structures by adding missing closing brackets.
+ Also handles unterminated strings by closing them.
"""
if not text:
return text
+ result = text
+
+    # Handle an unterminated trailing string: if the text ends mid-string,
+    # append the missing closing quote so the JSON can be parsed.
+    # Simple heuristic: an odd total number of double quotes suggests an
+    # unterminated string; close it at the very end of the text.
+ if result.strip():
+        # Heuristic: odd total quote count => unterminated string (NOTE: escaped quotes are counted too, so this can misfire)
+ quoteCount = result.count('"')
+ if quoteCount % 2 == 1:
+ # Find the last opening quote that's not escaped
+ lastQuotePos = result.rfind('"')
+ if lastQuotePos >= 0:
+ # Check if it's escaped
+ escapeCount = 0
+ i = lastQuotePos - 1
+ while i >= 0 and result[i] == '\\':
+ escapeCount += 1
+ i -= 1
+ # If not escaped (even number of backslashes), close the string
+ if escapeCount % 2 == 0:
+ # Find where the string should end (before next comma, bracket, or brace)
+ # For now, just close it at the end
+ result += '"'
+
# Count open/close brackets and braces
- openBraces = text.count('{')
- closeBraces = text.count('}')
- openBrackets = text.count('[')
- closeBrackets = text.count(']')
+ openBraces = result.count('{')
+ closeBraces = result.count('}')
+ openBrackets = result.count('[')
+ closeBrackets = result.count(']')
# Close incomplete structures
- result = text
for _ in range(openBraces - closeBraces):
result += '}'
for _ in range(openBrackets - closeBrackets):
@@ -202,11 +241,24 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
NOTE: This function is called FROM repairBrokenJson, so it must NOT call repairBrokenJson
to avoid circular dependency. Instead, it implements its own repair strategies.
+
+    IMPORTANT: Tries structure closing first to preserve all data; only falls back to truncating strategies.
"""
sections = []
- # Strategy 1: Try progressive parsing to find longest valid JSON prefix
- # Find the longest valid JSON prefix that contains sections
+ # Strategy 1: Try structure closing WITHOUT truncation first (preserves all data)
+ closed_str = closeJsonStructures(text)
+ obj, err, _ = tryParseJson(closed_str)
+ if err is None and isinstance(obj, dict):
+ extracted_sections = extractSectionsFromDocument(obj)
+ if extracted_sections:
+ logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using structure closing (preserved all data)")
+ return extracted_sections
+
+ # Strategy 2: Try progressive parsing to find longest valid JSON prefix (TRUNCATES DATA)
+ # WARNING: This truncates the input and loses data
+ # Only use if structure closing failed
+ logger.debug("_extractSectionsRegex: Structure closing failed, trying progressive parsing (WILL TRUNCATE)")
best_result = None
best_valid_length = 0
for step_size in [1000, 500, 100, 50, 10]:
@@ -217,7 +269,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
if err is None and isinstance(obj, dict):
extracted_sections = extractSectionsFromDocument(obj)
if extracted_sections:
- logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i}")
+ logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i} (DATA TRUNCATED)")
return extracted_sections
# Store best result even if no sections found
if not best_result:
diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py
index 99768e07..fa7b4e47 100644
--- a/modules/workflows/methods/methodOutlook.py
+++ b/modules/workflows/methods/methodOutlook.py
@@ -1183,11 +1183,13 @@ Max length: {maxLength} characters
Based on the context, decide which documents to attach.
+CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:: format (include filename)
+
Return JSON:
{{
"subject": "subject line",
"body": "email body (HTML allowed)",
- "attachments": ["doc_ref1", "doc_ref2"]
+ "attachments": ["docItem::"]
}}
"""
@@ -1237,6 +1239,9 @@ Return JSON:
elif isinstance(ai_attachments, list):
ai_attachments = [a for a in ai_attachments if isinstance(a, str)]
+        # Initialize normalized_ai_attachments so it is defined on every path (it is read when building the result metadata)
+ normalized_ai_attachments = []
+
if ai_attachments:
try:
ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments
@@ -1250,16 +1255,20 @@ Return JSON:
selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids]
if selected_docs:
- # Map selected ChatDocuments back to docItem references
+ # Map selected ChatDocuments back to docItem references (with full filename)
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs]
+ # Normalize ai_attachments to full format for storage
+ normalized_ai_attachments = documentList.copy()
logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)")
else:
# No intersection; use all available documents
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
+ normalized_ai_attachments = documentList.copy()
logger.warning("AI selected attachments not found in available documents, using all documents")
else:
# No AI selection; use all available documents
documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
+ normalized_ai_attachments = documentList.copy()
logger.warning("AI did not specify attachments, using all available documents")
else:
logger.info("No documents provided in documentList; skipping attachment processing")
@@ -1363,7 +1372,7 @@ Return JSON:
"cc": cc,
"bcc": bcc,
"attachments": len(documentList),
- "aiSelectedAttachments": ai_attachments if ai_attachments else "all documents",
+ "aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
"aiGenerated": True,
"context": context,
"emailStyle": emailStyle,
@@ -1371,12 +1380,40 @@ Return JSON:
"draftData": draft_data
}
+ # Extract attachment filenames for validation metadata
+ attachmentFilenames = []
+ attachmentReferences = []
+ if documentList:
+ try:
+ from modules.datamodels.datamodelDocref import DocumentReferenceList
+ attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
+ attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
+ # Store normalized document references (with filenames) - use normalized_ai_attachments if available
+ attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
+ except Exception:
+ pass
+
+ # Create validation metadata for content validator
+ validationMetadata = {
+ "actionType": "outlook.composeAndDraftEmailWithContext",
+ "emailRecipients": to,
+ "emailCc": cc,
+ "emailBcc": bcc,
+ "emailSubject": subject,
+ "emailAttachments": attachmentFilenames,
+ "emailAttachmentReferences": attachmentReferences,
+ "emailAttachmentCount": len(attachmentFilenames),
+ "emailStyle": emailStyle,
+ "hasAttachments": len(attachmentFilenames) > 0
+ }
+
return ActionResult(
success=True,
documents=[ActionDocument(
documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
documentData=json.dumps(draftResultData, indent=2),
- mimeType="application/json"
+ mimeType="application/json",
+ validationMetadata=validationMetadata
)]
)
else:
diff --git a/modules/workflows/methods/methodSharepoint.py b/modules/workflows/methods/methodSharepoint.py
index 2c773989..92d77e8e 100644
--- a/modules/workflows/methods/methodSharepoint.py
+++ b/modules/workflows/methods/methodSharepoint.py
@@ -1154,6 +1154,53 @@ class MethodSharepoint(MethodBase):
resultData = json.loads(fileData)
foundDocuments = resultData.get("foundDocuments", [])
+ # If no foundDocuments, check if it's a listDocuments result (has listResults)
+ if not foundDocuments and "listResults" in resultData:
+ logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format")
+ listResults = resultData.get("listResults", [])
+ foundDocuments = []
+ siteIdFromList = None
+ siteNameFromList = None
+
+ for listResult in listResults:
+ siteResults = listResult.get("siteResults", [])
+ for siteResult in siteResults:
+ items = siteResult.get("items", [])
+ # Extract site info from first item if available
+ if items and not siteIdFromList:
+ # Try to get site info from the siteResult structure
+ # We need to discover sites to get the siteId
+ siteNameFromList = items[0].get("siteName")
+
+ for item in items:
+ # Convert listDocuments item format to foundDocuments format
+ if item.get("type") == "file":
+ foundDoc = {
+ "id": item.get("id"),
+ "name": item.get("name"),
+ "type": "file",
+ "siteName": item.get("siteName"),
+ "siteId": None, # Will be determined from site discovery
+ "webUrl": item.get("webUrl"),
+ "fullPath": item.get("webUrl", ""),
+ "parentPath": item.get("parentPath", "")
+ }
+ foundDocuments.append(foundDoc)
+
+ # Discover sites to get siteId if we have siteName
+ if foundDocuments and siteNameFromList and not siteIdFromList:
+ logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
+ allSites = await self._discoverSharePointSites()
+ matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
+ if matchingSites:
+ siteIdFromList = matchingSites[0].get("id")
+ # Update all foundDocuments with siteId
+ for doc in foundDocuments:
+ doc["siteId"] = siteIdFromList
+ logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
+
+ logger.info(f"Converted {len(foundDocuments)} files from listResults format")
+
if foundDocuments:
# Extract SharePoint file IDs from foundDocuments
sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
@@ -1167,6 +1214,15 @@ class MethodSharepoint(MethodBase):
siteName = firstDoc.get("siteName")
siteId = firstDoc.get("siteId")
+ # If siteId is missing (from listDocuments conversion), discover sites to find it
+ if siteName and not siteId:
+ logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
+ allSites = await self._discoverSharePointSites()
+ matchingSites = self._filterSitesByHint(allSites, siteName)
+ if matchingSites:
+ siteId = matchingSites[0].get("id")
+ logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
+
if siteName and siteId:
sites = [{
"id": siteId,
@@ -1174,6 +1230,19 @@ class MethodSharepoint(MethodBase):
"webUrl": firstDoc.get("webUrl", "")
}]
logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
+ elif siteName:
+ # Try to get site by name
+ allSites = await self._discoverSharePointSites()
+ matchingSites = self._filterSitesByHint(allSites, siteName)
+ if matchingSites:
+ sites = [{
+ "id": matchingSites[0].get("id"),
+ "displayName": siteName,
+ "webUrl": matchingSites[0].get("webUrl", "")
+ }]
+ logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
+ else:
+ return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.")
else:
return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
else:
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index a2c93288..b065b912 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -421,14 +421,6 @@ class ContentValidator:
if actionName:
# Convert action name to human-readable format
actionDescription = actionName.replace("ai.", "").replace(".", " ").title()
- if "convert" in actionName.lower():
- actionDescription = "Document format conversion"
- elif "generate" in actionName.lower() or "create" in actionName.lower():
- actionDescription = "Document generation"
- elif "extract" in actionName.lower():
- actionDescription = "Content extraction"
- elif "process" in actionName.lower():
- actionDescription = "Content processing"
actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
# Build action parameters context
@@ -441,6 +433,25 @@ class ContentValidator:
paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
+ # Extract validation metadata from documents (action-specific context)
+ validationMetadataContext = ""
+ if documents:
+ metadataList = []
+ for doc in documents:
+ metadata = getattr(doc, 'validationMetadata', None)
+ if metadata and isinstance(metadata, dict):
+ metadataList.append(metadata)
+
+ if metadataList:
+ # Combine all metadata (usually just one document)
+ combinedMetadata = {}
+ for meta in metadataList:
+ combinedMetadata.update(meta)
+
+ if combinedMetadata:
+ metadataJson = json.dumps(combinedMetadata, ensure_ascii=False, indent=2)
+ validationMetadataContext = f"\nACTION VALIDATION METADATA: {metadataJson}"
+
# Format success criteria for display with index numbers
if successCriteria:
criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
@@ -452,7 +463,7 @@ class ContentValidator:
=== TASK INFORMATION ===
{objectiveLabel}: '{objectiveText}'
EXPECTED DATA TYPE: {dataType}
-EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}
+EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}
=== VALIDATION INSTRUCTIONS ===
@@ -466,6 +477,7 @@ VALIDATION RULES:
5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
VALIDATION STEPS:
+- Check ACTION VALIDATION METADATA first (if present) - this contains action-specific context
- Check structure summary for quantities, counts, statistics
- Compare found values with required values from criteria
- If structure unavailable, use metadata only (format, filename, size)
diff --git a/modules/workflows/processing/modes/modeAutomation.py b/modules/workflows/processing/modes/modeAutomation.py
index b8600e0f..996462b7 100644
--- a/modules/workflows/processing/modes/modeAutomation.py
+++ b/modules/workflows/processing/modes/modeAutomation.py
@@ -169,6 +169,10 @@ class AutomationMode(BaseMode):
Execute task using Automation mode - executes predefined actions directly.
No AI planning or review phases - actions are executed sequentially as defined.
"""
+ # Get task index from workflow state for consistency
+ if taskIndex is None:
+ taskIndex = workflow.getTaskIndex()
+
logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
try:
@@ -178,7 +182,6 @@ class AutomationMode(BaseMode):
# Update workflow before executing task
if taskIndex is not None:
self._updateWorkflowBeforeExecutingTask(taskIndex)
- self.services.chat.setWorkflowContext(taskNumber=taskIndex)
# Create task start message
await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
@@ -241,7 +244,7 @@ class AutomationMode(BaseMode):
# Execute action
result = await self.actionExecutor.executeSingleAction(
- action, workflow, taskStep, taskIndex, actionNumber, totalActions
+ action, workflow, taskStep
)
actionResults.append(result)
diff --git a/modules/workflows/processing/modes/modeDynamic.py b/modules/workflows/processing/modes/modeDynamic.py
index dac6211f..5cc8b866 100644
--- a/modules/workflows/processing/modes/modeDynamic.py
+++ b/modules/workflows/processing/modes/modeDynamic.py
@@ -561,6 +561,11 @@ class DynamicMode(BaseMode):
# Use connectionReference from selection (required)
connectionRef = selection.get('connectionReference')
+
+ # If not found at top level, check in selection['parameters'] (guided action case)
+ if not connectionRef and isinstance(selection, dict) and 'parameters' in selection:
+ connectionRef = selection['parameters'].get('connectionReference')
+
if connectionRef:
# Check if action actually has connectionReference parameter
methodName, actionName = compoundActionName.split('.', 1)
diff --git a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
index ae59fafc..a58467fb 100644
--- a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
+++ b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
@@ -58,9 +58,10 @@ CONTEXT: {{KEY:OVERALL_TASK_CONTEXT}}
OBJECTIVE: {{KEY:TASK_OBJECTIVE}}
=== AVAILABLE RESOURCES ===
-DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
+AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
{{KEY:AVAILABLE_DOCUMENTS_INDEX}}
-CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS_INDEX}}
+AVAILABLE_CONNECTIONS_INDEX:
+{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
=== AVAILABLE ACTIONS ===
{{KEY:AVAILABLE_METHODS}}
@@ -82,6 +83,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
- Be the next logical incremental step (not complete entire objective in one step)
- Target exactly one output format if producing files
- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
+- ALWAYS use FULL document references with filename: docItem:: (filename is required)
- Learn from previous validation feedback and avoid repeated mistakes
- Include intent analysis fields (dataType, expectedFormats, qualityRequirements, successCriteria)
@@ -97,7 +99,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
"successCriteria": ["specific criterion 1", "specific criterion 2"],
"userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining what this action will do (1 sentence, first person, friendly tone)",
"learnings": ["..."],
- "requiredInputDocuments": ["docList:..."],
+ "requiredInputDocuments": ["docItem::", "docList: