diff --git a/modules/datamodels/datamodelChat.py b/modules/datamodels/datamodelChat.py index 9caf11f8..4a678c8b 100644 --- a/modules/datamodels/datamodelChat.py +++ b/modules/datamodels/datamodelChat.py @@ -400,6 +400,10 @@ class ActionDocument(BaseModel): None, description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)" ) + validationMetadata: Optional[Dict[str, Any]] = Field( + None, + description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)" + ) registerModelLabels( diff --git a/modules/interfaces/interfaceAiObjects.py b/modules/interfaces/interfaceAiObjects.py index 18673987..7dc7db6b 100644 --- a/modules/interfaces/interfaceAiObjects.py +++ b/modules/interfaces/interfaceAiObjects.py @@ -262,11 +262,17 @@ class AiObjects: logger.info(f"✅ Image content part processed successfully with model: {model.name}") # Convert to AiCallResponse format + # Note: AiModelResponse doesn't have priceUsd, and processingTime can be None + # Calculate processing time if not provided (fallback to 0.0) + processingTime = getattr(modelResponse, 'processingTime', None) + if processingTime is None: + processingTime = 0.0 + return AiCallResponse( content=modelResponse.content, modelName=model.name, - priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0, - processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0, + priceUsd=0.0, # Price will be calculated elsewhere if needed + processingTime=processingTime, bytesSent=0, # Will be calculated elsewhere bytesReceived=0, # Will be calculated elsewhere errorCount=0 diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 2a4f304c..57f81aa7 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -944,32 +944,17 @@ If no trackable items can be identified, return: {{"kpis": []}} ) try: + # Default 
outputFormat to "txt" if not specified (unified path - all formats handled the same way) + if not outputFormat: + outputFormat = "txt" + # Extraction is now separate - contentParts must be extracted before calling # Require operationType to be set before calling opType = getattr(options, "operationType", None) if not opType: - # If outputFormat is specified, default to DATA_GENERATE - if outputFormat: - options.operationType = OperationTypeEnum.DATA_GENERATE - opType = OperationTypeEnum.DATA_GENERATE - else: - self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters") - analyzedOptions = await self._analyzePromptAndCreateOptions(prompt) - if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType: - options.operationType = analyzedOptions.operationType - # Merge other analyzed options - if hasattr(analyzedOptions, "priority"): - options.priority = analyzedOptions.priority - if hasattr(analyzedOptions, "processingMode"): - options.processingMode = analyzedOptions.processingMode - if hasattr(analyzedOptions, "compressPrompt"): - options.compressPrompt = analyzedOptions.compressPrompt - if hasattr(analyzedOptions, "compressContext"): - options.compressContext = analyzedOptions.compressContext - else: - # Default to DATA_ANALYSE if analysis fails - options.operationType = OperationTypeEnum.DATA_ANALYSE - opType = options.operationType + # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE + options.operationType = OperationTypeEnum.DATA_GENERATE + opType = OperationTypeEnum.DATA_GENERATE # Handle IMAGE_GENERATE operations if opType == OperationTypeEnum.IMAGE_GENERATE: @@ -1052,171 +1037,232 @@ If no trackable items can be identified, return: {{"kpis": []}} self.services.chat.progressLogFinish(aiOperationId, False) raise ValueError(errorMsg) - # Handle document generation (outputFormat specified) - if outputFormat: - # CRITICAL: For document generation with JSON templates, 
NEVER compress the prompt - options.compressPrompt = False - options.compressContext = False + # Handle document generation (outputFormat always set, defaults to "txt") + # Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way + # outputFormat is always set now (defaults to "txt" if not specified) + + # CRITICAL: For document generation with JSON templates, NEVER compress the prompt + options.compressPrompt = False + options.compressContext = False + + # Process contentParts for generation prompt (if provided) + # Use generic _callWithContentParts() which handles all content types (images, text, etc.) + # This automatically processes images with vision models and merges all results + if contentParts: + # Filter out binary/other parts that shouldn't be processed + processableParts = [] + skippedParts = [] + for p in contentParts: + if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))): + processableParts.append(p) + else: + skippedParts.append(p) - # Convert contentParts to text for generation prompt (if provided) - if contentParts: - # Convert contentParts to text for generation prompt - content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data]) + if skippedParts: + logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation") + + if processableParts: + # Count images for progress update + imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))]) + if imageCount > 0: + self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models") + + # Build proper extraction prompt using buildExtractionPrompt + # This creates a focused extraction prompt, not the user's generation prompt + from modules.services.serviceExtraction.subPromptBuilderExtraction import 
buildExtractionPrompt + + # Determine renderer for format-specific guidelines + renderer = None + if outputFormat: + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generationService = GenerationService(self.services) + renderer = generationService.getRendererForFormat(outputFormat) + except Exception as e: + logger.debug(f"Could not get renderer for format {outputFormat}: {e}") + + extractionPrompt = await buildExtractionPrompt( + outputFormat=outputFormat or "txt", + userPrompt=prompt, # User's prompt as context for what to extract + title=title or "Document", + aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None, + services=self.services, + renderer=renderer + ) + + logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt") + + # Use DATA_EXTRACT operation type for extraction + extractionOptions = AiCallOptions( + operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction + compressPrompt=options.compressPrompt, + compressContext=options.compressContext + ) + + extractionRequest = AiCallRequest( + prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt + context="", + options=extractionOptions, + contentParts=processableParts + ) + + # Write debug file for extraction prompt (all parts) + self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt") + + # Call generic content parts processor - handles images, text, chunking, merging + extractionResponse = await self.aiObjects.call(extractionRequest) + + # Write debug file for extraction response + if extractionResponse.content: + self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response") + else: + self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response") + logger.warning(f"Content extraction returned no 
content (errorCount={extractionResponse.errorCount})") + + # Use extracted content directly for generation prompt + if extractionResponse.errorCount == 0 and extractionResponse.content: + # The extracted content is already merged and ready to use + content_for_generation = extractionResponse.content + logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation") + else: + # Extraction failed - use placeholders + logger.warning(f"Content extraction failed, using placeholders") + placeholderParts = [] + for p in processableParts: + placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]") + content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None else: content_for_generation = None - - self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - - generation_prompt = await buildGenerationPrompt( - outputFormat, prompt, title, content_for_generation, None + logger.debug("No processable parts found in contentParts") + else: + content_for_generation = None + + self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") + from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt + + generation_prompt = await buildGenerationPrompt( + outputFormat, prompt, title, content_for_generation, None + ) + + promptArgs = { + "outputFormat": outputFormat, + "userPrompt": prompt, + "title": title, + "extracted_content": content_for_generation + } + + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") + # Extract user prompt from promptArgs for task completion analysis + userPrompt = None + if promptArgs: + userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") + + generated_json = await 
self._callAiWithLooping( + generation_prompt, + options, + "document_generation", + buildGenerationPrompt, + promptArgs, + aiOperationId, + userPrompt=userPrompt + ) + + self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON") + try: + extracted_json = self.services.utils.jsonExtractString(generated_json) + generated_data = json.loads(extracted_json) + except json.JSONDecodeError as e: + logger.error(f"Failed to parse generated JSON: {str(e)}") + self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(f"Generated content is not valid JSON: {str(e)}") + + # Extract title and filename from generated document structure + extractedTitle = title + extractedFilename = None + if isinstance(generated_data, dict) and "documents" in generated_data: + docs = generated_data["documents"] + if isinstance(docs, list) and len(docs) > 0: + firstDoc = docs[0] + if isinstance(firstDoc, dict): + if firstDoc.get("title"): + extractedTitle = firstDoc["title"] + if firstDoc.get("filename"): + extractedFilename = firstDoc["filename"] + + # Ensure metadata contains the extracted title + if "metadata" not in generated_data: + generated_data["metadata"] = {} + if extractedTitle: + generated_data["metadata"]["title"] = extractedTitle + + # Create separate operation for content rendering + renderOperationId = f"{aiOperationId}_render" + renderParentLogId = self.services.chat.getOperationLogId(aiOperationId) + self.services.chat.progressLogStart( + renderOperationId, + "Content Rendering", + "Rendering", + f"Format: {outputFormat}", + parentId=renderParentLogId + ) + + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + generationService = GenerationService(self.services) + self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format") + rendered_content, mime_type = await 
generationService.renderReport( + generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self ) + self.services.chat.progressLogFinish(renderOperationId, True) - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": content_for_generation - } - - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - # Extract user prompt from promptArgs for task completion analysis - userPrompt = None - if promptArgs: - userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") - - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId, - userPrompt=userPrompt - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON") - try: - extracted_json = self.services.utils.jsonExtractString(generated_json) - generated_data = json.loads(extracted_json) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse generated JSON: {str(e)}") - self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(f"Generated content is not valid JSON: {str(e)}") - - # Extract title and filename from generated document structure - extractedTitle = title - extractedFilename = None - if isinstance(generated_data, dict) and "documents" in generated_data: - docs = generated_data["documents"] - if isinstance(docs, list) and len(docs) > 0: - firstDoc = docs[0] - if isinstance(firstDoc, dict): - if firstDoc.get("title"): - extractedTitle = firstDoc["title"] - if firstDoc.get("filename"): - extractedFilename = firstDoc["filename"] - - # Ensure metadata contains the extracted title - if "metadata" not in generated_data: - generated_data["metadata"] = {} - if extractedTitle: - generated_data["metadata"]["title"] = extractedTitle - - # Create 
separate operation for content rendering - renderOperationId = f"{aiOperationId}_render" - renderParentLogId = self.services.chat.getOperationLogId(aiOperationId) - self.services.chat.progressLogStart( - renderOperationId, - "Content Rendering", - "Rendering", - f"Format: {outputFormat}", - parentId=renderParentLogId - ) - - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format") - rendered_content, mime_type = await generationService.renderReport( - generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self - ) - self.services.chat.progressLogFinish(renderOperationId, True) - - # Determine document name - if extractedFilename: - documentName = extractedFilename - elif extractedTitle and extractedTitle != "Generated Document": - sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle) - sanitized = re.sub(r"_+", "_", sanitized).strip("_") - if sanitized: - if not sanitized.lower().endswith(f".{outputFormat}"): - documentName = f"{sanitized}.{outputFormat}" - else: - documentName = sanitized + # Determine document name + if extractedFilename: + documentName = extractedFilename + elif extractedTitle and extractedTitle != "Generated Document": + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if sanitized: + if not sanitized.lower().endswith(f".{outputFormat}"): + documentName = f"{sanitized}.{outputFormat}" else: - documentName = f"generated.{outputFormat}" + documentName = sanitized else: documentName = f"generated.{outputFormat}" - - # Build document data - docData = DocumentData( - documentName=documentName, - documentData=rendered_content, - mimeType=mime_type, - sourceJson=generated_data # Preserve source JSON for structure validation - ) - - metadata = AiResponseMetadata( - 
title=extractedTitle or title or "Generated Document", - filename=extractedFilename, - operationType=opType.value if opType else None - ) - - self.services.utils.writeDebugFile(str(generated_data), "document_generation_response") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=json.dumps(generated_data), - metadata=metadata, - documents=[docData] - ) - - except Exception as e: - logger.error(f"Error rendering document: {str(e)}") - if renderOperationId: - self.services.chat.progressLogFinish(renderOperationId, False) - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(f"Rendering failed: {str(e)}") - - # Handle text processing (no outputFormat) - self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call") - - if contentParts: - # Process contentParts through AI - # Convert contentParts to text for prompt - contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data]) - fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt - result_content = await self._callAiWithLooping( - fullPrompt, options, "text", None, None, aiOperationId + else: + documentName = f"generated.{outputFormat}" + + # Build document data + docData = DocumentData( + documentName=documentName, + documentData=rendered_content, + mimeType=mime_type, + sourceJson=generated_data # Preserve source JSON for structure validation ) - else: - # Direct text call (no documents to process) - result_content = await self._callAiWithLooping( - prompt, options, "text", None, None, aiOperationId + + metadata = AiResponseMetadata( + title=extractedTitle or title or "Generated Document", + filename=extractedFilename, + operationType=opType.value if opType else None ) - - metadata = AiResponseMetadata( - operationType=opType.value if opType else None - ) - - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=result_content, - 
metadata=metadata - ) + + # Write JSON with proper formatting (not str() which can truncate) + jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False) + self.services.utils.writeDebugFile(jsonStr, "document_generation_response") + self.services.chat.progressLogFinish(aiOperationId, True) + + return AiResponse( + content=json.dumps(generated_data), + metadata=metadata, + documents=[docData] + ) + + except Exception as e: + logger.error(f"Error rendering document: {str(e)}") + if renderOperationId: + self.services.chat.progressLogFinish(renderOperationId, False) + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(f"Rendering failed: {str(e)}") except Exception as e: logger.error(f"Error in callAiContent: {str(e)}") diff --git a/modules/services/serviceAi/subJsonResponseHandling.py b/modules/services/serviceAi/subJsonResponseHandling.py index 558536b1..f04484d7 100644 --- a/modules/services/serviceAi/subJsonResponseHandling.py +++ b/modules/services/serviceAi/subJsonResponseHandling.py @@ -1236,8 +1236,12 @@ class JsonResponseHandler: # Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows" value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath) + # Handle None (path doesn't exist - incomplete JSON) + if value is None: + updatedKpi["currentValue"] = kpi.get("currentValue", 0) + logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}") # Count items/rows/elements based on type - if isinstance(value, list): + elif isinstance(value, list): updatedKpi["currentValue"] = len(value) logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items") elif isinstance(value, (int, float)): @@ -1296,8 +1300,12 @@ class JsonResponseHandler: # Extract value using path value = JsonResponseHandler._extractValueByPath(parsed, jsonPath) + # Handle None (path doesn't exist - incomplete JSON) + if value is None: + 
updatedKpi["currentValue"] = kpi.get("currentValue", 0) + logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}") # Count items/rows/elements based on type - if isinstance(value, list): + elif isinstance(value, list): updatedKpi["currentValue"] = len(value) logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items") elif isinstance(value, (int, float)): @@ -1321,6 +1329,7 @@ class JsonResponseHandler: Extract value from object using dot-notation path with array indices. Example: "sections[0].elements[0].items" + Returns None if path doesn't exist (for incomplete JSON handling). """ parts = path.split('.') current = obj @@ -1332,20 +1341,30 @@ class JsonResponseHandler: index = int(part[part.index('[') + 1:part.index(']')]) if key: - current = current.get(key, []) - if isinstance(current, list) and 0 <= index < len(current): - current = current[index] + if isinstance(current, dict): + current = current.get(key) + if current is None: + return None # Key doesn't exist + else: + return None # Can't access key on non-dict + + if isinstance(current, list): + if 0 <= index < len(current): + current = current[index] + else: + # Index out of range - return None for incomplete JSON + return None else: - raise KeyError(f"Invalid index {index} for {key}") + # Not a list, can't index + return None else: # Handle dict access if isinstance(current, dict): current = current.get(part) + if current is None: + return None # Key doesn't exist else: - raise KeyError(f"Cannot access {part} on {type(current)}") - - if current is None: - raise KeyError(f"Path {path} returned None at {part}") + return None # Can't access key on non-dict return current diff --git a/modules/services/serviceChat/mainServiceChat.py b/modules/services/serviceChat/mainServiceChat.py index b1c4d879..9ff148a8 100644 --- a/modules/services/serviceChat/mainServiceChat.py +++ 
b/modules/services/serviceChat/mainServiceChat.py @@ -92,13 +92,16 @@ class ChatService: if docRef.startswith("docItem:"): # docItem:<documentId>:<filename> or docItem:<documentId> (filename is optional) # ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct) + # Both formats are supported: docItem:<documentId> and docItem:<documentId>:<filename> parts = docRef.split(':') if len(parts) >= 2: docId = parts[1] # This should be the documentId (UUID) docFound = False # ALWAYS try to match by documentId first (regardless of number of parts) - # This handles: docItem:documentId and docItem:documentId:filename + # This handles both formats: + # - docItem:<documentId> (without filename - still works) + # - docItem:<documentId>:<filename> (with filename - preferred) for message in workflow.messages: # Validate message belongs to this workflow msgWorkflowId = getattr(message, 'workflowId', None) diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index c35e6156..d8db9922 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -138,6 +138,36 @@ class ExtractionService: f"extraction.process.{doc.mimeType}" ) + # Write extraction results to debug file + try: + from modules.shared.debugLogger import writeDebugFile + import json + # Create summary of extraction results for debug + extractionSummary = { + "documentName": doc.fileName, + "documentMimeType": doc.mimeType, + "partsCount": len(ec.parts), + "parts": [] + } + for part in ec.parts: + partSummary = { + "typeGroup": part.typeGroup, + "mimeType": part.mimeType, + "label": part.label, + "dataLength": len(part.data) if part.data else 0, + "metadata": part.metadata + } + # Include data preview for small parts (first 500 chars) + if part.data and len(part.data) <= 500: + partSummary["dataPreview"] = part.data[:500] + elif part.data: + partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]" +
extractionSummary["parts"].append(partSummary) + + writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}") + except Exception as e: + logger.debug(f"Failed to write extraction debug file: {str(e)}") + results.append(ec) return results diff --git a/modules/services/serviceExtraction/subPromptBuilderExtraction.py b/modules/services/serviceExtraction/subPromptBuilderExtraction.py index f6329a5c..7b91579a 100644 --- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py +++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py @@ -99,9 +99,16 @@ async def buildExtractionPrompt( # Parse extraction intent if AI service is available extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt - # Build base prompt + # Build base prompt with clear user prompt markers + sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt adaptive_prompt = f""" -{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt} +{'='*80} +USER REQUEST / USER PROMPT: +{'='*80} +{sanitized_user_prompt} +{'='*80} +END OF USER REQUEST / USER PROMPT +{'='*80} You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output. 
diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py index 9fca82e9..b797aba3 100644 --- a/modules/services/serviceGeneration/renderers/rendererXlsx.py +++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py @@ -479,25 +479,11 @@ class RendererXlsx(BaseRenderer): sheetNames.append(sectionTitle[:31]) # Excel sheet name limit else: - # Single table or mixed content - create main sheet + # Single table or mixed content - create only main sheet documentTitle = jsonContent.get("metadata", {}).get("title", "Document") sheetNames.append(documentTitle[:31]) # Excel sheet name limit - - # Add additional sheets for other content types - contentTypes = set() - for section in sections: - contentType = section.get("content_type", "paragraph") - contentTypes.add(contentType) - - if "table" in contentTypes and len(tableSections) == 1: - sheetNames.append("Table Data") - if "list" in contentTypes: - sheetNames.append("Lists") - if "paragraph" in contentTypes or "heading" in contentTypes: - sheetNames.append("Text") - # Limit to 4 sheets maximum - return sheetNames[:4] + return sheetNames def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None: """Populate Excel sheets with content from JSON based on actual sheet names.""" @@ -527,14 +513,10 @@ class RendererXlsx(BaseRenderer): sheetTitle = caption self._populateTableSheet(sheet, section, styles, sheetTitle) else: - # Single table or mixed content - use original logic + # Single table or mixed content - populate only main sheet firstSheetName = sheetNames[0] self._populateMainSheet(sheets[firstSheetName], jsonContent, styles) - # If we have multiple sheets, distribute content by type - if len(sheetNames) > 1: - self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:]) - except Exception as e: self.logger.warning(f"Could not populate Excel sheets: {str(e)}") 
diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py index 92ab8664..91011539 100644 --- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py +++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py @@ -72,7 +72,13 @@ async def buildGenerationPrompt( continuationText += "Start directly with the next element/section that should follow.\n\n" # PROMPT FOR CONTINUATION - generationPrompt = f"""User request: "{userPrompt}" + generationPrompt = f"""{'='*80} +USER REQUEST / USER PROMPT: +{'='*80} +{userPrompt} +{'='*80} +END OF USER REQUEST / USER PROMPT +{'='*80} ⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content. @@ -93,8 +99,57 @@ Continue generating the remaining content now. else: # PROMPT FOR FIRST CALL + # Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions + + if extracted_content: + # If we have extracted content, put it FIRST and make it very clear it's the source data + generationPrompt = f"""{'='*80} +USER REQUEST / USER PROMPT: +{'='*80} +{userPrompt} +{'='*80} +END OF USER REQUEST / USER PROMPT +{'='*80} - generationPrompt = f"""User request: "{userPrompt}" +{'='*80} +⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️ +{'='*80} +The content below contains the ACTUAL DATA extracted from the source documents. +You MUST use this data - DO NOT generate fake or example data. +{'='*80} +EXTRACTED CONTENT FROM DOCUMENTS: +{'='*80} +{extracted_content} +{'='*80} +END OF EXTRACTED CONTENT +{'='*80} + +Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source. +The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data. +You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template. 
+ +JSON structure template (structure only - use data from EXTRACTED CONTENT above): +{jsonTemplate} + +Instructions: +- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes. +- Do NOT reuse example section IDs; create your own. +- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template. +- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response. +- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective. +- Output JSON only; no markdown fences or extra text. + +Generate your complete response using the extracted content data. +""" + else: + # No extracted content - generate from scratch + generationPrompt = f"""{'='*80} +USER REQUEST / USER PROMPT: +{'='*80} +{userPrompt} +{'='*80} +END OF USER REQUEST / USER PROMPT +{'='*80} Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content. @@ -111,12 +166,5 @@ Instructions: Generate your complete response. """ - # If we have extracted content, prepend it to the prompt - if extracted_content: - generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS: -{extracted_content} - -{generationPrompt}""" - return generationPrompt.strip() diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index d2805ecb..46eb4380 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -102,12 +102,30 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: Attempt to repair broken JSON using multiple strategies. Generic solution that works for any content type. Returns the best repair attempt or None if all fail. + + IMPORTANT: This function tries to preserve ALL data by avoiding truncation. 
+ Only uses truncation as a last resort when structure closing fails. """ if not text: return None - # Strategy 1: Try to extract sections from the entire text first + # Strategy 1: Structure closing - close incomplete structures WITHOUT truncating + # This preserves all data and should be tried first + closedStr = closeJsonStructures(text) + obj, err, _ = tryParseJson(closedStr) + if err is None and isinstance(obj, dict): + sections = extractSectionsFromDocument(obj) + if sections: + logger.info(f"Repaired JSON using structure closing (preserved all data, found {len(sections)} sections)") + return obj + else: + # Structure closing worked but no sections found - still return it + logger.info("Repaired JSON using structure closing (preserved all data, but no sections found)") + return obj + + # Strategy 2: Try to extract sections from the entire text using regex # This handles cases where the JSON structure is broken but content is intact + # NOTE: _extractSectionsRegex may truncate, but we try it before progressive parsing extractedSections = _extractSectionsRegex(text) if extractedSections: logger.info(f"Extracted {len(extractedSections)} sections using regex") @@ -120,7 +138,10 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: "documents": [{"sections": extractedSections}] } - # Strategy 2: Progressive parsing - try to find longest valid prefix + # Strategy 3: Progressive parsing - try to find longest valid prefix (TRUNCATES DATA) + # WARNING: This strategy truncates the input and loses data after the truncation point + # Only use as last resort when other strategies fail + logger.warning("Structure closing and regex extraction failed, trying progressive parsing (WILL TRUNCATE DATA)") bestResult = None bestValidLength = 0 @@ -133,13 +154,13 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: if err is None and isinstance(obj, dict): bestResult = obj bestValidLength = i - logger.debug(f"Progressive parsing success at length {i} (step: 
{stepSize})") + logger.debug(f"Progressive parsing success at length {i} (step: {stepSize}) - DATA TRUNCATED AT POSITION {i}") break if bestResult: break if bestResult: - logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})") + logger.warning(f"Repaired JSON using progressive parsing (valid length: {bestValidLength}, DATA LOST AFTER THIS POINT)") # Check if we have sections in the result sections = extractSectionsFromDocument(bestResult) @@ -160,13 +181,6 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: bestResult["documents"][0]["sections"].extend(extractedSections) return bestResult - # Strategy 3: Structure closing - close incomplete structures - closedStr = closeJsonStructures(text) - obj, err, _ = tryParseJson(closedStr) - if err is None and isinstance(obj, dict): - logger.info("Repaired JSON using structure closing") - return obj - logger.warning("All repair strategies failed") return None @@ -174,18 +188,43 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]: def closeJsonStructures(text: str) -> str: """ Close incomplete JSON structures by adding missing closing brackets. + Also handles unterminated strings by closing them. 
""" if not text: return text + result = text + + # Handle unterminated strings: find the last unclosed string + # Look for patterns like: "value" or "value\n (unterminated) + # Simple heuristic: if we end with an unterminated string (odd number of quotes at end) + # Try to close it by finding the last opening quote and closing it + if result.strip(): + # Count quotes - if odd number, we have an unterminated string + quoteCount = result.count('"') + if quoteCount % 2 == 1: + # Find the last opening quote that's not escaped + lastQuotePos = result.rfind('"') + if lastQuotePos >= 0: + # Check if it's escaped + escapeCount = 0 + i = lastQuotePos - 1 + while i >= 0 and result[i] == '\\': + escapeCount += 1 + i -= 1 + # If not escaped (even number of backslashes), close the string + if escapeCount % 2 == 0: + # Find where the string should end (before next comma, bracket, or brace) + # For now, just close it at the end + result += '"' + # Count open/close brackets and braces - openBraces = text.count('{') - closeBraces = text.count('}') - openBrackets = text.count('[') - closeBrackets = text.count(']') + openBraces = result.count('{') + closeBraces = result.count('}') + openBrackets = result.count('[') + closeBrackets = result.count(']') # Close incomplete structures - result = text for _ in range(openBraces - closeBraces): result += '}' for _ in range(openBrackets - closeBrackets): @@ -202,11 +241,24 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]: NOTE: This function is called FROM repairBrokenJson, so it must NOT call repairBrokenJson to avoid circular dependency. Instead, it implements its own repair strategies. + + IMPORTANT: Tries to preserve data by using structure closing first before truncation. 
""" sections = [] - # Strategy 1: Try progressive parsing to find longest valid JSON prefix - # Find the longest valid JSON prefix that contains sections + # Strategy 1: Try structure closing WITHOUT truncation first (preserves all data) + closed_str = closeJsonStructures(text) + obj, err, _ = tryParseJson(closed_str) + if err is None and isinstance(obj, dict): + extracted_sections = extractSectionsFromDocument(obj) + if extracted_sections: + logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using structure closing (preserved all data)") + return extracted_sections + + # Strategy 2: Try progressive parsing to find longest valid JSON prefix (TRUNCATES DATA) + # WARNING: This truncates the input and loses data + # Only use if structure closing failed + logger.debug("_extractSectionsRegex: Structure closing failed, trying progressive parsing (WILL TRUNCATE)") best_result = None best_valid_length = 0 for step_size in [1000, 500, 100, 50, 10]: @@ -217,7 +269,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]: if err is None and isinstance(obj, dict): extracted_sections = extractSectionsFromDocument(obj) if extracted_sections: - logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i}") + logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i} (DATA TRUNCATED)") return extracted_sections # Store best result even if no sections found if not best_result: diff --git a/modules/workflows/methods/methodOutlook.py b/modules/workflows/methods/methodOutlook.py index 99768e07..fa7b4e47 100644 --- a/modules/workflows/methods/methodOutlook.py +++ b/modules/workflows/methods/methodOutlook.py @@ -1183,11 +1183,13 @@ Max length: {maxLength} characters Based on the context, decide which documents to attach. +CRITICAL: Use EXACT document references from Available_Document_References above. 
For individual documents: ALWAYS use docItem:: format (include filename) + Return JSON: {{ "subject": "subject line", "body": "email body (HTML allowed)", - "attachments": ["doc_ref1", "doc_ref2"] + "attachments": ["docItem::"] }} """ @@ -1237,6 +1239,9 @@ Return JSON: elif isinstance(ai_attachments, list): ai_attachments = [a for a in ai_attachments if isinstance(a, str)] + # Initialize normalized_ai_attachments + normalized_ai_attachments = [] + if ai_attachments: try: ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments @@ -1250,16 +1255,20 @@ Return JSON: selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids] if selected_docs: - # Map selected ChatDocuments back to docItem references + # Map selected ChatDocuments back to docItem references (with full filename) documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs] + # Normalize ai_attachments to full format for storage + normalized_ai_attachments = documentList.copy() logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)") else: # No intersection; use all available documents documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs] + normalized_ai_attachments = documentList.copy() logger.warning("AI selected attachments not found in available documents, using all documents") else: # No AI selection; use all available documents documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs] + normalized_ai_attachments = documentList.copy() logger.warning("AI did not specify attachments, using all available documents") else: logger.info("No documents provided in documentList; skipping attachment processing") @@ -1363,7 +1372,7 @@ Return JSON: "cc": cc, "bcc": bcc, "attachments": len(documentList), - "aiSelectedAttachments": ai_attachments if ai_attachments else "all documents", + 
"aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents", "aiGenerated": True, "context": context, "emailStyle": emailStyle, @@ -1371,12 +1380,40 @@ Return JSON: "draftData": draft_data } + # Extract attachment filenames for validation metadata + attachmentFilenames = [] + attachmentReferences = [] + if documentList: + try: + from modules.datamodels.datamodelDocref import DocumentReferenceList + attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or [] + attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)] + # Store normalized document references (with filenames) - use normalized_ai_attachments if available + attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs] + except Exception: + pass + + # Create validation metadata for content validator + validationMetadata = { + "actionType": "outlook.composeAndDraftEmailWithContext", + "emailRecipients": to, + "emailCc": cc, + "emailBcc": bcc, + "emailSubject": subject, + "emailAttachments": attachmentFilenames, + "emailAttachmentReferences": attachmentReferences, + "emailAttachmentCount": len(attachmentFilenames), + "emailStyle": emailStyle, + "hasAttachments": len(attachmentFilenames) > 0 + } + return ActionResult( success=True, documents=[ActionDocument( documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json", documentData=json.dumps(draftResultData, indent=2), - mimeType="application/json" + mimeType="application/json", + validationMetadata=validationMetadata )] ) else: diff --git a/modules/workflows/methods/methodSharepoint.py b/modules/workflows/methods/methodSharepoint.py index 2c773989..92d77e8e 100644 --- a/modules/workflows/methods/methodSharepoint.py +++ b/modules/workflows/methods/methodSharepoint.py 
@@ -1154,6 +1154,53 @@ class MethodSharepoint(MethodBase): resultData = json.loads(fileData) foundDocuments = resultData.get("foundDocuments", []) + # If no foundDocuments, check if it's a listDocuments result (has listResults) + if not foundDocuments and "listResults" in resultData: + logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format") + listResults = resultData.get("listResults", []) + foundDocuments = [] + siteIdFromList = None + siteNameFromList = None + + for listResult in listResults: + siteResults = listResult.get("siteResults", []) + for siteResult in siteResults: + items = siteResult.get("items", []) + # Extract site info from first item if available + if items and not siteIdFromList: + # Try to get site info from the siteResult structure + # We need to discover sites to get the siteId + siteNameFromList = items[0].get("siteName") + + for item in items: + # Convert listDocuments item format to foundDocuments format + if item.get("type") == "file": + foundDoc = { + "id": item.get("id"), + "name": item.get("name"), + "type": "file", + "siteName": item.get("siteName"), + "siteId": None, # Will be determined from site discovery + "webUrl": item.get("webUrl"), + "fullPath": item.get("webUrl", ""), + "parentPath": item.get("parentPath", "") + } + foundDocuments.append(foundDoc) + + # Discover sites to get siteId if we have siteName + if foundDocuments and siteNameFromList and not siteIdFromList: + logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'") + allSites = await self._discoverSharePointSites() + matchingSites = self._filterSitesByHint(allSites, siteNameFromList) + if matchingSites: + siteIdFromList = matchingSites[0].get("id") + # Update all foundDocuments with siteId + for doc in foundDocuments: + doc["siteId"] = siteIdFromList + logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'") + + logger.info(f"Converted {len(foundDocuments)} files from listResults 
format") + if foundDocuments: # Extract SharePoint file IDs from foundDocuments sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"] @@ -1167,6 +1214,15 @@ class MethodSharepoint(MethodBase): siteName = firstDoc.get("siteName") siteId = firstDoc.get("siteId") + # If siteId is missing (from listDocuments conversion), discover sites to find it + if siteName and not siteId: + logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'") + allSites = await self._discoverSharePointSites() + matchingSites = self._filterSitesByHint(allSites, siteName) + if matchingSites: + siteId = matchingSites[0].get("id") + logger.info(f"Found siteId '{siteId}' for site '{siteName}'") + if siteName and siteId: sites = [{ "id": siteId, @@ -1174,6 +1230,19 @@ class MethodSharepoint(MethodBase): "webUrl": firstDoc.get("webUrl", "") }] logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})") + elif siteName: + # Try to get site by name + allSites = await self._discoverSharePointSites() + matchingSites = self._filterSitesByHint(allSites, siteName) + if matchingSites: + sites = [{ + "id": matchingSites[0].get("id"), + "displayName": siteName, + "webUrl": matchingSites[0].get("webUrl", "") + }] + logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})") + else: + return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.") else: return ActionResult.isFailure(error="Site information missing from pathObject. 
Cannot determine target site for read operation.") else: diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py index a2c93288..b065b912 100644 --- a/modules/workflows/processing/adaptive/contentValidator.py +++ b/modules/workflows/processing/adaptive/contentValidator.py @@ -421,14 +421,6 @@ class ContentValidator: if actionName: # Convert action name to human-readable format actionDescription = actionName.replace("ai.", "").replace(".", " ").title() - if "convert" in actionName.lower(): - actionDescription = "Document format conversion" - elif "generate" in actionName.lower() or "create" in actionName.lower(): - actionDescription = "Document generation" - elif "extract" in actionName.lower(): - actionDescription = "Content extraction" - elif "process" in actionName.lower(): - actionDescription = "Content processing" actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})" # Build action parameters context @@ -441,6 +433,25 @@ class ContentValidator: paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2) actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}" + # Extract validation metadata from documents (action-specific context) + validationMetadataContext = "" + if documents: + metadataList = [] + for doc in documents: + metadata = getattr(doc, 'validationMetadata', None) + if metadata and isinstance(metadata, dict): + metadataList.append(metadata) + + if metadataList: + # Combine all metadata (usually just one document) + combinedMetadata = {} + for meta in metadataList: + combinedMetadata.update(meta) + + if combinedMetadata: + metadataJson = json.dumps(combinedMetadata, ensure_ascii=False, indent=2) + validationMetadataContext = f"\nACTION VALIDATION METADATA: {metadataJson}" + # Format success criteria for display with index numbers if successCriteria: criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in 
enumerate(successCriteria)]) @@ -452,7 +463,7 @@ class ContentValidator: === TASK INFORMATION === {objectiveLabel}: '{objectiveText}' EXPECTED DATA TYPE: {dataType} -EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext} +EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext} === VALIDATION INSTRUCTIONS === @@ -466,6 +477,7 @@ VALIDATION RULES: 5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help. VALIDATION STEPS: +- Check ACTION VALIDATION METADATA first (if present) - this contains action-specific context - Check structure summary for quantities, counts, statistics - Compare found values with required values from criteria - If structure unavailable, use metadata only (format, filename, size) diff --git a/modules/workflows/processing/modes/modeAutomation.py b/modules/workflows/processing/modes/modeAutomation.py index b8600e0f..996462b7 100644 --- a/modules/workflows/processing/modes/modeAutomation.py +++ b/modules/workflows/processing/modes/modeAutomation.py @@ -169,6 +169,10 @@ class AutomationMode(BaseMode): Execute task using Automation mode - executes predefined actions directly. No AI planning or review phases - actions are executed sequentially as defined. 
""" + # Get task index from workflow state for consistency + if taskIndex is None: + taskIndex = workflow.getTaskIndex() + logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===") try: @@ -178,7 +182,6 @@ class AutomationMode(BaseMode): # Update workflow before executing task if taskIndex is not None: self._updateWorkflowBeforeExecutingTask(taskIndex) - self.services.chat.setWorkflowContext(taskNumber=taskIndex) # Create task start message await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks) @@ -241,7 +244,7 @@ class AutomationMode(BaseMode): # Execute action result = await self.actionExecutor.executeSingleAction( - action, workflow, taskStep, taskIndex, actionNumber, totalActions + action, workflow, taskStep ) actionResults.append(result) diff --git a/modules/workflows/processing/modes/modeDynamic.py b/modules/workflows/processing/modes/modeDynamic.py index dac6211f..5cc8b866 100644 --- a/modules/workflows/processing/modes/modeDynamic.py +++ b/modules/workflows/processing/modes/modeDynamic.py @@ -561,6 +561,11 @@ class DynamicMode(BaseMode): # Use connectionReference from selection (required) connectionRef = selection.get('connectionReference') + + # If not found at top level, check in selection['parameters'] (guided action case) + if not connectionRef and isinstance(selection, dict) and 'parameters' in selection: + connectionRef = selection['parameters'].get('connectionReference') + if connectionRef: # Check if action actually has connectionReference parameter methodName, actionName = compoundActionName.split('.', 1) diff --git a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py index ae59fafc..a58467fb 100644 --- a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py +++ b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py @@ -58,9 +58,10 @@ CONTEXT: 
{{KEY:OVERALL_TASK_CONTEXT}} OBJECTIVE: {{KEY:TASK_OBJECTIVE}} === AVAILABLE RESOURCES === -DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}} +AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}} {{KEY:AVAILABLE_DOCUMENTS_INDEX}} -CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS_INDEX}} +AVAILABLE_CONNECTIONS_INDEX: +{{KEY:AVAILABLE_CONNECTIONS_INDEX}} === AVAILABLE ACTIONS === {{KEY:AVAILABLE_METHODS}} @@ -82,6 +83,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST: - Be the next logical incremental step (not complete entire objective in one step) - Target exactly one output format if producing files - Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...) +- ALWAYS use FULL document references with filename: docItem:: (filename is required) - Learn from previous validation feedback and avoid repeated mistakes - Include intent analysis fields (dataType, expectedFormats, qualityRequirements, successCriteria) @@ -97,7 +99,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST: "successCriteria": ["specific criterion 1", "specific criterion 2"], "userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining what this action will do (1 sentence, first person, friendly tone)", "learnings": ["..."], - "requiredInputDocuments": ["docList:..."], + "requiredInputDocuments": ["docItem::", "docList: