Integration testing of adapted AI workflow with fixes

2025-12-01 19:15:50 +01:00 · 2025-12-01 19:15:50 +01:00 · b401be703f
commit b401be703f
parent aff37fd2e2
18 changed files with 648 additions and 293 deletions
--- a/modules/datamodels/datamodelChat.py
+++ b/modules/datamodels/datamodelChat.py
@ -400,6 +400,10 @@ class ActionDocument(BaseModel):
        None,
        description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)"
    )
+    validationMetadata: Optional[Dict[str, Any]] = Field(
+        None,
+        description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)"
+    )


 registerModelLabels(
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@ -262,11 +262,17 @@ class AiObjects:
                        logger.info(f"✅ Image content part processed successfully with model: {model.name}")
                        
                        # Convert to AiCallResponse format
+                        # Note: AiModelResponse has no priceUsd field, and processingTime may be None
+                        # Fall back to 0.0 when the model response provides no processing time
+                        processingTime = getattr(modelResponse, 'processingTime', None)
+                        if processingTime is None:
+                            processingTime = 0.0
+                        
                        return AiCallResponse(
                            content=modelResponse.content,
                            modelName=model.name,
-                            priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0,
-                            processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0,
+                            priceUsd=0.0,  # Price will be calculated elsewhere if needed
+                            processingTime=processingTime,
                            bytesSent=0,  # Will be calculated elsewhere
                            bytesReceived=0,  # Will be calculated elsewhere
                            errorCount=0
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@ -944,32 +944,17 @@ If no trackable items can be identified, return: {{"kpis": []}}
        )
        
        try:
+            # Default outputFormat to "txt" if not specified (unified path - all formats handled the same way)
+            if not outputFormat:
+                outputFormat = "txt"
+            
            # Extraction is now separate - contentParts must be extracted before calling
            # Require operationType to be set before calling
            opType = getattr(options, "operationType", None)
            if not opType:
-                # If outputFormat is specified, default to DATA_GENERATE
-                if outputFormat:
-                    options.operationType = OperationTypeEnum.DATA_GENERATE
-                    opType = OperationTypeEnum.DATA_GENERATE
-                else:
-                    self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
-                    analyzedOptions = await self._analyzePromptAndCreateOptions(prompt)
-                    if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType:
-                        options.operationType = analyzedOptions.operationType
-                        # Merge other analyzed options
-                        if hasattr(analyzedOptions, "priority"):
-                            options.priority = analyzedOptions.priority
-                        if hasattr(analyzedOptions, "processingMode"):
-                            options.processingMode = analyzedOptions.processingMode
-                        if hasattr(analyzedOptions, "compressPrompt"):
-                            options.compressPrompt = analyzedOptions.compressPrompt
-                        if hasattr(analyzedOptions, "compressContext"):
-                            options.compressContext = analyzedOptions.compressContext
-                    else:
-                        # Default to DATA_ANALYSE if analysis fails
-                        options.operationType = OperationTypeEnum.DATA_ANALYSE
-                    opType = options.operationType
+                # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE
+                options.operationType = OperationTypeEnum.DATA_GENERATE
+                opType = OperationTypeEnum.DATA_GENERATE
            
            # Handle IMAGE_GENERATE operations
            if opType == OperationTypeEnum.IMAGE_GENERATE:
@ -1052,171 +1037,232 @@ If no trackable items can be identified, return: {{"kpis": []}}
                    self.services.chat.progressLogFinish(aiOperationId, False)
                    raise ValueError(errorMsg)
            
-            # Handle document generation (outputFormat specified)
-            if outputFormat:
-                # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
-                options.compressPrompt = False
-                options.compressContext = False
+            # Handle document generation (outputFormat always set, defaults to "txt")
+            # Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way
+            # outputFormat is always set now (defaults to "txt" if not specified)
+            
+            # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
+            options.compressPrompt = False
+            options.compressContext = False
+            
+            # Process contentParts for generation prompt (if provided)
+            # Use generic _callWithContentParts() which handles all content types (images, text, etc.)
+            # This automatically processes images with vision models and merges all results
+            if contentParts:
+                # Filter out binary/other parts that shouldn't be processed
+                processableParts = []
+                skippedParts = []
+                for p in contentParts:
+                    if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
+                        processableParts.append(p)
+                    else:
+                        skippedParts.append(p)
                
-                # Convert contentParts to text for generation prompt (if provided)
-                if contentParts:
-                    # Convert contentParts to text for generation prompt
-                    content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
+                if skippedParts:
+                    logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")
+                
+                if processableParts:
+                    # Count images for progress update
+                    imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
+                    if imageCount > 0:
+                        self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")
+                    
+                    # Build proper extraction prompt using buildExtractionPrompt
+                    # This creates a focused extraction prompt, not the user's generation prompt
+                    from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
+                    
+                    # Determine renderer for format-specific guidelines
+                    renderer = None
+                    if outputFormat:
+                        try:
+                            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+                            generationService = GenerationService(self.services)
+                            renderer = generationService.getRendererForFormat(outputFormat)
+                        except Exception as e:
+                            logger.debug(f"Could not get renderer for format {outputFormat}: {e}")
+                    
+                    extractionPrompt = await buildExtractionPrompt(
+                        outputFormat=outputFormat or "txt",
+                        userPrompt=prompt,  # User's prompt as context for what to extract
+                        title=title or "Document",
+                        aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None,
+                        services=self.services,
+                        renderer=renderer
+                    )
+                    
+                    logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")
+                    
+                    # Use DATA_EXTRACT operation type for extraction
+                    extractionOptions = AiCallOptions(
+                        operationType=OperationTypeEnum.DATA_EXTRACT,  # Use DATA_EXTRACT for extraction
+                        compressPrompt=options.compressPrompt,
+                        compressContext=options.compressContext
+                    )
+                    
+                    extractionRequest = AiCallRequest(
+                        prompt=extractionPrompt,  # Use proper extraction prompt, not user's generation prompt
+                        context="",
+                        options=extractionOptions,
+                        contentParts=processableParts
+                    )
+                    
+                    # Write debug file for extraction prompt (all parts)
+                    self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
+                    
+                    # Call generic content parts processor - handles images, text, chunking, merging
+                    extractionResponse = await self.aiObjects.call(extractionRequest)
+                    
+                    # Write debug file for extraction response
+                    if extractionResponse.content:
+                        self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
+                    else:
+                        self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
+                        logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")
+                    
+                    # Use extracted content directly for generation prompt
+                    if extractionResponse.errorCount == 0 and extractionResponse.content:
+                        # The extracted content is already merged and ready to use
+                        content_for_generation = extractionResponse.content
+                        logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation")
+                    else:
+                        # Extraction failed - use placeholders
+                        logger.warning(f"Content extraction failed, using placeholders")
+                        placeholderParts = []
+                        for p in processableParts:
+                            placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]")
+                        content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None
                else:
                    content_for_generation = None
-                
-                self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
-                from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
-                
-                generation_prompt = await buildGenerationPrompt(
-                    outputFormat, prompt, title, content_for_generation, None
+                    logger.debug("No processable parts found in contentParts")
+            else:
+                content_for_generation = None
+            
+            self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
+            from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
+            
+            generation_prompt = await buildGenerationPrompt(
+                outputFormat, prompt, title, content_for_generation, None
+            )
+            
+            promptArgs = {
+                "outputFormat": outputFormat,
+                "userPrompt": prompt,
+                "title": title,
+                "extracted_content": content_for_generation
+            }
+            
+            self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
+            # Extract user prompt from promptArgs for task completion analysis
+            userPrompt = None
+            if promptArgs:
+                userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
+            
+            generated_json = await self._callAiWithLooping(
+                generation_prompt, 
+                options, 
+                "document_generation",
+                buildGenerationPrompt,
+                promptArgs,
+                aiOperationId,
+                userPrompt=userPrompt
+            )
+            
+            self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
+            try:
+                extracted_json = self.services.utils.jsonExtractString(generated_json)
+                generated_data = json.loads(extracted_json)
+            except json.JSONDecodeError as e:
+                logger.error(f"Failed to parse generated JSON: {str(e)}")
+                self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
+                self.services.chat.progressLogFinish(aiOperationId, False)
+                raise ValueError(f"Generated content is not valid JSON: {str(e)}")
+            
+            # Extract title and filename from generated document structure
+            extractedTitle = title
+            extractedFilename = None
+            if isinstance(generated_data, dict) and "documents" in generated_data:
+                docs = generated_data["documents"]
+                if isinstance(docs, list) and len(docs) > 0:
+                    firstDoc = docs[0]
+                    if isinstance(firstDoc, dict):
+                        if firstDoc.get("title"):
+                            extractedTitle = firstDoc["title"]
+                        if firstDoc.get("filename"):
+                            extractedFilename = firstDoc["filename"]
+            
+            # Ensure metadata contains the extracted title
+            if "metadata" not in generated_data:
+                generated_data["metadata"] = {}
+            if extractedTitle:
+                generated_data["metadata"]["title"] = extractedTitle
+            
+            # Create separate operation for content rendering
+            renderOperationId = f"{aiOperationId}_render"
+            renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
+            self.services.chat.progressLogStart(
+                renderOperationId,
+                "Content Rendering",
+                "Rendering",
+                f"Format: {outputFormat}",
+                parentId=renderParentLogId
+            )
+            
+            try:
+                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
+                generationService = GenerationService(self.services)
+                self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
+                rendered_content, mime_type = await generationService.renderReport(
+                    generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
                )
+                self.services.chat.progressLogFinish(renderOperationId, True)
                
-                promptArgs = {
-                    "outputFormat": outputFormat,
-                    "userPrompt": prompt,
-                    "title": title,
-                    "extracted_content": content_for_generation
-                }
-                
-                self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
-                # Extract user prompt from promptArgs for task completion analysis
-                userPrompt = None
-                if promptArgs:
-                    userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
-                
-                generated_json = await self._callAiWithLooping(
-                    generation_prompt, 
-                    options, 
-                    "document_generation",
-                    buildGenerationPrompt,
-                    promptArgs,
-                    aiOperationId,
-                    userPrompt=userPrompt
-                )
-                
-                self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
-                try:
-                    extracted_json = self.services.utils.jsonExtractString(generated_json)
-                    generated_data = json.loads(extracted_json)
-                except json.JSONDecodeError as e:
-                    logger.error(f"Failed to parse generated JSON: {str(e)}")
-                    self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
-                    self.services.chat.progressLogFinish(aiOperationId, False)
-                    raise ValueError(f"Generated content is not valid JSON: {str(e)}")
-                
-                # Extract title and filename from generated document structure
-                extractedTitle = title
-                extractedFilename = None
-                if isinstance(generated_data, dict) and "documents" in generated_data:
-                    docs = generated_data["documents"]
-                    if isinstance(docs, list) and len(docs) > 0:
-                        firstDoc = docs[0]
-                        if isinstance(firstDoc, dict):
-                            if firstDoc.get("title"):
-                                extractedTitle = firstDoc["title"]
-                            if firstDoc.get("filename"):
-                                extractedFilename = firstDoc["filename"]
-                
-                # Ensure metadata contains the extracted title
-                if "metadata" not in generated_data:
-                    generated_data["metadata"] = {}
-                if extractedTitle:
-                    generated_data["metadata"]["title"] = extractedTitle
-                
-                # Create separate operation for content rendering
-                renderOperationId = f"{aiOperationId}_render"
-                renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
-                self.services.chat.progressLogStart(
-                    renderOperationId,
-                    "Content Rendering",
-                    "Rendering",
-                    f"Format: {outputFormat}",
-                    parentId=renderParentLogId
-                )
-                
-                try:
-                    from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
-                    generationService = GenerationService(self.services)
-                    self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
-                    rendered_content, mime_type = await generationService.renderReport(
-                        generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
-                    )
-                    self.services.chat.progressLogFinish(renderOperationId, True)
-                    
-                    # Determine document name
-                    if extractedFilename:
-                        documentName = extractedFilename
-                    elif extractedTitle and extractedTitle != "Generated Document":
-                        sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
-                        sanitized = re.sub(r"_+", "_", sanitized).strip("_")
-                        if sanitized:
-                            if not sanitized.lower().endswith(f".{outputFormat}"):
-                                documentName = f"{sanitized}.{outputFormat}"
-                            else:
-                                documentName = sanitized
+                # Determine document name
+                if extractedFilename:
+                    documentName = extractedFilename
+                elif extractedTitle and extractedTitle != "Generated Document":
+                    sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
+                    sanitized = re.sub(r"_+", "_", sanitized).strip("_")
+                    if sanitized:
+                        if not sanitized.lower().endswith(f".{outputFormat}"):
+                            documentName = f"{sanitized}.{outputFormat}"
                        else:
-                            documentName = f"generated.{outputFormat}"
+                            documentName = sanitized
                    else:
                        documentName = f"generated.{outputFormat}"
-                    
-                    # Build document data
-                    docData = DocumentData(
-                        documentName=documentName,
-                        documentData=rendered_content,
-                        mimeType=mime_type,
-                        sourceJson=generated_data  # Preserve source JSON for structure validation
-                    )
-                    
-                    metadata = AiResponseMetadata(
-                        title=extractedTitle or title or "Generated Document",
-                        filename=extractedFilename,
-                        operationType=opType.value if opType else None
-                    )
-                    
-                    self.services.utils.writeDebugFile(str(generated_data), "document_generation_response")
-                    self.services.chat.progressLogFinish(aiOperationId, True)
-                    
-                    return AiResponse(
-                        content=json.dumps(generated_data),
-                        metadata=metadata,
-                        documents=[docData]
-                    )
-                    
-                except Exception as e:
-                    logger.error(f"Error rendering document: {str(e)}")
-                    if renderOperationId:
-                        self.services.chat.progressLogFinish(renderOperationId, False)
-                    self.services.chat.progressLogFinish(aiOperationId, False)
-                    raise ValueError(f"Rendering failed: {str(e)}")
-            
-            # Handle text processing (no outputFormat)
-            self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
-            
-            if contentParts:
-                # Process contentParts through AI
-                # Convert contentParts to text for prompt
-                contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
-                fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt
-                result_content = await self._callAiWithLooping(
-                    fullPrompt, options, "text", None, None, aiOperationId
+                else:
+                    documentName = f"generated.{outputFormat}"
+                
+                # Build document data
+                docData = DocumentData(
+                    documentName=documentName,
+                    documentData=rendered_content,
+                    mimeType=mime_type,
+                    sourceJson=generated_data  # Preserve source JSON for structure validation
                )
-            else:
-                # Direct text call (no documents to process)
-                result_content = await self._callAiWithLooping(
-                    prompt, options, "text", None, None, aiOperationId
+                
+                metadata = AiResponseMetadata(
+                    title=extractedTitle or title or "Generated Document",
+                    filename=extractedFilename,
+                    operationType=opType.value if opType else None
                )
-            
-            metadata = AiResponseMetadata(
-                operationType=opType.value if opType else None
-            )
-            
-            self.services.chat.progressLogFinish(aiOperationId, True)
-            
-            return AiResponse(
-                content=result_content,
-                metadata=metadata
-            )
+                
+                # Write JSON with proper formatting (str() would emit a Python repr, not valid JSON)
+                jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False)
+                self.services.utils.writeDebugFile(jsonStr, "document_generation_response")
+                self.services.chat.progressLogFinish(aiOperationId, True)
+                
+                return AiResponse(
+                    content=json.dumps(generated_data),
+                    metadata=metadata,
+                    documents=[docData]
+                )
+                
+            except Exception as e:
+                logger.error(f"Error rendering document: {str(e)}")
+                if renderOperationId:
+                    self.services.chat.progressLogFinish(renderOperationId, False)
+                self.services.chat.progressLogFinish(aiOperationId, False)
+                raise ValueError(f"Rendering failed: {str(e)}")
            
        except Exception as e:
            logger.error(f"Error in callAiContent: {str(e)}")
--- a/modules/services/serviceAi/subJsonResponseHandling.py
+++ b/modules/services/serviceAi/subJsonResponseHandling.py
@ -1236,8 +1236,12 @@ class JsonResponseHandler:
                # Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
                value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
                
+                # Handle None (path doesn't exist - incomplete JSON)
+                if value is None:
+                    updatedKpi["currentValue"] = kpi.get("currentValue", 0)
+                    logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}")
                # Count items/rows/elements based on type
-                if isinstance(value, list):
+                elif isinstance(value, list):
                    updatedKpi["currentValue"] = len(value)
                    logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
                elif isinstance(value, (int, float)):
@ -1296,8 +1300,12 @@ class JsonResponseHandler:
                # Extract value using path
                value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
                
+                # Handle None (path doesn't exist - incomplete JSON)
+                if value is None:
+                    updatedKpi["currentValue"] = kpi.get("currentValue", 0)
+                    logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}")
                # Count items/rows/elements based on type
-                if isinstance(value, list):
+                elif isinstance(value, list):
                    updatedKpi["currentValue"] = len(value)
                    logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
                elif isinstance(value, (int, float)):
@ -1321,6 +1329,7 @@ class JsonResponseHandler:
        Extract value from object using dot-notation path with array indices.
        
        Example: "sections[0].elements[0].items"
+        Returns None if path doesn't exist (for incomplete JSON handling).
        """
        parts = path.split('.')
        current = obj
@ -1332,20 +1341,30 @@ class JsonResponseHandler:
                index = int(part[part.index('[') + 1:part.index(']')])
                
                if key:
-                    current = current.get(key, [])
-                if isinstance(current, list) and 0 <= index < len(current):
-                    current = current[index]
+                    if isinstance(current, dict):
+                        current = current.get(key)
+                        if current is None:
+                            return None  # Key doesn't exist
+                    else:
+                        return None  # Can't access key on non-dict
+                
+                if isinstance(current, list):
+                    if 0 <= index < len(current):
+                        current = current[index]
+                    else:
+                            # Index out of range - return None for incomplete JSON
+                            return None
                else:
-                    raise KeyError(f"Invalid index {index} for {key}")
+                    # Not a list, can't index
+                    return None
            else:
                # Handle dict access
                if isinstance(current, dict):
                    current = current.get(part)
+                    if current is None:
+                        return None  # Key doesn't exist
                else:
-                    raise KeyError(f"Cannot access {part} on {type(current)}")
-            
-            if current is None:
-                raise KeyError(f"Path {path} returned None at {part}")
+                    return None  # Can't access key on non-dict
        
        return current
    
--- a/modules/services/serviceChat/mainServiceChat.py
+++ b/modules/services/serviceChat/mainServiceChat.py
@ -92,13 +92,16 @@ class ChatService:
                if docRef.startswith("docItem:"):
                    # docItem:<id>:<filename> or docItem:<id> (filename is optional)
                    # ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct)
+                    # Both formats are supported: docItem:<documentId> and docItem:<documentId>:<filename>
                    parts = docRef.split(':')
                    if len(parts) >= 2:
                        docId = parts[1]  # This should be the documentId (UUID)
                        docFound = False
                        
                        # ALWAYS try to match by documentId first (regardless of number of parts)
-                        # This handles: docItem:documentId and docItem:documentId:filename
+                        # This handles both formats:
+                        # - docItem:<documentId> (without filename - still works)
+                        # - docItem:<documentId>:<filename> (with filename - preferred)
                        for message in workflow.messages:
                            # Validate message belongs to this workflow
                            msgWorkflowId = getattr(message, 'workflowId', None)
--- a/modules/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/services/serviceExtraction/mainServiceExtraction.py
@ -138,6 +138,36 @@ class ExtractionService:
                f"extraction.process.{doc.mimeType}"
            )
            
+            # Write extraction results to debug file
+            try:
+                from modules.shared.debugLogger import writeDebugFile
+                import json
+                # Create summary of extraction results for debug
+                extractionSummary = {
+                    "documentName": doc.fileName,
+                    "documentMimeType": doc.mimeType,
+                    "partsCount": len(ec.parts),
+                    "parts": []
+                }
+                for part in ec.parts:
+                    partSummary = {
+                        "typeGroup": part.typeGroup,
+                        "mimeType": part.mimeType,
+                        "label": part.label,
+                        "dataLength": len(part.data) if part.data else 0,
+                        "metadata": part.metadata
+                    }
+                    # Preview the data in full only for parts of at most 500 chars; larger parts get a size placeholder instead
+                    if part.data and len(part.data) <= 500:
+                        partSummary["dataPreview"] = part.data[:500]
+                    elif part.data:
+                        partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
+                    extractionSummary["parts"].append(partSummary)
+                
+                writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
+            except Exception as e:
+                logger.debug(f"Failed to write extraction debug file: {str(e)}")
+            
            results.append(ec)
        
        return results
--- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py
+++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
@ -99,9 +99,16 @@ async def buildExtractionPrompt(
    # Parse extraction intent if AI service is available
    extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
    
-    # Build base prompt
+    # Build base prompt with clear user prompt markers
+    sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt
    adaptive_prompt = f"""
-{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
+{'='*80}
+USER REQUEST / USER PROMPT:
+{'='*80}
+{sanitized_user_prompt}
+{'='*80}
+END OF USER REQUEST / USER PROMPT
+{'='*80}

 You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.

--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@ -479,25 +479,11 @@ class RendererXlsx(BaseRenderer):
                
                sheetNames.append(sectionTitle[:31])  # Excel sheet name limit
        else:
-            # Single table or mixed content - create main sheet
+            # Single table or mixed content - create only main sheet
            documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
            sheetNames.append(documentTitle[:31])  # Excel sheet name limit
-            
-            # Add additional sheets for other content types
-            contentTypes = set()
-            for section in sections:
-                contentType = section.get("content_type", "paragraph")
-                contentTypes.add(contentType)
-            
-            if "table" in contentTypes and len(tableSections) == 1:
-                sheetNames.append("Table Data")
-            if "list" in contentTypes:
-                sheetNames.append("Lists")
-            if "paragraph" in contentTypes or "heading" in contentTypes:
-                sheetNames.append("Text")
        
-        # Limit to 4 sheets maximum
-        return sheetNames[:4]
+        return sheetNames
    
    def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Populate Excel sheets with content from JSON based on actual sheet names."""
@ -527,14 +513,10 @@ class RendererXlsx(BaseRenderer):
                                sheetTitle = caption
                        self._populateTableSheet(sheet, section, styles, sheetTitle)
            else:
-                # Single table or mixed content - use original logic
+                # Single table or mixed content - populate only main sheet
                firstSheetName = sheetNames[0]
                self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
                
-                # If we have multiple sheets, distribute content by type
-                if len(sheetNames) > 1:
-                    self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
-                
        except Exception as e:
            self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
    
--- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py
+++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
@ -72,7 +72,13 @@ async def buildGenerationPrompt(
        continuationText += "Start directly with the next element/section that should follow.\n\n"
        
        # PROMPT FOR CONTINUATION
-        generationPrompt = f"""User request: "{userPrompt}"
+        generationPrompt = f"""{'='*80}
+USER REQUEST / USER PROMPT:
+{'='*80}
+{userPrompt}
+{'='*80}
+END OF USER REQUEST / USER PROMPT
+{'='*80}

 ⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.

@ -93,8 +99,57 @@ Continue generating the remaining content now.
    else:

        # PROMPT FOR FIRST CALL
+        # Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
+        
+        if extracted_content:
+            # If we have extracted content, put it FIRST and make it very clear it's the source data
+            generationPrompt = f"""{'='*80}
+USER REQUEST / USER PROMPT:
+{'='*80}
+{userPrompt}
+{'='*80}
+END OF USER REQUEST / USER PROMPT
+{'='*80}

-        generationPrompt = f"""User request: "{userPrompt}"
+{'='*80}
+⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️
+{'='*80}
+The content below contains the ACTUAL DATA extracted from the source documents.
+You MUST use this data - DO NOT generate fake or example data.
+{'='*80}
+EXTRACTED CONTENT FROM DOCUMENTS:
+{'='*80}
+{extracted_content}
+{'='*80}
+END OF EXTRACTED CONTENT
+{'='*80}
+
+Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
+The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
+You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
+
+JSON structure template (structure only - use data from EXTRACTED CONTENT above):
+{jsonTemplate}
+
+Instructions:
+- Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
+- Do NOT reuse example section IDs; create your own.
+- CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
+- Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
+- IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
+- Output JSON only; no markdown fences or extra text.
+
+Generate your complete response using the extracted content data.
+"""
+        else:
+            # No extracted content - generate from scratch
+            generationPrompt = f"""{'='*80}
+USER REQUEST / USER PROMPT:
+{'='*80}
+{userPrompt}
+{'='*80}
+END OF USER REQUEST / USER PROMPT
+{'='*80}

 Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.

@ -111,12 +166,5 @@ Instructions:
 Generate your complete response.
 """
    
-    # If we have extracted content, prepend it to the prompt
-    if extracted_content:
-        generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
-{extracted_content}
-
-{generationPrompt}"""
-    
    return generationPrompt.strip()

--- a/modules/shared/jsonUtils.py
+++ b/modules/shared/jsonUtils.py
@ -102,12 +102,30 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
    Attempt to repair broken JSON using multiple strategies.
    Generic solution that works for any content type.
    Returns the best repair attempt or None if all fail.
+    
+    IMPORTANT: This function tries to preserve ALL data by avoiding truncation.
+    Only uses truncation as a last resort when structure closing fails.
    """
    if not text:
        return None
    
-    # Strategy 1: Try to extract sections from the entire text first
+    # Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
+    # This preserves all data and should be tried first
+    closedStr = closeJsonStructures(text)
+    obj, err, _ = tryParseJson(closedStr)
+    if err is None and isinstance(obj, dict):
+        sections = extractSectionsFromDocument(obj)
+        if sections:
+            logger.info(f"Repaired JSON using structure closing (preserved all data, found {len(sections)} sections)")
+            return obj
+        else:
+            # Structure closing worked but no sections found - still return it
+            logger.info("Repaired JSON using structure closing (preserved all data, but no sections found)")
+            return obj
+    
+    # Strategy 2: Try to extract sections from the entire text using regex
    # This handles cases where the JSON structure is broken but content is intact
+    # NOTE: _extractSectionsRegex may truncate, but we try it before progressive parsing
    extractedSections = _extractSectionsRegex(text)
    if extractedSections:
        logger.info(f"Extracted {len(extractedSections)} sections using regex")
@ -120,7 +138,10 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
            "documents": [{"sections": extractedSections}]
        }
    
-    # Strategy 2: Progressive parsing - try to find longest valid prefix
+    # Strategy 3: Progressive parsing - try to find longest valid prefix (TRUNCATES DATA)
+    # WARNING: This strategy truncates the input and loses data after the truncation point
+    # Only use as last resort when other strategies fail
+    logger.warning("Structure closing and regex extraction failed, trying progressive parsing (WILL TRUNCATE DATA)")
    bestResult = None
    bestValidLength = 0
    
@ -133,13 +154,13 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
            if err is None and isinstance(obj, dict):
                bestResult = obj
                bestValidLength = i
-                logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
+                logger.debug(f"Progressive parsing success at length {i} (step: {stepSize}) - DATA TRUNCATED AT POSITION {i}")
                break
        if bestResult:
            break
    
    if bestResult:
-        logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
+        logger.warning(f"Repaired JSON using progressive parsing (valid length: {bestValidLength}, DATA LOST AFTER THIS POINT)")
        
        # Check if we have sections in the result
        sections = extractSectionsFromDocument(bestResult)
@ -160,13 +181,6 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
                bestResult["documents"][0]["sections"].extend(extractedSections)
                return bestResult
    
-    # Strategy 3: Structure closing - close incomplete structures
-    closedStr = closeJsonStructures(text)
-    obj, err, _ = tryParseJson(closedStr)
-    if err is None and isinstance(obj, dict):
-        logger.info("Repaired JSON using structure closing")
-        return obj
-    
    logger.warning("All repair strategies failed")
    return None

@ -174,18 +188,43 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
 def closeJsonStructures(text: str) -> str:
    """
    Close incomplete JSON structures by adding missing closing brackets.
+    Also handles unterminated strings by closing them.
    """
    if not text:
        return text
    
+    result = text
+    
+    # Handle an unterminated string at the end of the input.
+    # Example: '{"key": "val' is missing the closing quote of the last string.
+    # Simple heuristic: an odd total count of '"' characters in the text (escaped quotes are counted too)
+    # is treated as an open string; if the last quote is not escaped, a closing quote is appended at the end
+    if result.strip():
+        # Count quotes - if odd number, we have an unterminated string
+        quoteCount = result.count('"')
+        if quoteCount % 2 == 1:
+            # Find the last opening quote that's not escaped
+            lastQuotePos = result.rfind('"')
+            if lastQuotePos >= 0:
+                # Check if it's escaped
+                escapeCount = 0
+                i = lastQuotePos - 1
+                while i >= 0 and result[i] == '\\':
+                    escapeCount += 1
+                    i -= 1
+                # If not escaped (even number of backslashes), close the string
+                if escapeCount % 2 == 0:
+                    # Find where the string should end (before next comma, bracket, or brace)
+                    # For now, just close it at the end
+                    result += '"'
+    
    # Count open/close brackets and braces
-    openBraces = text.count('{')
-    closeBraces = text.count('}')
-    openBrackets = text.count('[')
-    closeBrackets = text.count(']')
+    openBraces = result.count('{')
+    closeBraces = result.count('}')
+    openBrackets = result.count('[')
+    closeBrackets = result.count(']')
    
    # Close incomplete structures
-    result = text
    for _ in range(openBraces - closeBraces):
        result += '}'
    for _ in range(openBrackets - closeBrackets):
@ -202,11 +241,24 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
    
    NOTE: This function is called FROM repairBrokenJson, so it must NOT call repairBrokenJson
    to avoid circular dependency. Instead, it implements its own repair strategies.
+    
+    IMPORTANT: Tries to preserve data by using structure closing first before truncation.
    """
    sections = []
    
-    # Strategy 1: Try progressive parsing to find longest valid JSON prefix
-    # Find the longest valid JSON prefix that contains sections
+    # Strategy 1: Try structure closing WITHOUT truncation first (preserves all data)
+    closed_str = closeJsonStructures(text)
+    obj, err, _ = tryParseJson(closed_str)
+    if err is None and isinstance(obj, dict):
+        extracted_sections = extractSectionsFromDocument(obj)
+        if extracted_sections:
+            logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using structure closing (preserved all data)")
+            return extracted_sections
+    
+    # Strategy 2: Try progressive parsing to find longest valid JSON prefix (TRUNCATES DATA)
+    # WARNING: This truncates the input and loses data
+    # Only use if structure closing failed
+    logger.debug("_extractSectionsRegex: Structure closing failed, trying progressive parsing (WILL TRUNCATE)")
    best_result = None
    best_valid_length = 0
    for step_size in [1000, 500, 100, 50, 10]:
@ -217,7 +269,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
            if err is None and isinstance(obj, dict):
                extracted_sections = extractSectionsFromDocument(obj)
                if extracted_sections:
-                    logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i}")
+                    logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i} (DATA TRUNCATED)")
                    return extracted_sections
                # Store best result even if no sections found
                if not best_result:
--- a/modules/workflows/methods/methodOutlook.py
+++ b/modules/workflows/methods/methodOutlook.py
@ -1183,11 +1183,13 @@ Max length: {maxLength} characters

 Based on the context, decide which documents to attach.

+CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
+
 Return JSON:
 {{
  "subject": "subject line",
  "body": "email body (HTML allowed)",
-  "attachments": ["doc_ref1", "doc_ref2"]
+  "attachments": ["docItem:<documentId>:<filename>"]
 }}
 """
            
@ -1237,6 +1239,9 @@ Return JSON:
                        elif isinstance(ai_attachments, list):
                            ai_attachments = [a for a in ai_attachments if isinstance(a, str)]

+                        # Initialize normalized_ai_attachments
+                        normalized_ai_attachments = []
+                        
                        if ai_attachments:
                            try:
                                ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments
@ -1250,16 +1255,20 @@ Return JSON:
                            selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids]

                            if selected_docs:
-                                # Map selected ChatDocuments back to docItem references
+                                # Map selected ChatDocuments back to docItem references (with full filename)
                                documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs]
+                                # Normalize ai_attachments to full format for storage
+                                normalized_ai_attachments = documentList.copy()
                                logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)")
                            else:
                                # No intersection; use all available documents
                                documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
+                                normalized_ai_attachments = documentList.copy()
                                logger.warning("AI selected attachments not found in available documents, using all documents")
                        else:
                            # No AI selection; use all available documents
                            documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
+                            normalized_ai_attachments = documentList.copy()
                            logger.warning("AI did not specify attachments, using all available documents")
                    else:
                        logger.info("No documents provided in documentList; skipping attachment processing")
@ -1363,7 +1372,7 @@ Return JSON:
                        "cc": cc,
                        "bcc": bcc,
                        "attachments": len(documentList),
-                        "aiSelectedAttachments": ai_attachments if ai_attachments else "all documents",
+                        "aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
                        "aiGenerated": True,
                        "context": context,
                        "emailStyle": emailStyle,
@ -1371,12 +1380,40 @@ Return JSON:
                        "draftData": draft_data
                    }
                    
+                    # Extract attachment filenames for validation metadata
+                    attachmentFilenames = []
+                    attachmentReferences = []
+                    if documentList:
+                        try:
+                            from modules.datamodels.datamodelDocref import DocumentReferenceList
+                            attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
+                            attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
+                            # Store normalized document references (with filenames) - use normalized_ai_attachments if available
+                            attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
+                        except Exception:
+                            pass
+                    
+                    # Create validation metadata for content validator
+                    validationMetadata = {
+                        "actionType": "outlook.composeAndDraftEmailWithContext",
+                        "emailRecipients": to,
+                        "emailCc": cc,
+                        "emailBcc": bcc,
+                        "emailSubject": subject,
+                        "emailAttachments": attachmentFilenames,
+                        "emailAttachmentReferences": attachmentReferences,
+                        "emailAttachmentCount": len(attachmentFilenames),
+                        "emailStyle": emailStyle,
+                        "hasAttachments": len(attachmentFilenames) > 0
+                    }
+                    
                    return ActionResult(
                        success=True,
                        documents=[ActionDocument(
                            documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
                            documentData=json.dumps(draftResultData, indent=2),
-                            mimeType="application/json"
+                            mimeType="application/json",
+                            validationMetadata=validationMetadata
                        )]
                    )
                else:
--- a/modules/workflows/methods/methodSharepoint.py
+++ b/modules/workflows/methods/methodSharepoint.py
@ -1154,6 +1154,53 @@ class MethodSharepoint(MethodBase):
                    resultData = json.loads(fileData)
                    foundDocuments = resultData.get("foundDocuments", [])
                    
+                    # If no foundDocuments, check if it's a listDocuments result (has listResults)
+                    if not foundDocuments and "listResults" in resultData:
+                        logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format")
+                        listResults = resultData.get("listResults", [])
+                        foundDocuments = []
+                        siteIdFromList = None
+                        siteNameFromList = None
+                        
+                        for listResult in listResults:
+                            siteResults = listResult.get("siteResults", [])
+                            for siteResult in siteResults:
+                                items = siteResult.get("items", [])
+                                # Extract site info from first item if available
+                                if items and not siteIdFromList:
+                                    # Take the site name from the first item of this siteResult.
+                                    # The siteId is not read from the list result; it is resolved below via site discovery
+                                    siteNameFromList = items[0].get("siteName")
+                                
+                                for item in items:
+                                    # Convert listDocuments item format to foundDocuments format
+                                    if item.get("type") == "file":
+                                        foundDoc = {
+                                            "id": item.get("id"),
+                                            "name": item.get("name"),
+                                            "type": "file",
+                                            "siteName": item.get("siteName"),
+                                            "siteId": None,  # Will be determined from site discovery
+                                            "webUrl": item.get("webUrl"),
+                                            "fullPath": item.get("webUrl", ""),
+                                            "parentPath": item.get("parentPath", "")
+                                        }
+                                        foundDocuments.append(foundDoc)
+                        
+                        # Discover sites to get siteId if we have siteName
+                        if foundDocuments and siteNameFromList and not siteIdFromList:
+                            logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
+                            allSites = await self._discoverSharePointSites()
+                            matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
+                            if matchingSites:
+                                siteIdFromList = matchingSites[0].get("id")
+                                # Update all foundDocuments with siteId
+                                for doc in foundDocuments:
+                                    doc["siteId"] = siteIdFromList
+                                logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
+                        
+                        logger.info(f"Converted {len(foundDocuments)} files from listResults format")
+                    
                    if foundDocuments:
                        # Extract SharePoint file IDs from foundDocuments
                        sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
@ -1167,6 +1214,15 @@ class MethodSharepoint(MethodBase):
                            siteName = firstDoc.get("siteName")
                            siteId = firstDoc.get("siteId")
                            
+                            # If siteId is missing (from listDocuments conversion), discover sites to find it
+                            if siteName and not siteId:
+                                logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
+                                allSites = await self._discoverSharePointSites()
+                                matchingSites = self._filterSitesByHint(allSites, siteName)
+                                if matchingSites:
+                                    siteId = matchingSites[0].get("id")
+                                    logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
+                            
                            if siteName and siteId:
                                sites = [{
                                    "id": siteId,
@ -1174,6 +1230,19 @@ class MethodSharepoint(MethodBase):
                                    "webUrl": firstDoc.get("webUrl", "")
                                }]
                                logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
+                            elif siteName:
+                                # Try to get site by name
+                                allSites = await self._discoverSharePointSites()
+                                matchingSites = self._filterSitesByHint(allSites, siteName)
+                                if matchingSites:
+                                    sites = [{
+                                        "id": matchingSites[0].get("id"),
+                                        "displayName": siteName,
+                                        "webUrl": matchingSites[0].get("webUrl", "")
+                                    }]
+                                    logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
+                                else:
+                                    return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.")
                            else:
                                return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
                    else:
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@ -421,14 +421,6 @@ class ContentValidator:
            if actionName:
                # Convert action name to human-readable format
                actionDescription = actionName.replace("ai.", "").replace(".", " ").title()
-                if "convert" in actionName.lower():
-                    actionDescription = "Document format conversion"
-                elif "generate" in actionName.lower() or "create" in actionName.lower():
-                    actionDescription = "Document generation"
-                elif "extract" in actionName.lower():
-                    actionDescription = "Content extraction"
-                elif "process" in actionName.lower():
-                    actionDescription = "Content processing"
                actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
            
            # Build action parameters context
@ -441,6 +433,25 @@ class ContentValidator:
                    paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
                    actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
            
+            # Extract validation metadata from documents (action-specific context)
+            validationMetadataContext = ""
+            if documents:
+                metadataList = []
+                for doc in documents:
+                    metadata = getattr(doc, 'validationMetadata', None)
+                    if metadata and isinstance(metadata, dict):
+                        metadataList.append(metadata)
+                
+                if metadataList:
+                    # Combine all metadata (usually just one document)
+                    combinedMetadata = {}
+                    for meta in metadataList:
+                        combinedMetadata.update(meta)
+                    
+                    if combinedMetadata:
+                        metadataJson = json.dumps(combinedMetadata, ensure_ascii=False, indent=2)
+                        validationMetadataContext = f"\nACTION VALIDATION METADATA: {metadataJson}"
+            
            # Format success criteria for display with index numbers
            if successCriteria:
                criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
@ -452,7 +463,7 @@ class ContentValidator:
 === TASK INFORMATION ===
 {objectiveLabel}: '{objectiveText}'
 EXPECTED DATA TYPE: {dataType}
-EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}
+EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}

 === VALIDATION INSTRUCTIONS ===

@ -466,6 +477,7 @@ VALIDATION RULES:
 5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.

 VALIDATION STEPS:
+- Check ACTION VALIDATION METADATA first (if present) - this contains action-specific context
 - Check structure summary for quantities, counts, statistics
 - Compare found values with required values from criteria
 - If structure unavailable, use metadata only (format, filename, size)
--- a/modules/workflows/processing/modes/modeAutomation.py
+++ b/modules/workflows/processing/modes/modeAutomation.py
@ -169,6 +169,10 @@ class AutomationMode(BaseMode):
        Execute task using Automation mode - executes predefined actions directly.
        No AI planning or review phases - actions are executed sequentially as defined.
        """
+        # Fall back to workflow.getTaskIndex() when no taskIndex was supplied by the caller
+        if taskIndex is None:
+            taskIndex = workflow.getTaskIndex()
+        
        logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
        
        try:
@ -178,7 +182,6 @@ class AutomationMode(BaseMode):
            # Update workflow before executing task
            if taskIndex is not None:
                self._updateWorkflowBeforeExecutingTask(taskIndex)
-                self.services.chat.setWorkflowContext(taskNumber=taskIndex)
            
            # Create task start message
            await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
@ -241,7 +244,7 @@ class AutomationMode(BaseMode):
                
                # Execute action
                result = await self.actionExecutor.executeSingleAction(
-                    action, workflow, taskStep, taskIndex, actionNumber, totalActions
+                    action, workflow, taskStep
                )
                actionResults.append(result)
                
--- a/modules/workflows/processing/modes/modeDynamic.py
+++ b/modules/workflows/processing/modes/modeDynamic.py
@ -561,6 +561,11 @@ class DynamicMode(BaseMode):
            
            # Use connectionReference from selection (required)
            connectionRef = selection.get('connectionReference')
+            
+            # If not found at top level, check in selection['parameters'] (guided action case)
+            if not connectionRef and isinstance(selection, dict) and 'parameters' in selection:
+                connectionRef = selection['parameters'].get('connectionReference')
+            
            if connectionRef:
                # Check if action actually has connectionReference parameter
                methodName, actionName = compoundActionName.split('.', 1)
--- a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
+++ b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
@ -58,9 +58,10 @@ CONTEXT: {{KEY:OVERALL_TASK_CONTEXT}}
 OBJECTIVE: {{KEY:TASK_OBJECTIVE}}

 === AVAILABLE RESOURCES ===
-DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
+AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
 {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
-CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS_INDEX}}
+AVAILABLE_CONNECTIONS_INDEX:
+{{KEY:AVAILABLE_CONNECTIONS_INDEX}}

 === AVAILABLE ACTIONS ===
 {{KEY:AVAILABLE_METHODS}}
@ -82,6 +83,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
 - Be the next logical incremental step (not complete entire objective in one step)
 - Target exactly one output format if producing files
 - Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
+- ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
 - Learn from previous validation feedback and avoid repeated mistakes
 - Include intent analysis fields (dataType, expectedFormats, qualityRequirements, successCriteria)

@ -97,7 +99,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
    "successCriteria": ["specific criterion 1", "specific criterion 2"],
    "userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining what this action will do (1 sentence, first person, friendly tone)",
    "learnings": ["..."],
-    "requiredInputDocuments": ["docList:..."],
+    "requiredInputDocuments": ["docItem:<documentId>:<filename>", "docList:<label>"],
    "requiredConnection": "connection:..." | null,
    "parametersContext": "concise text that Stage 2 will use to set business parameters"
 }}
@ -115,6 +117,9 @@ Analyze actionObjective to determine:
 3. parametersContext: short, sufficient for Stage 2
 4. Return ONLY JSON - no markdown, no explanations
 5. requiredInputDocuments: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
+   - For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
+   - For document lists: use docList:<label> format
+   - Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
 6. requiredConnection: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX
 7. Plan incrementally: one output format per step
 8. Learn from validation feedback - avoid repeating mistakes
@ -307,6 +312,7 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
        PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True),
+        PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
    ]

    template = """TASK DECISION
@ -321,7 +327,9 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)

 === AVAILABLE RESOURCES ===
 ACTIONS: {{KEY:AVAILABLE_METHODS}}
-DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
+AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
+AVAILABLE_CONNECTIONS_INDEX:
+{{KEY:AVAILABLE_CONNECTIONS_INDEX}}

 {{KEY:REVIEW_CONTENT}}

@ -334,12 +342,20 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
 - Next action should ONLY generate the MISSING part, NOT repeat what's already delivered

 === OUTPUT FORMAT ===
+Return ONLY JSON (no markdown, no explanations). The decision MUST:
+- Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
+- ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
+- Use ONLY exact labels from AVAILABLE_CONNECTIONS_INDEX (connection:...)
+- Provide concrete parameter values in nextActionParameters (not placeholders)
+- Match parameter names exactly as defined in AVAILABLE_METHODS
+
 {{
  "status": "continue",
  "reason": "Brief reason explaining why continuing",
  "nextAction": "Selected_action_from_ACTIONS",
  "nextActionParameters": {{
-    "documentList": ["docItem:reference_from_DOCUMENTS"],
+    "documentList": ["docItem:<documentId>:<filename>", "docList:<label>"],
+    "connectionReference": "connection:reference_from_AVAILABLE_CONNECTIONS_INDEX",
    "parameter1": "value1",
    "parameter2": "value2"
  }},
@ -347,16 +363,21 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
 }}

 === RULES ===
- If "continue": MUST provide nextAction and nextActionParameters
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent)
- nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
- CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
- Do NOT repeat failed actions - suggest DIFFERENT approach
- If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
- nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
- If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
+1. Return ONLY JSON - no markdown, no explanations
+2. If "continue": MUST provide nextAction and nextActionParameters
+3. nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
+4. nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
+5. documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
+   - For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
+   - For document lists: use docList:<label> format
+   - Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
+6. connectionReference: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX (required if action needs connection)
+7. nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
+8. CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
+9. Do NOT repeat failed actions - suggest DIFFERENT approach
+10. If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
+11. nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
+12. If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart

 """

--- a/modules/workflows/processing/workflowProcessor.py
+++ b/modules/workflows/processing/workflowProcessor.py
@ -428,7 +428,7 @@ class WorkflowProcessor:
            )
            
            # Prepare AI call options for fast path (balanced, fast processing)
-            from modules.datamodels.datamodelAi import AiCallOptions
+            from modules.datamodels.datamodelAi import AiCallOptions, AiCallRequest
            
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
@ -438,16 +438,19 @@ class WorkflowProcessor:
                maxProcessingTime=15  # Fast path should complete in 15s
            )
            
-            # Call AI (content call - no documents needed for fast path)
-            aiResponse = await self.services.ai.callAiContent(
+            # Call AI directly (no document generation - just plain text response)
+            # Use aiObjects.call() instead of callAiContent() to avoid document generation path
+            aiRequest = AiCallRequest(
                prompt=fastPathPrompt,
-                contentParts=None,  # Fast path doesn't process documents
+                context="",
                options=options,
-                outputFormat=None  # Text response, not document generation
+                contentParts=None  # Fast path doesn't process documents
            )
            
-            # Extract response content (AiResponse.content is a string)
-            responseText = aiResponse.content if isinstance(aiResponse, str) else (aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse))
+            aiCallResponse = await self.services.ai.aiObjects.call(aiRequest)
+            
+            # Extract response content (a string); fall back to "" when AiCallResponse.content is empty or None
+            responseText = aiCallResponse.content if aiCallResponse.content else ""
            
            # Create ActionResult with response
            # For fast path, we create a simple text document with the response
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@ -162,30 +162,38 @@ class WorkflowManager:
            
            self.workflowProcessor = WorkflowProcessor(self.services)
            
-            # Process user-uploaded documents from userInput for complexity detection
-            # This is the correct way: use the input data directly, not workflow state
-            documents = []
-            if userInput.listFileId:
-                try:
-                    documents = await self._processFileIds(userInput.listFileId, None)
-                except Exception as e:
-                    logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
+            # Get workflow mode to determine if complexity detection is needed
+            workflowMode = getattr(self.services.workflow, 'workflowMode', None)
+            skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
            
-            # Detect complexity (AI-based semantic understanding) using user input documents
-            complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
-            logger.info(f"Request complexity detected: {complexity}")
+            if skipComplexityDetection:
+                logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan")
+                complexity = "moderate"  # Default for automation workflows
+            else:
+                # Process user-uploaded documents from userInput for complexity detection
+                # This is the correct way: use the input data directly, not workflow state
+                documents = []
+                if userInput.listFileId:
+                    try:
+                        documents = await self._processFileIds(userInput.listFileId, None)
+                    except Exception as e:
+                        logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
+                
+                # Detect complexity (AI-based semantic understanding) using user input documents
+                complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
+                logger.info(f"Request complexity detected: {complexity}")
            
            # Now send the first message (which will also process the documents again, but that's fine)
            await self._sendFirstMessage(userInput)
            
-            # Route to fast path for simple requests
-            if complexity == "simple":
+            # Route to fast path for simple requests (skip for automation mode)
+            if not skipComplexityDetection and complexity == "simple":
                logger.info("Routing to fast path for simple request")
                await self._executeFastPath(userInput, documents)
                return  # Fast path completes the workflow
            
-            # Route to full workflow for moderate/complex requests
-            logger.info(f"Routing to full workflow for {complexity} request")
+            # Route to full workflow for moderate/complex requests or automation mode
+            logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else ""))
            taskPlan = await self._planTasks(userInput)
            await self._executeTasks(taskPlan)
            await self._processWorkflowResults()