integration testing of adapted ai workflow with fixes

2025-12-01 19:15:50 +01:00 · 2025-12-01 19:15:50 +01:00 · b401be703f
commit b401be703f
parent aff37fd2e2
18 changed files with 648 additions and 293 deletions
--- a/modules/datamodels/datamodelChat.py
+++ b/modules/datamodels/datamodelChat.py
@@ -400,6 +400,10 @@ class ActionDocument(BaseModel):
        None,
        description="Source JSON structure (preserved when rendering to xlsx/docx/pdf)"
    )
    validationMetadata: Optional[Dict[str, Any]] = Field(
        None,
        description="Action-specific metadata for content validation (e.g., email recipients, attachments, SharePoint paths)"
    )
 registerModelLabels(
--- a/modules/interfaces/interfaceAiObjects.py
+++ b/modules/interfaces/interfaceAiObjects.py
@@ -262,11 +262,17 @@ class AiObjects:
                        logger.info(f"✅ Image content part processed successfully with model: {model.name}")
                        # Convert to AiCallResponse format
                        # Note: AiModelResponse doesn't have priceUsd, and processingTime can be None
                        # Calculate processing time if not provided (fallback to 0.0)
                        processingTime = getattr(modelResponse, 'processingTime', None)
                        if processingTime is None:
                            processingTime = 0.0
                        return AiCallResponse(
                            content=modelResponse.content,
                            modelName=model.name,
-                            priceUsd=modelResponse.priceUsd if hasattr(modelResponse, 'priceUsd') else 0.0,
+                            priceUsd=0.0,  # Price will be calculated elsewhere if needed
-                            processingTime=modelResponse.processingTime if hasattr(modelResponse, 'processingTime') else 0.0,
+                            processingTime=processingTime,
                            bytesSent=0,  # Will be calculated elsewhere
                            bytesReceived=0,  # Will be calculated elsewhere
                            errorCount=0
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -944,32 +944,17 @@ If no trackable items can be identified, return: {{"kpis": []}}
        )
        try:
            # Default outputFormat to "txt" if not specified (unified path - all formats handled the same way)
            if not outputFormat:
                outputFormat = "txt"
            # Extraction is now separate - contentParts must be extracted before calling
            # Require operationType to be set before calling
            opType = getattr(options, "operationType", None)
            if not opType:
-                # If outputFormat is specified, default to DATA_GENERATE
+                # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE
-                if outputFormat:
+                options.operationType = OperationTypeEnum.DATA_GENERATE
-                    options.operationType = OperationTypeEnum.DATA_GENERATE
+                opType = OperationTypeEnum.DATA_GENERATE
                    opType = OperationTypeEnum.DATA_GENERATE
                else:
                    self.services.chat.progressLogUpdate(aiOperationId, 0.1, "Analyzing prompt parameters")
                    analyzedOptions = await self._analyzePromptAndCreateOptions(prompt)
                    if analyzedOptions and hasattr(analyzedOptions, "operationType") and analyzedOptions.operationType:
                        options.operationType = analyzedOptions.operationType
                        # Merge other analyzed options
                        if hasattr(analyzedOptions, "priority"):
                            options.priority = analyzedOptions.priority
                        if hasattr(analyzedOptions, "processingMode"):
                            options.processingMode = analyzedOptions.processingMode
                        if hasattr(analyzedOptions, "compressPrompt"):
                            options.compressPrompt = analyzedOptions.compressPrompt
                        if hasattr(analyzedOptions, "compressContext"):
                            options.compressContext = analyzedOptions.compressContext
                    else:
                        # Default to DATA_ANALYSE if analysis fails
                        options.operationType = OperationTypeEnum.DATA_ANALYSE
                    opType = options.operationType
            # Handle IMAGE_GENERATE operations
            if opType == OperationTypeEnum.IMAGE_GENERATE:
@@ -1052,171 +1037,232 @@ If no trackable items can be identified, return: {{"kpis": []}}
                    self.services.chat.progressLogFinish(aiOperationId, False)
                    raise ValueError(errorMsg)
-            # Handle document generation (outputFormat specified)
+            # Handle document generation (outputFormat always set, defaults to "txt")
-            if outputFormat:
+            # Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way
-                # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
+            # outputFormat is always set now (defaults to "txt" if not specified)
                options.compressPrompt = False
                options.compressContext = False
-                # Convert contentParts to text for generation prompt (if provided)
+            # CRITICAL: For document generation with JSON templates, NEVER compress the prompt
-                if contentParts:
+            options.compressPrompt = False
-                    # Convert contentParts to text for generation prompt
+            options.compressContext = False
-                    content_for_generation = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
+            
            # Process contentParts for generation prompt (if provided)
            # Use generic _callWithContentParts() which handles all content types (images, text, etc.)
            # This automatically processes images with vision models and merges all results
            if contentParts:
                # Filter out binary/other parts that shouldn't be processed
                processableParts = []
                skippedParts = []
                for p in contentParts:
                    if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))):
                        processableParts.append(p)
                    else:
                        skippedParts.append(p)
                if skippedParts:
                    logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation")
                if processableParts:
                    # Count images for progress update
                    imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))])
                    if imageCount > 0:
                        self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models")
                    # Build proper extraction prompt using buildExtractionPrompt
                    # This creates a focused extraction prompt, not the user's generation prompt
                    from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt
                    # Determine renderer for format-specific guidelines
                    renderer = None
                    if outputFormat:
                        try:
                            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                            generationService = GenerationService(self.services)
                            renderer = generationService.getRendererForFormat(outputFormat)
                        except Exception as e:
                            logger.debug(f"Could not get renderer for format {outputFormat}: {e}")
                    extractionPrompt = await buildExtractionPrompt(
                        outputFormat=outputFormat or "txt",
                        userPrompt=prompt,  # User's prompt as context for what to extract
                        title=title or "Document",
                        aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None,
                        services=self.services,
                        renderer=renderer
                    )
                    logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt")
                    # Use DATA_EXTRACT operation type for extraction
                    extractionOptions = AiCallOptions(
                        operationType=OperationTypeEnum.DATA_EXTRACT,  # Use DATA_EXTRACT for extraction
                        compressPrompt=options.compressPrompt,
                        compressContext=options.compressContext
                    )
                    extractionRequest = AiCallRequest(
                        prompt=extractionPrompt,  # Use proper extraction prompt, not user's generation prompt
                        context="",
                        options=extractionOptions,
                        contentParts=processableParts
                    )
                    # Write debug file for extraction prompt (all parts)
                    self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt")
                    # Call generic content parts processor - handles images, text, chunking, merging
                    extractionResponse = await self.aiObjects.call(extractionRequest)
                    # Write debug file for extraction response
                    if extractionResponse.content:
                        self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response")
                    else:
                        self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response")
                        logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})")
                    # Use extracted content directly for generation prompt
                    if extractionResponse.errorCount == 0 and extractionResponse.content:
                        # The extracted content is already merged and ready to use
                        content_for_generation = extractionResponse.content
                        logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation")
                    else:
                        # Extraction failed - use placeholders
                        logger.warning(f"Content extraction failed, using placeholders")
                        placeholderParts = []
                        for p in processableParts:
                            placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]")
                        content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None
                else:
                    content_for_generation = None
                    logger.debug("No processable parts found in contentParts")
            else:
                content_for_generation = None
-                self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
+            self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt")
-                from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
+            from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt
-                generation_prompt = await buildGenerationPrompt(
+            generation_prompt = await buildGenerationPrompt(
-                    outputFormat, prompt, title, content_for_generation, None
+                outputFormat, prompt, title, content_for_generation, None
            )
            promptArgs = {
                "outputFormat": outputFormat,
                "userPrompt": prompt,
                "title": title,
                "extracted_content": content_for_generation
            }
            self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
            # Extract user prompt from promptArgs for task completion analysis
            userPrompt = None
            if promptArgs:
                userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
            generated_json = await self._callAiWithLooping(
                generation_prompt, 
                options, 
                "document_generation",
                buildGenerationPrompt,
                promptArgs,
                aiOperationId,
                userPrompt=userPrompt
            )
            self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
            try:
                extracted_json = self.services.utils.jsonExtractString(generated_json)
                generated_data = json.loads(extracted_json)
            except json.JSONDecodeError as e:
                logger.error(f"Failed to parse generated JSON: {str(e)}")
                self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
                self.services.chat.progressLogFinish(aiOperationId, False)
                raise ValueError(f"Generated content is not valid JSON: {str(e)}")
            # Extract title and filename from generated document structure
            extractedTitle = title
            extractedFilename = None
            if isinstance(generated_data, dict) and "documents" in generated_data:
                docs = generated_data["documents"]
                if isinstance(docs, list) and len(docs) > 0:
                    firstDoc = docs[0]
                    if isinstance(firstDoc, dict):
                        if firstDoc.get("title"):
                            extractedTitle = firstDoc["title"]
                        if firstDoc.get("filename"):
                            extractedFilename = firstDoc["filename"]
            # Ensure metadata contains the extracted title
            if "metadata" not in generated_data:
                generated_data["metadata"] = {}
            if extractedTitle:
                generated_data["metadata"]["title"] = extractedTitle
            # Create separate operation for content rendering
            renderOperationId = f"{aiOperationId}_render"
            renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
            self.services.chat.progressLogStart(
                renderOperationId,
                "Content Rendering",
                "Rendering",
                f"Format: {outputFormat}",
                parentId=renderParentLogId
            )
            try:
                from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                generationService = GenerationService(self.services)
                self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
                rendered_content, mime_type = await generationService.renderReport(
                    generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
                )
                self.services.chat.progressLogFinish(renderOperationId, True)
-                promptArgs = {
+                # Determine document name
-                    "outputFormat": outputFormat,
+                if extractedFilename:
-                    "userPrompt": prompt,
+                    documentName = extractedFilename
-                    "title": title,
+                elif extractedTitle and extractedTitle != "Generated Document":
-                    "extracted_content": content_for_generation
+                    sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
-                }
+                    sanitized = re.sub(r"_+", "_", sanitized).strip("_")
-                
+                    if sanitized:
-                self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation")
+                        if not sanitized.lower().endswith(f".{outputFormat}"):
-                # Extract user prompt from promptArgs for task completion analysis
+                            documentName = f"{sanitized}.{outputFormat}"
                userPrompt = None
                if promptArgs:
                    userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt")
                generated_json = await self._callAiWithLooping(
                    generation_prompt, 
                    options, 
                    "document_generation",
                    buildGenerationPrompt,
                    promptArgs,
                    aiOperationId,
                    userPrompt=userPrompt
                )
                self.services.chat.progressLogUpdate(aiOperationId, 0.7, "Parsing generated JSON")
                try:
                    extracted_json = self.services.utils.jsonExtractString(generated_json)
                    generated_data = json.loads(extracted_json)
                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse generated JSON: {str(e)}")
                    self.services.utils.writeDebugFile(generated_json, "failed_json_parsing")
                    self.services.chat.progressLogFinish(aiOperationId, False)
                    raise ValueError(f"Generated content is not valid JSON: {str(e)}")
                # Extract title and filename from generated document structure
                extractedTitle = title
                extractedFilename = None
                if isinstance(generated_data, dict) and "documents" in generated_data:
                    docs = generated_data["documents"]
                    if isinstance(docs, list) and len(docs) > 0:
                        firstDoc = docs[0]
                        if isinstance(firstDoc, dict):
                            if firstDoc.get("title"):
                                extractedTitle = firstDoc["title"]
                            if firstDoc.get("filename"):
                                extractedFilename = firstDoc["filename"]
                # Ensure metadata contains the extracted title
                if "metadata" not in generated_data:
                    generated_data["metadata"] = {}
                if extractedTitle:
                    generated_data["metadata"]["title"] = extractedTitle
                # Create separate operation for content rendering
                renderOperationId = f"{aiOperationId}_render"
                renderParentLogId = self.services.chat.getOperationLogId(aiOperationId)
                self.services.chat.progressLogStart(
                    renderOperationId,
                    "Content Rendering",
                    "Rendering",
                    f"Format: {outputFormat}",
                    parentId=renderParentLogId
                )
                try:
                    from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
                    generationService = GenerationService(self.services)
                    self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format")
                    rendered_content, mime_type = await generationService.renderReport(
                        generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self
                    )
                    self.services.chat.progressLogFinish(renderOperationId, True)
                    # Determine document name
                    if extractedFilename:
                        documentName = extractedFilename
                    elif extractedTitle and extractedTitle != "Generated Document":
                        sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle)
                        sanitized = re.sub(r"_+", "_", sanitized).strip("_")
                        if sanitized:
                            if not sanitized.lower().endswith(f".{outputFormat}"):
                                documentName = f"{sanitized}.{outputFormat}"
                            else:
                                documentName = sanitized
                        else:
-                            documentName = f"generated.{outputFormat}"
+                            documentName = sanitized
                    else:
                        documentName = f"generated.{outputFormat}"
                else:
                    documentName = f"generated.{outputFormat}"
-                    # Build document data
+                # Build document data
-                    docData = DocumentData(
+                docData = DocumentData(
-                        documentName=documentName,
+                    documentName=documentName,
-                        documentData=rendered_content,
+                    documentData=rendered_content,
-                        mimeType=mime_type,
+                    mimeType=mime_type,
-                        sourceJson=generated_data  # Preserve source JSON for structure validation
+                    sourceJson=generated_data  # Preserve source JSON for structure validation
                    )
                    metadata = AiResponseMetadata(
                        title=extractedTitle or title or "Generated Document",
                        filename=extractedFilename,
                        operationType=opType.value if opType else None
                    )
                    self.services.utils.writeDebugFile(str(generated_data), "document_generation_response")
                    self.services.chat.progressLogFinish(aiOperationId, True)
                    return AiResponse(
                        content=json.dumps(generated_data),
                        metadata=metadata,
                        documents=[docData]
                    )
                except Exception as e:
                    logger.error(f"Error rendering document: {str(e)}")
                    if renderOperationId:
                        self.services.chat.progressLogFinish(renderOperationId, False)
                    self.services.chat.progressLogFinish(aiOperationId, False)
                    raise ValueError(f"Rendering failed: {str(e)}")
            # Handle text processing (no outputFormat)
            self.services.chat.progressLogUpdate(aiOperationId, 0.5, "Processing text call")
            if contentParts:
                # Process contentParts through AI
                # Convert contentParts to text for prompt
                contentText = "\n\n".join([f"[{part.label}]\n{part.data}" for part in contentParts if part.data])
                fullPrompt = f"{prompt}\n\n{contentText}" if contentText else prompt
                result_content = await self._callAiWithLooping(
                    fullPrompt, options, "text", None, None, aiOperationId
                )
            else:
                # Direct text call (no documents to process)
                result_content = await self._callAiWithLooping(
                    prompt, options, "text", None, None, aiOperationId
                )
-            metadata = AiResponseMetadata(
+                metadata = AiResponseMetadata(
-                operationType=opType.value if opType else None
+                    title=extractedTitle or title or "Generated Document",
-            )
+                    filename=extractedFilename,
                    operationType=opType.value if opType else None
                )
-            self.services.chat.progressLogFinish(aiOperationId, True)
+                # Write JSON with proper formatting (not str() which can truncate)
                jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False)
                self.services.utils.writeDebugFile(jsonStr, "document_generation_response")
                self.services.chat.progressLogFinish(aiOperationId, True)
-            return AiResponse(
+                return AiResponse(
-                content=result_content,
+                    content=json.dumps(generated_data),
-                metadata=metadata
+                    metadata=metadata,
-            )
+                    documents=[docData]
                )
            except Exception as e:
                logger.error(f"Error rendering document: {str(e)}")
                if renderOperationId:
                    self.services.chat.progressLogFinish(renderOperationId, False)
                self.services.chat.progressLogFinish(aiOperationId, False)
                raise ValueError(f"Rendering failed: {str(e)}")
        except Exception as e:
            logger.error(f"Error in callAiContent: {str(e)}")
--- a/modules/services/serviceAi/subJsonResponseHandling.py
+++ b/modules/services/serviceAi/subJsonResponseHandling.py
@@ -1236,8 +1236,12 @@ class JsonResponseHandler:
                # Simple path format: "sections[0].elements[0].items" or "sections[0].elements[0].rows"
                value = JsonResponseHandler._extractValueByPath(parsedJson, jsonPath)
                # Handle None (path doesn't exist - incomplete JSON)
                if value is None:
                    updatedKpi["currentValue"] = kpi.get("currentValue", 0)
                    logger.debug(f"KPI {kpiId} path {jsonPath} not found in JSON (incomplete), keeping current value {updatedKpi['currentValue']}")
                # Count items/rows/elements based on type
-                if isinstance(value, list):
+                elif isinstance(value, list):
                    updatedKpi["currentValue"] = len(value)
                    logger.debug(f"Extracted KPI {kpiId} from path {jsonPath}: list with {len(value)} items")
                elif isinstance(value, (int, float)):
@@ -1296,8 +1300,12 @@ class JsonResponseHandler:
                # Extract value using path
                value = JsonResponseHandler._extractValueByPath(parsed, jsonPath)
                # Handle None (path doesn't exist - incomplete JSON)
                if value is None:
                    updatedKpi["currentValue"] = kpi.get("currentValue", 0)
                    logger.debug(f"KPI {kpiId} path {jsonPath} not found in completed JSON (still incomplete), keeping current value {updatedKpi['currentValue']}")
                # Count items/rows/elements based on type
-                if isinstance(value, list):
+                elif isinstance(value, list):
                    updatedKpi["currentValue"] = len(value)
                    logger.debug(f"Extracted KPI {kpiId} from completed JSON: list with {len(value)} items")
                elif isinstance(value, (int, float)):
@@ -1321,6 +1329,7 @@ class JsonResponseHandler:
        Extract value from object using dot-notation path with array indices.
        Example: "sections[0].elements[0].items"
        Returns None if path doesn't exist (for incomplete JSON handling).
        """
        parts = path.split('.')
        current = obj
@@ -1332,20 +1341,30 @@ class JsonResponseHandler:
                index = int(part[part.index('[') + 1:part.index(']')])
                if key:
-                    current = current.get(key, [])
+                    if isinstance(current, dict):
-                if isinstance(current, list) and 0 <= index < len(current):
+                        current = current.get(key)
-                    current = current[index]
+                        if current is None:
                            return None  # Key doesn't exist
                    else:
                        return None  # Can't access key on non-dict
                if isinstance(current, list):
                    if 0 <= index < len(current):
                        current = current[index]
                    else:
                            # Index out of range - return None for incomplete JSON
                            return None
                else:
-                    raise KeyError(f"Invalid index {index} for {key}")
+                    # Not a list, can't index
                    return None
            else:
                # Handle dict access
                if isinstance(current, dict):
                    current = current.get(part)
                    if current is None:
                        return None  # Key doesn't exist
                else:
-                    raise KeyError(f"Cannot access {part} on {type(current)}")
+                    return None  # Can't access key on non-dict
            if current is None:
                raise KeyError(f"Path {path} returned None at {part}")
        return current
--- a/modules/services/serviceChat/mainServiceChat.py
+++ b/modules/services/serviceChat/mainServiceChat.py
@@ -92,13 +92,16 @@ class ChatService:
                if docRef.startswith("docItem:"):
                    # docItem:<id>:<filename> or docItem:<id> (filename is optional)
                    # ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct)
                    # Both formats are supported: docItem:<documentId> and docItem:<documentId>:<filename>
                    parts = docRef.split(':')
                    if len(parts) >= 2:
                        docId = parts[1]  # This should be the documentId (UUID)
                        docFound = False
                        # ALWAYS try to match by documentId first (regardless of number of parts)
-                        # This handles: docItem:documentId and docItem:documentId:filename
+                        # This handles both formats:
                        # - docItem:<documentId> (without filename - still works)
                        # - docItem:<documentId>:<filename> (with filename - preferred)
                        for message in workflow.messages:
                            # Validate message belongs to this workflow
                            msgWorkflowId = getattr(message, 'workflowId', None)
--- a/modules/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/services/serviceExtraction/mainServiceExtraction.py
@@ -138,6 +138,36 @@ class ExtractionService:
                f"extraction.process.{doc.mimeType}"
            )
            # Write extraction results to debug file
            try:
                from modules.shared.debugLogger import writeDebugFile
                import json
                # Create summary of extraction results for debug
                extractionSummary = {
                    "documentName": doc.fileName,
                    "documentMimeType": doc.mimeType,
                    "partsCount": len(ec.parts),
                    "parts": []
                }
                for part in ec.parts:
                    partSummary = {
                        "typeGroup": part.typeGroup,
                        "mimeType": part.mimeType,
                        "label": part.label,
                        "dataLength": len(part.data) if part.data else 0,
                        "metadata": part.metadata
                    }
                    # Include data preview for small parts (first 500 chars)
                    if part.data and len(part.data) <= 500:
                        partSummary["dataPreview"] = part.data[:500]
                    elif part.data:
                        partSummary["dataPreview"] = f"[Large data: {len(part.data)} chars - truncated]"
                    extractionSummary["parts"].append(partSummary)
                writeDebugFile(json.dumps(extractionSummary, indent=2, ensure_ascii=False), f"extraction_result_{doc.fileName}")
            except Exception as e:
                logger.debug(f"Failed to write extraction debug file: {str(e)}")
            results.append(ec)
        return results
--- a/modules/services/serviceExtraction/subPromptBuilderExtraction.py
+++ b/modules/services/serviceExtraction/subPromptBuilderExtraction.py
@ -99,9 +99,16 @@ async def buildExtractionPrompt(
    # Parse extraction intent if AI service is available
    extraction_intent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services) if aiService else userPrompt
-    # Build base prompt
+    # Build base prompt with clear user prompt markers
    sanitized_user_prompt = services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt
    adaptive_prompt = f"""
-{services.utils.sanitizePromptContent(userPrompt, 'userinput') if services else userPrompt}
+{'='*80}
 USER REQUEST / USER PROMPT:
 {'='*80}
 {sanitized_user_prompt}
 {'='*80}
 END OF USER REQUEST / USER PROMPT
 {'='*80}
 You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@ -479,25 +479,11 @@ class RendererXlsx(BaseRenderer):
                sheetNames.append(sectionTitle[:31])  # Excel sheet name limit
        else:
-            # Single table or mixed content - create main sheet
+            # Single table or mixed content - create only main sheet
            documentTitle = jsonContent.get("metadata", {}).get("title", "Document")
            sheetNames.append(documentTitle[:31])  # Excel sheet name limit
-            # Add additional sheets for other content types
+        return sheetNames
            contentTypes = set()
            for section in sections:
                contentType = section.get("content_type", "paragraph")
                contentTypes.add(contentType)
            if "table" in contentTypes and len(tableSections) == 1:
                sheetNames.append("Table Data")
            if "list" in contentTypes:
                sheetNames.append("Lists")
            if "paragraph" in contentTypes or "heading" in contentTypes:
                sheetNames.append("Text")
        # Limit to 4 sheets maximum
        return sheetNames[:4]
    def _populateExcelSheets(self, sheets: Dict[str, Any], jsonContent: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Populate Excel sheets with content from JSON based on actual sheet names."""
@ -527,14 +513,10 @@ class RendererXlsx(BaseRenderer):
                                sheetTitle = caption
                        self._populateTableSheet(sheet, section, styles, sheetTitle)
            else:
-                # Single table or mixed content - use original logic
+                # Single table or mixed content - populate only main sheet
                firstSheetName = sheetNames[0]
                self._populateMainSheet(sheets[firstSheetName], jsonContent, styles)
                # If we have multiple sheets, distribute content by type
                if len(sheetNames) > 1:
                    self._populateContentTypeSheets(sheets, jsonContent, styles, sheetNames[1:])
        except Exception as e:
            self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
--- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py
+++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py
@ -72,7 +72,13 @@ async def buildGenerationPrompt(
        continuationText += "Start directly with the next element/section that should follow.\n\n"
        # PROMPT FOR CONTINUATION
-        generationPrompt = f"""User request: "{userPrompt}"
+        generationPrompt = f"""{'='*80}
 USER REQUEST / USER PROMPT:
 {'='*80}
 {userPrompt}
 {'='*80}
 END OF USER REQUEST / USER PROMPT
 {'='*80}
 ⚠️ CONTINUATION MODE: Response was incomplete. Generate ONLY the remaining content.
@ -93,8 +99,57 @@ Continue generating the remaining content now.
    else:
        # PROMPT FOR FIRST CALL
        # Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
-        generationPrompt = f"""User request: "{userPrompt}"
+        if extracted_content:
            # If we have extracted content, put it FIRST and make it very clear it's the source data
            generationPrompt = f"""{'='*80}
 USER REQUEST / USER PROMPT:
 {'='*80}
 {userPrompt}
 {'='*80}
 END OF USER REQUEST / USER PROMPT
 {'='*80}
 {'='*80}
 ⚠️ CRITICAL: USE THIS EXTRACTED CONTENT AS YOUR DATA SOURCE ⚠️
 {'='*80}
 The content below contains the ACTUAL DATA extracted from the source documents.
 You MUST use this data - DO NOT generate fake or example data.
 {'='*80}
 EXTRACTED CONTENT FROM DOCUMENTS:
 {'='*80}
 {extracted_content}
 {'='*80}
 END OF EXTRACTED CONTENT
 {'='*80}
 Generate a VALID JSON response using the EXTRACTED CONTENT above as your data source.
 The JSON structure template below shows ONLY the structure pattern - the example values are NOT real data.
 You MUST use the actual data from EXTRACTED CONTENT above, NOT the example values from the template.
 JSON structure template (structure only - use data from EXTRACTED CONTENT above):
 {jsonTemplate}
 Instructions:
 - Return ONLY valid JSON (strict). No comments. No trailing commas. Use double quotes.
 - Do NOT reuse example section IDs; create your own.
 - CRITICAL: Use the ACTUAL DATA from EXTRACTED CONTENT above, NOT the example values from the template.
 - Generate complete content based on the user request and the extracted content. Do NOT just give an instruction or comments. Deliver the complete response.
 - IMPORTANT: Set a meaningful "filename" in each document with appropriate file extension (e.g., "prime_numbers.txt", "report.docx", "data.json"). The filename should reflect the content and task objective.
 - Output JSON only; no markdown fences or extra text.
 Generate your complete response using the extracted content data.
 """
        else:
            # No extracted content - generate from scratch
            generationPrompt = f"""{'='*80}
 USER REQUEST / USER PROMPT:
 {'='*80}
 {userPrompt}
 {'='*80}
 END OF USER REQUEST / USER PROMPT
 {'='*80}
 Generate a VALID JSON response for the user request. The template below shows ONLY the structure pattern - it is NOT existing content.
@ -111,12 +166,5 @@ Instructions:
 Generate your complete response.
 """
    # If we have extracted content, prepend it to the prompt
    if extracted_content:
        generationPrompt = f"""EXTRACTED CONTENT FROM DOCUMENTS:
 {extracted_content}
 {generationPrompt}"""
    return generationPrompt.strip()
--- a/modules/shared/jsonUtils.py
+++ b/modules/shared/jsonUtils.py
@ -102,12 +102,30 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
    Attempt to repair broken JSON using multiple strategies.
    Generic solution that works for any content type.
    Returns the best repair attempt or None if all fail.
    IMPORTANT: This function tries to preserve ALL data by avoiding truncation.
    Only uses truncation as a last resort when structure closing fails.
    """
    if not text:
        return None
-    # Strategy 1: Try to extract sections from the entire text first
+    # Strategy 1: Structure closing - close incomplete structures WITHOUT truncating
    # This preserves all data and should be tried first
    closedStr = closeJsonStructures(text)
    obj, err, _ = tryParseJson(closedStr)
    if err is None and isinstance(obj, dict):
        sections = extractSectionsFromDocument(obj)
        if sections:
            logger.info(f"Repaired JSON using structure closing (preserved all data, found {len(sections)} sections)")
            return obj
        else:
            # Structure closing worked but no sections found - still return it
            logger.info("Repaired JSON using structure closing (preserved all data, but no sections found)")
            return obj
    # Strategy 2: Try to extract sections from the entire text using regex
    # This handles cases where the JSON structure is broken but content is intact
    # NOTE: _extractSectionsRegex may truncate, but we try it before progressive parsing
    extractedSections = _extractSectionsRegex(text)
    if extractedSections:
        logger.info(f"Extracted {len(extractedSections)} sections using regex")
@ -120,7 +138,10 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
            "documents": [{"sections": extractedSections}]
        }
-    # Strategy 2: Progressive parsing - try to find longest valid prefix
+    # Strategy 3: Progressive parsing - try to find longest valid prefix (TRUNCATES DATA)
    # WARNING: This strategy truncates the input and loses data after the truncation point
    # Only use as last resort when other strategies fail
    logger.warning("Structure closing and regex extraction failed, trying progressive parsing (WILL TRUNCATE DATA)")
    bestResult = None
    bestValidLength = 0
@ -133,13 +154,13 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
            if err is None and isinstance(obj, dict):
                bestResult = obj
                bestValidLength = i
-                logger.debug(f"Progressive parsing success at length {i} (step: {stepSize})")
+                logger.debug(f"Progressive parsing success at length {i} (step: {stepSize}) - DATA TRUNCATED AT POSITION {i}")
                break
        if bestResult:
            break
    if bestResult:
-        logger.info(f"Repaired JSON using progressive parsing (valid length: {bestValidLength})")
+        logger.warning(f"Repaired JSON using progressive parsing (valid length: {bestValidLength}, DATA LOST AFTER THIS POINT)")
        # Check if we have sections in the result
        sections = extractSectionsFromDocument(bestResult)
@ -160,13 +181,6 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
                bestResult["documents"][0]["sections"].extend(extractedSections)
                return bestResult
    # Strategy 3: Structure closing - close incomplete structures
    closedStr = closeJsonStructures(text)
    obj, err, _ = tryParseJson(closedStr)
    if err is None and isinstance(obj, dict):
        logger.info("Repaired JSON using structure closing")
        return obj
    logger.warning("All repair strategies failed")
    return None
@ -174,18 +188,43 @@ def repairBrokenJson(text: str) -> Optional[Dict[str, Any]]:
 def closeJsonStructures(text: str) -> str:
    """
    Close incomplete JSON structures by adding missing closing brackets.
    Also handles unterminated strings by closing them.
    """
    if not text:
        return text
    result = text
    # Handle unterminated strings: find the last unclosed string
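    # Target input that stops inside a string, e.g.  "value  or  "value\n  (no closing quote)
    # Simple heuristic: an odd total number of double quotes in the text means a string is unterminated
    # (escaped quotes are included in that count, so this is only an approximation)
    # If the last quote is not escaped, close the string by appending a quote at the very end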
    if result.strip():
        # Count quotes - if odd number, we have an unterminated string
        quoteCount = result.count('"')
        if quoteCount % 2 == 1:
            # Find the last opening quote that's not escaped
            lastQuotePos = result.rfind('"')
            if lastQuotePos >= 0:
                # Check if it's escaped
                escapeCount = 0
                i = lastQuotePos - 1
                while i >= 0 and result[i] == '\\':
                    escapeCount += 1
                    i -= 1
                # If not escaped (even number of backslashes), close the string
                if escapeCount % 2 == 0:
                    # Find where the string should end (before next comma, bracket, or brace)
                    # For now, just close it at the end
                    result += '"'
    # Count open/close brackets and braces
-    openBraces = text.count('{')
+    openBraces = result.count('{')
-    closeBraces = text.count('}')
+    closeBraces = result.count('}')
-    openBrackets = text.count('[')
+    openBrackets = result.count('[')
-    closeBrackets = text.count(']')
+    closeBrackets = result.count(']')
    # Close incomplete structures
    result = text
    for _ in range(openBraces - closeBraces):
        result += '}'
    for _ in range(openBrackets - closeBrackets):
@ -202,11 +241,24 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
    NOTE: This function is called FROM repairBrokenJson, so it must NOT call repairBrokenJson
    to avoid circular dependency. Instead, it implements its own repair strategies.
    IMPORTANT: Tries to preserve data by using structure closing first before truncation.
    """
    sections = []
-    # Strategy 1: Try progressive parsing to find longest valid JSON prefix
+    # Strategy 1: Try structure closing WITHOUT truncation first (preserves all data)
-    # Find the longest valid JSON prefix that contains sections
+    closed_str = closeJsonStructures(text)
    obj, err, _ = tryParseJson(closed_str)
    if err is None and isinstance(obj, dict):
        extracted_sections = extractSectionsFromDocument(obj)
        if extracted_sections:
            logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using structure closing (preserved all data)")
            return extracted_sections
    # Strategy 2: Try progressive parsing to find longest valid JSON prefix (TRUNCATES DATA)
    # WARNING: This truncates the input and loses data
    # Only use if structure closing failed
    logger.debug("_extractSectionsRegex: Structure closing failed, trying progressive parsing (WILL TRUNCATE)")
    best_result = None
    best_valid_length = 0
    for step_size in [1000, 500, 100, 50, 10]:
@ -217,7 +269,7 @@ def _extractSectionsRegex(text: str) -> List[Dict[str, Any]]:
            if err is None and isinstance(obj, dict):
                extracted_sections = extractSectionsFromDocument(obj)
                if extracted_sections:
-                    logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i}")
+                    logger.debug(f"_extractSectionsRegex: Extracted {len(extracted_sections)} sections using progressive parsing at length {i} (DATA TRUNCATED)")
                    return extracted_sections
                # Store best result even if no sections found
                if not best_result:
--- a/modules/workflows/methods/methodOutlook.py
+++ b/modules/workflows/methods/methodOutlook.py
@ -1183,11 +1183,13 @@ Max length: {maxLength} characters
 Based on the context, decide which documents to attach.
 CRITICAL: Use EXACT document references from Available_Document_References above. For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
 Return JSON:
 {{
  "subject": "subject line",
  "body": "email body (HTML allowed)",
-  "attachments": ["doc_ref1", "doc_ref2"]
+  "attachments": ["docItem:<documentId>:<filename>"]
 }}
 """
@ -1237,6 +1239,9 @@ Return JSON:
                        elif isinstance(ai_attachments, list):
                            ai_attachments = [a for a in ai_attachments if isinstance(a, str)]
                        # Initialize normalized_ai_attachments
                        normalized_ai_attachments = []
                        if ai_attachments:
                            try:
                                ai_refs = [ai_attachments] if isinstance(ai_attachments, str) else ai_attachments
@ -1250,16 +1255,20 @@ Return JSON:
                            selected_docs = [d for d in ai_docs if getattr(d, 'id', None) in available_ids]
                            if selected_docs:
-                                # Map selected ChatDocuments back to docItem references
+                                # Map selected ChatDocuments back to docItem references (with full filename)
                                documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in selected_docs]
                                # Normalize ai_attachments to full format for storage
                                normalized_ai_attachments = documentList.copy()
                                logger.info(f"AI selected {len(documentList)} documents for attachment (resolved via ChatDocuments)")
                            else:
                                # No intersection; use all available documents
                                documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
                                normalized_ai_attachments = documentList.copy()
                                logger.warning("AI selected attachments not found in available documents, using all documents")
                        else:
                            # No AI selection; use all available documents
                            documentList = [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in available_docs]
                            normalized_ai_attachments = documentList.copy()
                            logger.warning("AI did not specify attachments, using all available documents")
                    else:
                        logger.info("No documents provided in documentList; skipping attachment processing")
@ -1363,7 +1372,7 @@ Return JSON:
                        "cc": cc,
                        "bcc": bcc,
                        "attachments": len(documentList),
-                        "aiSelectedAttachments": ai_attachments if ai_attachments else "all documents",
+                        "aiSelectedAttachments": normalized_ai_attachments if normalized_ai_attachments else "all documents",
                        "aiGenerated": True,
                        "context": context,
                        "emailStyle": emailStyle,
@ -1371,12 +1380,40 @@ Return JSON:
                        "draftData": draft_data
                    }
                    # Extract attachment filenames for validation metadata
                    attachmentFilenames = []
                    attachmentReferences = []
                    if documentList:
                        try:
                            from modules.datamodels.datamodelDocref import DocumentReferenceList
                            attached_docs = self.services.chat.getChatDocumentsFromDocumentList(DocumentReferenceList.from_string_list(documentList)) or []
                            attachmentFilenames = [getattr(doc, 'fileName', '') for doc in attached_docs if getattr(doc, 'fileName', None)]
                            # Store normalized document references (with filenames) - use normalized_ai_attachments if available
                            attachmentReferences = normalized_ai_attachments if normalized_ai_attachments else [self.services.chat.getDocumentReferenceFromChatDocument(d) for d in attached_docs]
                        except Exception:
                            pass
                    # Create validation metadata for content validator
                    validationMetadata = {
                        "actionType": "outlook.composeAndDraftEmailWithContext",
                        "emailRecipients": to,
                        "emailCc": cc,
                        "emailBcc": bcc,
                        "emailSubject": subject,
                        "emailAttachments": attachmentFilenames,
                        "emailAttachmentReferences": attachmentReferences,
                        "emailAttachmentCount": len(attachmentFilenames),
                        "emailStyle": emailStyle,
                        "hasAttachments": len(attachmentFilenames) > 0
                    }
                    return ActionResult(
                        success=True,
                        documents=[ActionDocument(
                            documentName=f"ai_generated_email_draft_{self._format_timestamp_for_filename()}.json",
                            documentData=json.dumps(draftResultData, indent=2),
-                            mimeType="application/json"
+                            mimeType="application/json",
                            validationMetadata=validationMetadata
                        )]
                    )
                else:
--- a/modules/workflows/methods/methodSharepoint.py
+++ b/modules/workflows/methods/methodSharepoint.py
@ -1154,6 +1154,53 @@ class MethodSharepoint(MethodBase):
                    resultData = json.loads(fileData)
                    foundDocuments = resultData.get("foundDocuments", [])
                    # If no foundDocuments, check if it's a listDocuments result (has listResults)
                    if not foundDocuments and "listResults" in resultData:
                        logger.info(f"pathObject contains listResults from listDocuments, converting to foundDocuments format")
                        listResults = resultData.get("listResults", [])
                        foundDocuments = []
                        siteIdFromList = None
                        siteNameFromList = None
                        for listResult in listResults:
                            siteResults = listResult.get("siteResults", [])
                            for siteResult in siteResults:
                                items = siteResult.get("items", [])
                                # Extract site info from first item if available
                                if items and not siteIdFromList:
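                                    # Take the site name from the first item of this siteResult;
                                    # the matching siteId is looked up after the loops via site discovery
                                    # NOTE(review): siteIdFromList is not assigned inside these loops, so this runs for
                                    # every siteResult that has items and the last site name wins - confirm this is intended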
                                    siteNameFromList = items[0].get("siteName")
                                for item in items:
                                    # Convert listDocuments item format to foundDocuments format
                                    if item.get("type") == "file":
                                        foundDoc = {
                                            "id": item.get("id"),
                                            "name": item.get("name"),
                                            "type": "file",
                                            "siteName": item.get("siteName"),
                                            "siteId": None,  # Will be determined from site discovery
                                            "webUrl": item.get("webUrl"),
                                            "fullPath": item.get("webUrl", ""),
                                            "parentPath": item.get("parentPath", "")
                                        }
                                        foundDocuments.append(foundDoc)
                        # Discover sites to get siteId if we have siteName
                        if foundDocuments and siteNameFromList and not siteIdFromList:
                            logger.info(f"Discovering sites to find siteId for '{siteNameFromList}'")
                            allSites = await self._discoverSharePointSites()
                            matchingSites = self._filterSitesByHint(allSites, siteNameFromList)
                            if matchingSites:
                                siteIdFromList = matchingSites[0].get("id")
                                # Update all foundDocuments with siteId
                                for doc in foundDocuments:
                                    doc["siteId"] = siteIdFromList
                                logger.info(f"Found siteId '{siteIdFromList}' for site '{siteNameFromList}'")
                        logger.info(f"Converted {len(foundDocuments)} files from listResults format")
                    if foundDocuments:
                        # Extract SharePoint file IDs from foundDocuments
                        sharePointFileIds = [doc.get("id") for doc in foundDocuments if doc.get("type") == "file"]
@ -1167,6 +1214,15 @@ class MethodSharepoint(MethodBase):
                            siteName = firstDoc.get("siteName")
                            siteId = firstDoc.get("siteId")
                            # If siteId is missing (from listDocuments conversion), discover sites to find it
                            if siteName and not siteId:
                                logger.info(f"Site ID missing, discovering sites to find siteId for '{siteName}'")
                                allSites = await self._discoverSharePointSites()
                                matchingSites = self._filterSitesByHint(allSites, siteName)
                                if matchingSites:
                                    siteId = matchingSites[0].get("id")
                                    logger.info(f"Found siteId '{siteId}' for site '{siteName}'")
                            if siteName and siteId:
                                sites = [{
                                    "id": siteId,
@ -1174,6 +1230,19 @@ class MethodSharepoint(MethodBase):
                                    "webUrl": firstDoc.get("webUrl", "")
                                }]
                                logger.info(f"Using specific site from pathObject: {siteName} (ID: {siteId})")
                            elif siteName:
                                # Try to get site by name
                                allSites = await self._discoverSharePointSites()
                                matchingSites = self._filterSitesByHint(allSites, siteName)
                                if matchingSites:
                                    sites = [{
                                        "id": matchingSites[0].get("id"),
                                        "displayName": siteName,
                                        "webUrl": matchingSites[0].get("webUrl", "")
                                    }]
                                    logger.info(f"Found site by name: {siteName} (ID: {sites[0]['id']})")
                                else:
                                    return ActionResult.isFailure(error=f"Site '{siteName}' not found. Cannot determine target site for read operation.")
                            else:
                                return ActionResult.isFailure(error="Site information missing from pathObject. Cannot determine target site for read operation.")
                    else:
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@ -421,14 +421,6 @@ class ContentValidator:
            if actionName:
                # Convert action name to human-readable format
                actionDescription = actionName.replace("ai.", "").replace(".", " ").title()
                if "convert" in actionName.lower():
                    actionDescription = "Document format conversion"
                elif "generate" in actionName.lower() or "create" in actionName.lower():
                    actionDescription = "Document generation"
                elif "extract" in actionName.lower():
                    actionDescription = "Content extraction"
                elif "process" in actionName.lower():
                    actionDescription = "Content processing"
                actionContext = f"\nDOCUMENTS CREATED BY: {actionDescription} ({actionName})"
            # Build action parameters context
@ -441,6 +433,25 @@ class ContentValidator:
                    paramsJson = json.dumps(relevantParams, ensure_ascii=False, indent=2)
                    actionParamsContext = f"\nACTION PARAMETERS USED: {paramsJson}"
            # Extract validation metadata from documents (action-specific context)
            validationMetadataContext = ""
            if documents:
                metadataList = []
                for doc in documents:
                    metadata = getattr(doc, 'validationMetadata', None)
                    if metadata and isinstance(metadata, dict):
                        metadataList.append(metadata)
                if metadataList:
                    # Merge all metadata into one dict (usually just one document; on duplicate keys the later document wins)
                    combinedMetadata = {}
                    for meta in metadataList:
                        combinedMetadata.update(meta)
                    if combinedMetadata:
                        metadataJson = json.dumps(combinedMetadata, ensure_ascii=False, indent=2)
                        validationMetadataContext = f"\nACTION VALIDATION METADATA: {metadataJson}"
            # Format success criteria for display with index numbers
            if successCriteria:
                criteriaDisplay = "\n".join([f"[{i}] {criterion}" for i, criterion in enumerate(successCriteria)])
@ -452,7 +463,7 @@ class ContentValidator:
 === TASK INFORMATION ===
 {objectiveLabel}: '{objectiveText}'
 EXPECTED DATA TYPE: {dataType}
-EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}
+EXPECTED FORMATS: {expectedFormats if expectedFormats else ['any']}{actionContext}{actionParamsContext}{validationMetadataContext}
 === VALIDATION INSTRUCTIONS ===
@ -466,6 +477,7 @@ VALIDATION RULES:
 5. Data availability assessment: If delivered documents do not contain required data, clearly indicate this in findings. Re-reading the same documents might not help.
 VALIDATION STEPS:
 - Check ACTION VALIDATION METADATA first (if present) - this contains action-specific context
 - Check structure summary for quantities, counts, statistics
 - Compare found values with required values from criteria
 - If structure unavailable, use metadata only (format, filename, size)
--- a/modules/workflows/processing/modes/modeAutomation.py
+++ b/modules/workflows/processing/modes/modeAutomation.py
@ -169,6 +169,10 @@ class AutomationMode(BaseMode):
        Execute task using Automation mode - executes predefined actions directly.
        No AI planning or review phases - actions are executed sequentially as defined.
        """
        # Get task index from workflow state for consistency
        if taskIndex is None:
            taskIndex = workflow.getTaskIndex()
        logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
        try:
@ -178,7 +182,6 @@ class AutomationMode(BaseMode):
            # Update workflow before executing task
            if taskIndex is not None:
                self._updateWorkflowBeforeExecutingTask(taskIndex)
                self.services.chat.setWorkflowContext(taskNumber=taskIndex)
            # Create task start message
            await self.messageCreator.createTaskStartMessage(taskStep, workflow, taskIndex, totalTasks)
@ -241,7 +244,7 @@ class AutomationMode(BaseMode):
                # Execute action
                result = await self.actionExecutor.executeSingleAction(
-                    action, workflow, taskStep, taskIndex, actionNumber, totalActions
+                    action, workflow, taskStep
                )
                actionResults.append(result)
--- a/modules/workflows/processing/modes/modeDynamic.py
+++ b/modules/workflows/processing/modes/modeDynamic.py
@ -561,6 +561,11 @@ class DynamicMode(BaseMode):
            # Use connectionReference from selection (required)
            connectionRef = selection.get('connectionReference')
            # If not found at top level, check in selection['parameters'] (guided action case)
            if not connectionRef and isinstance(selection, dict) and 'parameters' in selection:
                connectionRef = selection['parameters'].get('connectionReference')
            if connectionRef:
                # Check if action actually has connectionReference parameter
                methodName, actionName = compoundActionName.split('.', 1)
--- a/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
+++ b/modules/workflows/processing/shared/promptGenerationActionsDynamic.py
@ -58,9 +58,10 @@ CONTEXT: {{KEY:OVERALL_TASK_CONTEXT}}
 OBJECTIVE: {{KEY:TASK_OBJECTIVE}}
 === AVAILABLE RESOURCES ===
-DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
+AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_SUMMARY}}
 {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
-CONNECTIONS: {{KEY:AVAILABLE_CONNECTIONS_INDEX}}
+AVAILABLE_CONNECTIONS_INDEX:
 {{KEY:AVAILABLE_CONNECTIONS_INDEX}}
 === AVAILABLE ACTIONS ===
 {{KEY:AVAILABLE_METHODS}}
@ -82,6 +83,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
 - Be the next logical incremental step (not complete entire objective in one step)
 - Target exactly one output format if producing files
 - Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
 - ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
 - Learn from previous validation feedback and avoid repeated mistakes
 - Include intent analysis fields (dataType, expectedFormats, qualityRequirements, successCriteria)
@ -97,7 +99,7 @@ Return ONLY JSON (no markdown, no explanations). The chosen action MUST:
    "successCriteria": ["specific criterion 1", "specific criterion 2"],
    "userMessage": "User-friendly message in language '{{KEY:USER_LANGUAGE}}' explaining what this action will do (1 sentence, first person, friendly tone)",
    "learnings": ["..."],
-    "requiredInputDocuments": ["docList:..."],
+    "requiredInputDocuments": ["docItem:<documentId>:<filename>", "docList:<label>"],
    "requiredConnection": "connection:..." | null,
    "parametersContext": "concise text that Stage 2 will use to set business parameters"
 }}
@ -115,6 +117,9 @@ Analyze actionObjective to determine:
 3. parametersContext: short, sufficient for Stage 2
 4. Return ONLY JSON - no markdown, no explanations
 5. requiredInputDocuments: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
   - For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
   - For document lists: use docList:<label> format
   - Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
 6. requiredConnection: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX
 7. Plan incrementally: one output format per step
 8. Learn from validation feedback - avoid repeating mistakes
@ -307,6 +312,7 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
        PromptPlaceholder(label="REVIEW_CONTENT", content=reviewContent, summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_METHODS", content=extractAvailableMethods(services), summaryAllowed=False),
        PromptPlaceholder(label="AVAILABLE_DOCUMENTS_INDEX", content=extractAvailableDocumentsIndex(services, context), summaryAllowed=True),
        PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
    ]
    template = """TASK DECISION
@ -321,7 +327,9 @@ def generateDynamicRefinementPrompt(services, context: Any, reviewContent: str)
 === AVAILABLE RESOURCES ===
 ACTIONS: {{KEY:AVAILABLE_METHODS}}
-DOCUMENTS: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
+AVAILABLE_DOCUMENTS_INDEX: {{KEY:AVAILABLE_DOCUMENTS_INDEX}}
 AVAILABLE_CONNECTIONS_INDEX:
 {{KEY:AVAILABLE_CONNECTIONS_INDEX}}
 {{KEY:REVIEW_CONTENT}}
@ -334,12 +342,20 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
 - Next action should ONLY generate the MISSING part, NOT repeat what's already delivered
 === OUTPUT FORMAT ===
 Return ONLY JSON (no markdown, no explanations). The decision MUST:
 - Use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
 - ALWAYS use FULL document references with filename: docItem:<documentId>:<filename> (filename is required)
 - Use ONLY exact labels from AVAILABLE_CONNECTIONS_INDEX (connection:...)
 - Provide concrete parameter values in nextActionParameters (not placeholders)
 - Match parameter names exactly as defined in AVAILABLE_METHODS
 {{
  "status": "continue",
  "reason": "Brief reason explaining why continuing",
  "nextAction": "Selected_action_from_ACTIONS",
  "nextActionParameters": {{
-    "documentList": ["docItem:reference_from_DOCUMENTS"],
+    "documentList": ["docItem:<documentId>:<filename>", "docList:<label>"],
    "connectionReference": "connection:reference_from_AVAILABLE_CONNECTIONS_INDEX",
    "parameter1": "value1",
    "parameter2": "value2"
  }},
@ -347,16 +363,21 @@ CRITICAL: Use structureComparison and gap information from CONTENT VALIDATION to
 }}
 === RULES ===
- If "continue": MUST provide nextAction and nextActionParameters
+1. Return ONLY JSON - no markdown, no explanations
- nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
+2. If "continue": MUST provide nextAction and nextActionParameters
- nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
+3. nextAction: SPECIFIC action from AVAILABLE_METHODS (do not invent)
- documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent)
+4. nextActionParameters: concrete parameters (check AVAILABLE_METHODS for valid names)
- nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
+5. documentList: ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (do not invent/modify)
- CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
+   - For individual documents: ALWAYS use docItem:<documentId>:<filename> format (include filename)
- Do NOT repeat failed actions - suggest DIFFERENT approach
+   - For document lists: use docList:<label> format
- If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
+   - Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX (including filename)
- nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
+6. connectionReference: ONLY exact label from AVAILABLE_CONNECTIONS_INDEX (required if action needs connection)
- If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
+7. nextActionObjective: describe what this action will achieve based on the FIRST improvement suggestion from CONTENT VALIDATION
 8. CRITICAL: Use structureComparison.gap to specify the missing part in nextActionParameters
 9. Do NOT repeat failed actions - suggest DIFFERENT approach
 10. If ACTION HISTORY shows repeated actions, suggest a fundamentally different approach
 11. nextActionObjective must directly address the highest priority improvement suggestion from CONTENT VALIDATION
 12. If validation shows partial data delivered, next action should CONTINUE from where it stopped, not restart
 """
--- a/modules/workflows/processing/workflowProcessor.py
+++ b/modules/workflows/processing/workflowProcessor.py
@ -428,7 +428,7 @@ class WorkflowProcessor:
            )
            # Prepare AI call options for fast path (balanced, fast processing)
-            from modules.datamodels.datamodelAi import AiCallOptions
+            from modules.datamodels.datamodelAi import AiCallOptions, AiCallRequest
            options = AiCallOptions(
                operationType=OperationTypeEnum.DATA_ANALYSE,
@ -438,16 +438,19 @@ class WorkflowProcessor:
                maxProcessingTime=15  # Fast path should complete in 15s
            )
-            # Call AI (content call - no documents needed for fast path)
+            # Call AI directly (no document generation - just plain text response)
-            aiResponse = await self.services.ai.callAiContent(
+            # Use aiObjects.call() instead of callAiContent() to avoid document generation path
            aiRequest = AiCallRequest(
                prompt=fastPathPrompt,
-                contentParts=None,  # Fast path doesn't process documents
+                context="",
                options=options,
-                outputFormat=None  # Text response, not document generation
+                contentParts=None  # Fast path doesn't process documents
            )
-            # Extract response content (AiResponse.content is a string)
+            aiCallResponse = await self.services.ai.aiObjects.call(aiRequest)
-            responseText = aiResponse.content if isinstance(aiResponse, str) else (aiResponse.content if hasattr(aiResponse, 'content') else str(aiResponse))
+            
            # Extract response content (AiCallResponse.content is a string)
            responseText = aiCallResponse.content if aiCallResponse.content else ""
            # Create ActionResult with response
            # For fast path, we create a simple text document with the response
--- a/modules/workflows/workflowManager.py
+++ b/modules/workflows/workflowManager.py
@ -162,30 +162,38 @@ class WorkflowManager:
            self.workflowProcessor = WorkflowProcessor(self.services)
-            # Process user-uploaded documents from userInput for complexity detection
+            # Get workflow mode to determine if complexity detection is needed
-            # This is the correct way: use the input data directly, not workflow state
+            workflowMode = getattr(self.services.workflow, 'workflowMode', None)
-            documents = []
+            skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION)
            if userInput.listFileId:
                try:
                    documents = await self._processFileIds(userInput.listFileId, None)
                except Exception as e:
                    logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
-            # Detect complexity (AI-based semantic understanding) using user input documents
+            if skipComplexityDetection:
-            complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
+                logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan")
-            logger.info(f"Request complexity detected: {complexity}")
+                complexity = "moderate"  # Default for automation workflows
            else:
                # Process user-uploaded documents from userInput for complexity detection
                # This is the correct way: use the input data directly, not workflow state
                documents = []
                if userInput.listFileId:
                    try:
                        documents = await self._processFileIds(userInput.listFileId, None)
                    except Exception as e:
                        logger.warning(f"Failed to process user fileIds for complexity detection: {e}")
                # Detect complexity (AI-based semantic understanding) using user input documents
                complexity = await self.workflowProcessor.detectComplexity(userInput.prompt, documents)
                logger.info(f"Request complexity detected: {complexity}")
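            # Now send the first message. This processes the uploaded documents itself, so any document processing already done above for complexity detection is repeated; the duplicate work is accepted.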
            await self._sendFirstMessage(userInput)
-            # Route to fast path for simple requests
+            # Route to fast path for simple requests (skip for automation mode)
-            if complexity == "simple":
+            if not skipComplexityDetection and complexity == "simple":
                logger.info("Routing to fast path for simple request")
                await self._executeFastPath(userInput, documents)
                return  # Fast path completes the workflow
-            # Route to full workflow for moderate/complex requests
+            # Route to full workflow for moderate/complex requests or automation mode
-            logger.info(f"Routing to full workflow for {complexity} request")
+            logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else ""))
            taskPlan = await self._planTasks(userInput)
            await self._executeTasks(taskPlan)
            await self._processWorkflowResults()