Workflow end-to-end validated - start variant testing

2025-10-14 21:20:02 +02:00 · 2025-10-14 21:20:02 +02:00 · e0afc72e13
commit e0afc72e13
parent bdc87eb5c6
13 changed files with 541 additions and 285 deletions
--- a/modules/routes/routeSecurityLocal.py
+++ b/modules/routes/routeSecurityLocal.py
@ -263,8 +263,7 @@ async def read_user_me(
@limiter.limit("60/minute")
 async def refresh_token(
    request: Request,
-    response: Response,
+    response: Response
    currentUser: User = Depends(getCurrentUser)
 ) -> Dict[str, Any]:
    """Refresh access token using refresh token from cookie"""
    try:
@ -283,12 +282,27 @@ async def refresh_token(
        except jwt.JWTError:
            raise HTTPException(status_code=401, detail="Invalid refresh token")
        # Get user information from refresh token payload
        user_id = payload.get("userId")
        if not user_id:
            raise HTTPException(status_code=401, detail="Invalid refresh token - missing user ID")
        # Get user from database using the user ID from refresh token
        try:
            app_interface = getRootInterface()
            current_user = app_interface.getUser(user_id)
            if not current_user:
                raise HTTPException(status_code=401, detail="User not found")
        except Exception as e:
            logger.error(f"Failed to get user from database: {str(e)}")
            raise HTTPException(status_code=500, detail="Failed to validate user")
        # Create new token data
        token_data = {
-            "sub": currentUser.username,
+            "sub": current_user.username,
-            "mandateId": str(currentUser.mandateId),
+            "mandateId": str(current_user.mandateId),
-            "userId": str(currentUser.id),
+            "userId": str(current_user.id),
-            "authenticationAuthority": currentUser.authenticationAuthority
+            "authenticationAuthority": current_user.authenticationAuthority
        }
        # Create new access token + set cookie
--- a/modules/services/serviceAi/subCoreAi.py
+++ b/modules/services/serviceAi/subCoreAi.py
@ -345,7 +345,7 @@ class SubCoreAi:
                options=options
            )
            response = await self.aiObjects.call(request)
-            result = {"metadata": {"title": "AI Response"}, "sections": [{"id": "section_1", "type": "paragraph", "data": {"text": response.content}}]}
+            result = {"metadata": {"title": "AI Response"}, "sections": [{"id": "section_1", "content_type": "paragraph", "elements": [{"text": response.content}]}]}
        # Convert single-file result to multi-file format if needed
        if "sections" in result and "documents" not in result:
--- a/modules/services/serviceAi/subDocumentGeneration.py
+++ b/modules/services/serviceAi/subDocumentGeneration.py
@ -77,7 +77,8 @@ class SubDocumentGeneration:
        documents: Optional[List[ChatDocument]],
        options: AiCallOptions,
        outputFormat: str,
-        title: Optional[str]
+        title: Optional[str],
        generationPrompt: Optional[str] = None
        ) -> Dict[str, Any]:
        """Handle single-file document generation (existing functionality)."""
        try:
@ -125,9 +126,72 @@ class SubDocumentGeneration:
            except Exception:
                parsedFilename = None
-            # Render the JSON content to the specified format
+            # Use AI generation to enhance the extracted JSON before rendering
            enhancedContent = aiResponseJson  # Default to original
            if prompt:
                try:
                    from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
                    # Get generation prompt
                    generationPrompt = await generation_service.getGenerationPrompt(
                        outputFormat=outputFormat,
                        userPrompt=prompt,
                        title=title,
                        aiService=self
                    )
                    # Prepare the AI call
                    request_options = AiCallOptions()
                    request_options.operationType = OperationType.GENERAL
                    # Create context with the extracted JSON content
                    import json
                    context = f"Extracted JSON content:\n{json.dumps(aiResponseJson, indent=2)}"
                    request = AiCallRequest(
                        prompt=generationPrompt,
                        context=context,
                        options=request_options
                    )
                    # Call AI to enhance the content
                    response = await self.aiObjects.call(request)
                    if response and response.content:
                        # Parse the AI response as JSON
                        try:
                            import re
                            result = response.content.strip()
                            # Extract JSON from markdown if present
                            json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
                            if json_match:
                                result = json_match.group(1).strip()
                            elif result.startswith('```json'):
                                result = re.sub(r'^```json\s*', '', result)
                                result = re.sub(r'\s*```$', '', result)
                            elif result.startswith('```'):
                                result = re.sub(r'^```\s*', '', result)
                                result = re.sub(r'\s*```$', '', result)
                            # Try to parse JSON
                            enhancedContent = json.loads(result)
                            logger.info(f"AI enhanced JSON content successfully")
                        except json.JSONDecodeError as e:
                            logger.warning(f"AI generation returned invalid JSON: {str(e)}, using original content")
                            enhancedContent = aiResponseJson
                    else:
                        logger.warning("AI generation returned empty response, using original content")
                        enhancedContent = aiResponseJson
                except Exception as e:
                    logger.warning(f"AI generation failed: {str(e)}, using original content")
                    enhancedContent = aiResponseJson
            # Render the enhanced JSON content
            renderedContent, mimeType = await generation_service.renderReport(
-                extractedContent=aiResponseJson,
+                extractedContent=enhancedContent,
                outputFormat=outputFormat,
                title=title,
                userPrompt=prompt,
@ -232,11 +296,8 @@ class SubDocumentGeneration:
                    # Convert AI format to renderer format
                    transformed_section = {
                        "id": section.get("id", f"section_{len(transformed_sections) + 1}"),
-                        "type": section.get("content_type", "paragraph"),
+                        "content_type": section.get("content_type", "paragraph"),
-                        "data": {
+                        "elements": section.get("elements", []),
                            "text": "",
                            "elements": section.get("elements", [])
                        },
                        "order": section.get("order", len(transformed_sections) + 1)
                    }
@ -246,7 +307,11 @@ class SubDocumentGeneration:
                        for element in section.get("elements", []):
                            if "text" in element:
                                text_parts.append(element["text"])
-                        transformed_section["data"]["text"] = "\n".join(text_parts)
+                        # Add text to the first element or create a new one
                        if transformed_section["elements"]:
                            transformed_section["elements"][0]["text"] = "\n".join(text_parts)
                        else:
                            transformed_section["elements"] = [{"text": "\n".join(text_parts)}]
                    transformed_sections.append(transformed_section)
@ -264,8 +329,72 @@ class SubDocumentGeneration:
                    "tags": ["multi_file", "ai_generated"]
                }
                # Use AI generation to enhance the extracted JSON before rendering
                enhancedContent = complete_document  # Default to original
                if prompt:
                    try:
                        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
                        # Get generation prompt
                        generationPrompt = await generation_service.getGenerationPrompt(
                            outputFormat=outputFormat,
                            userPrompt=prompt,
                            title=doc_data["title"],
                            aiService=self
                        )
                        # Prepare the AI call
                        request_options = AiCallOptions()
                        request_options.operationType = OperationType.GENERAL
                        # Create context with the extracted JSON content
                        import json
                        context = f"Extracted JSON content:\n{json.dumps(complete_document, indent=2)}"
                        request = AiCallRequest(
                            prompt=generationPrompt,
                            context=context,
                            options=request_options
                        )
                        # Call AI to enhance the content
                        response = await self.aiObjects.call(request)
                        if response and response.content:
                            # Parse the AI response as JSON
                            try:
                                import re
                                result = response.content.strip()
                                # Extract JSON from markdown if present
                                json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
                                if json_match:
                                    result = json_match.group(1).strip()
                                elif result.startswith('```json'):
                                    result = re.sub(r'^```json\s*', '', result)
                                    result = re.sub(r'\s*```$', '', result)
                                elif result.startswith('```'):
                                    result = re.sub(r'^```\s*', '', result)
                                    result = re.sub(r'\s*```$', '', result)
                                # Try to parse JSON
                                enhancedContent = json.loads(result)
                                logger.info(f"AI enhanced JSON content successfully")
                            except json.JSONDecodeError as e:
                                logger.warning(f"AI generation returned invalid JSON: {str(e)}, using original content")
                                enhancedContent = complete_document
                        else:
                            logger.warning("AI generation returned empty response, using original content")
                            enhancedContent = complete_document
                    except Exception as e:
                        logger.warning(f"AI generation failed: {str(e)}, using original content")
                        enhancedContent = complete_document
                # Render the enhanced JSON content
                rendered_content, mime_type = await generation_service.renderReport(
-                    extractedContent=complete_document,
+                    extractedContent=enhancedContent,
                    outputFormat=outputFormat,
                    title=doc_data["title"],
                    userPrompt=prompt,
@ -477,9 +606,7 @@ Return only the JSON response.
        """
        try:
            services = self.services
-            workflow = getattr(services, 'currentWorkflow', None)
+            workflow = services.currentWorkflow
            if not workflow:
                return
            # Serialize payload
            import json as _json
--- a/modules/services/serviceAi/subDocumentProcessing.py
+++ b/modules/services/serviceAi/subDocumentProcessing.py
@ -181,9 +181,8 @@ class SubDocumentProcessing:
                from modules.services.serviceNormalization.mainServiceNormalization import NormalizationService
                normalizer = NormalizationService(self.services)
                inventory = normalizer.discoverStructures(mergedJsonDocument)
-                # Use workflow id if available as cache key, else default
+                # Use workflow id as cache key
-                cacheKey = getattr(self.services, 'currentWorkflow', None)
+                cacheKey = self.services.currentWorkflow.id
                cacheKey = getattr(cacheKey, 'id', 'workflow_run') if cacheKey else 'workflow_run'
                # Provide the extraction/merge prompt context when available to help mapping
                mergePrompt = prompt
                mapping = await normalizer.requestHeaderMapping(inventory, cacheKey, None, mergePrompt)
@ -476,8 +475,8 @@ class SubDocumentProcessing:
                                "metadata": {"title": f"Image Analysis - Chunk {chunk_index}"},
                                "sections": [{
                                    "id": f"image_section_{chunk_index}",
-                                    "type": "paragraph",
+                                    "content_type": "paragraph",
-                                    "data": {"text": fallback_content}
+                                    "elements": [{"text": fallback_content}]
                                }]
                            })
                            self.services.utils.debugLogToFile(f"Created fallback JSON for image chunk {chunk_index} with actual content", "AI_SERVICE")
@ -583,8 +582,8 @@ class SubDocumentProcessing:
                                    "metadata": {"title": f"Document Analysis - Chunk {chunk_index}"},
                                    "sections": [{
                                        "id": f"analysis_section_{chunk_index}",
-                                        "type": "paragraph",
+                                        "content_type": "paragraph",
-                                        "data": {"text": fallback_content}
+                                        "elements": [{"text": fallback_content}]
                                    }]
                                })
                                self.services.utils.debugLogToFile(f"Created fallback JSON for container chunk {chunk_index} with actual content", "AI_SERVICE")
@ -676,8 +675,8 @@ class SubDocumentProcessing:
                                "metadata": {"title": "Error Section"},
                                "sections": [{
                                    "id": f"error_section_{chunk_index}",
-                                    "type": "paragraph",
+                                    "content_type": "paragraph",
-                                    "data": {"text": f"Error parsing JSON: {str(e)}"}
+                                    "elements": [{"text": f"Error parsing JSON: {str(e)}"}]
                                }]
                            })
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@ -1,5 +1,6 @@
 import logging
 import uuid
 import json
 from typing import Any, Dict, List, Optional, Union, Tuple
 from datetime import datetime, UTC
 import re
@ -339,24 +340,8 @@ class GenerationService:
            if not renderer:
                raise ValueError(f"Unsupported output format: {outputFormat}")
-            # Generate AI-based generation prompt if AI service is available
+            # Render the JSON content directly (AI generation handled by main service)
-            generationPrompt = userPrompt  # Default to user prompt
+            renderedContent, mimeType = await renderer.render(extractedContent, title, userPrompt, aiService)
            if aiService and userPrompt:
                try:
                    from .subPromptBuilder import buildGenerationPrompt
                    generationPrompt = await buildGenerationPrompt(
                        outputFormat=outputFormat,
                        userPrompt=userPrompt,
                        title=title,
                        aiService=aiService,
                        services=self.services
                    )
                except Exception as e:
                    logger.warning(f"Failed to generate AI-based generation prompt: {str(e)}, using user prompt")
                    generationPrompt = userPrompt
            # Render the JSON content with AI-generated prompt
            renderedContent, mimeType = await renderer.render(extractedContent, title, generationPrompt, aiService)
            # DEBUG: dump rendered output
            try:
                import os
@ -391,6 +376,23 @@ class GenerationService:
            services=self.services
        )
    async def getGenerationPrompt(
        self,
        outputFormat: str,
        userPrompt: str,
        title: str,
        aiService=None
    ) -> str:
        """Return the generation prompt for enhancing extracted JSON content.

        Thin async wrapper: forwards the given arguments, together with
        ``self.services``, to ``buildGenerationPrompt`` from the sibling
        ``subPromptBuilder`` module and returns the awaited result.

        Args:
            outputFormat: Target output format, passed through unchanged.
            userPrompt: The user's prompt, passed through unchanged.
            title: Document title, passed through unchanged.
            aiService: Optional AI service handle, passed through unchanged.

        Returns:
            Whatever ``buildGenerationPrompt`` resolves to (annotated as ``str``).
        """
        # Imported at call time rather than at module import time.
        from .subPromptBuilder import buildGenerationPrompt as _buildPrompt

        # Collect the keyword arguments once, then delegate to the builder.
        builderKwargs = {
            "outputFormat": outputFormat,
            "userPrompt": userPrompt,
            "title": title,
            "aiService": aiService,
            "services": self.services,
        }
        builtPrompt = await _buildPrompt(**builderKwargs)
        return builtPrompt
    async def getGenericExtractionPrompt(
        self,
        outputFormat: str,
--- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
+++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
@ -81,11 +81,11 @@ class BaseRenderer(ABC):
        if not isinstance(sections, list):
            return False
-        # Validate each section has type and data
+        # Validate each section has content_type and elements
        for section in sections:
            if not isinstance(section, dict):
                return False
-            if "type" not in section or "data" not in section:
+            if "content_type" not in section or "elements" not in section:
                return False
        return True
@ -159,7 +159,7 @@ class BaseRenderer(ABC):
        # Base implementation returns a simple dict
        # Format-specific renderers should override this method
        return {
-            "type": "image",
+            "content_type": "image",
            "base64Data": base64_data,
            "altText": alt_text,
            "width": section_data.get("width", None),
@ -259,25 +259,25 @@ class BaseRenderer(ABC):
        if section_type == "table":
            headers, rows = self._extract_table_data(section_data)
-            return {"type": "table", "headers": headers, "rows": rows}
+            return {"content_type": "table", "headers": headers, "rows": rows}
        elif section_type == "bullet_list":
            items = self._extract_bullet_list_items(section_data)
-            return {"type": "bullet_list", "items": items}
+            return {"content_type": "bullet_list", "items": items}
        elif section_type == "heading":
            level, text = self._extract_heading_data(section_data)
-            return {"type": "heading", "level": level, "text": text}
+            return {"content_type": "heading", "level": level, "text": text}
        elif section_type == "paragraph":
            text = self._extract_paragraph_text(section_data)
-            return {"type": "paragraph", "text": text}
+            return {"content_type": "paragraph", "text": text}
        elif section_type == "code_block":
            code, language = self._extract_code_block_data(section_data)
-            return {"type": "code_block", "code": code, "language": language}
+            return {"content_type": "code_block", "code": code, "language": language}
        elif section_type == "image":
            base64_data, alt_text = self._extract_image_data(section_data)
            # Validate image data
            if self._validate_image_data(base64_data, alt_text):
                return {
-                    "type": "image", 
+                    "content_type": "image", 
                    "base64Data": base64_data, 
                    "altText": alt_text,
                    "width": section_data.get("width"),
@ -286,11 +286,11 @@ class BaseRenderer(ABC):
                }
            else:
                # Return placeholder if image data is invalid
-                return {"type": "paragraph", "text": f"[Image: {alt_text}]"}
+                return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"}
        else:
            # Fallback to paragraph
            text = self._extract_paragraph_text(section_data)
-            return {"type": "paragraph", "text": text}
+            return {"content_type": "paragraph", "text": text}
    def _format_timestamp(self, timestamp: str = None) -> str:
        """Format timestamp for display."""
--- a/modules/services/serviceGeneration/renderers/rendererJson.py
+++ b/modules/services/serviceGeneration/renderers/rendererJson.py
@ -38,7 +38,7 @@ class RendererJson(BaseRenderer):
            # Return minimal JSON fallback
            fallback_data = {
                "title": title,
-                "sections": [{"type": "paragraph", "data": {"text": f"Error rendering report: {str(e)}"}}],
+                "sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
                "metadata": {"error": str(e)}
            }
            return json.dumps(fallback_data, indent=2), "application/json"
@ -54,7 +54,7 @@ class RendererJson(BaseRenderer):
            if "sections" not in content:
                # Convert old format to new format
                content = {
-                    "sections": [{"type": "paragraph", "data": {"text": str(content)}}],
+                    "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
                    "metadata": {"title": title}
                }
@ -73,7 +73,7 @@ class RendererJson(BaseRenderer):
            self.logger.warning(f"Error cleaning JSON content: {str(e)}")
            # Return minimal valid JSON
            fallback_data = {
-                "sections": [{"type": "paragraph", "data": {"text": str(content)}}],
+                "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
                "metadata": {"title": title, "error": str(e)}
            }
            return json.dumps(fallback_data, indent=2, ensure_ascii=False)
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@ -442,7 +442,7 @@ class RendererXlsx(BaseRenderer):
        sheet_names = []
        # Check if we have multiple table sections
-        table_sections = [s for s in sections if s.get("type") == "table"]
+        table_sections = [s for s in sections if s.get("content_type") == "table"]
        if len(table_sections) > 1:
            # Create separate sheets for each table
@ -480,7 +480,7 @@ class RendererXlsx(BaseRenderer):
                return
            sections = json_content.get("sections", [])
-            table_sections = [s for s in sections if s.get("type") == "table"]
+            table_sections = [s for s in sections if s.get("content_type") == "table"]
            if len(table_sections) > 1:
                # Multiple tables - populate each sheet with its corresponding table
@ -509,10 +509,15 @@ class RendererXlsx(BaseRenderer):
            sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79")))
            sheet['A1'].alignment = Alignment(horizontal="center")
-            # Get table data
+            # Get table data from elements (canonical JSON format)
-            table_data = section.get("data", {})
+            elements = section.get("elements", [])
-            headers = table_data.get("headers", [])
+            if elements and isinstance(elements, list) and len(elements) > 0:
-            rows = table_data.get("rows", [])
+                table_data = elements[0]
                headers = table_data.get("headers", [])
                rows = table_data.get("rows", [])
            else:
                headers = []
                rows = []
            if not headers and not rows:
                sheet['A3'] = "No table data available"
@ -683,9 +688,9 @@ class RendererXlsx(BaseRenderer):
    def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
        """Add a table element to Excel sheet."""
        try:
-            table_data = element.get("data", {})
+            # In canonical JSON format, table elements have headers and rows directly
-            headers = table_data.get("headers", [])
+            headers = element.get("headers", [])
-            rows = table_data.get("rows", [])
+            rows = element.get("rows", [])
            if not headers and not rows:
                return start_row
@ -697,7 +702,7 @@ class RendererXlsx(BaseRenderer):
                if header_style.get("bold"):
                    cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
                if header_style.get("background"):
-                    cell.fill = PatternFill(start_color=header_style["background"], end_color=header_style["background"], fill_type="solid")
+                    cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
            start_row += 1
--- a/modules/services/serviceGeneration/subPromptBuilder.py
+++ b/modules/services/serviceGeneration/subPromptBuilder.py
@ -1,21 +1,21 @@
 """
-Centralized prompt builder for document generation across formats.
+Prompt builder for AI document generation and extraction.
-
+This module builds prompts for AI services to extract and generate documents.
 Builds a robust prompt that:
 - Accepts any user intent (no fixed structure assumptions)
 - Injects format-specific guidelines from the selected renderer
 - Adds a common policy section to always use real data from source docs
 - Requires the AI to output a filename header that we can parse and use
 """
 import json
-from typing import Protocol, Dict, Any
+import logging
 from typing import Dict, Any, Optional, List, TYPE_CHECKING
 from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
 # Type hint for renderer parameter
 if TYPE_CHECKING:
    from .renderers.rendererBaseTemplate import BaseRenderer
    _RendererLike = BaseRenderer
 else:
    _RendererLike = Any
-class _RendererLike(Protocol):
+logger = logging.getLogger(__name__)
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:  # returns only format-specific guidelines
        ...
 async def buildAdaptiveExtractionPrompt(
    outputFormat: str,
@ -25,57 +25,65 @@ async def buildAdaptiveExtractionPrompt(
    aiService=None,
    services=None
 ) -> str:
-    """Build adaptive extraction prompt based on AI analysis."""
+    """
    Build adaptive extraction prompt based on AI analysis.
    Uses multi-file or single-file approach based on analysis.
    """
-    # Get appropriate JSON schema based on analysis
+    # Multi-file example data instead of schema
-    from .subJsonSchema import get_adaptive_json_schema
+    multi_file_example = {
-    json_schema = get_adaptive_json_schema(promptAnalysis)
+        "metadata": {
            "title": "Multi-Document Example",
            "splitStrategy": "by_section",
            "source_documents": ["doc_001"],
            "extraction_method": "ai_extraction"
        },
        "documents": [
            {
                "id": "doc_section_1",
                "title": "Section 1 Title",
                "filename": "section_1.xlsx",
                "sections": [
                    {
                        "id": "table_1",
                        "content_type": "table",
                        "elements": [
                            {
                                "headers": ["Column 1", "Column 2"],
                                "rows": [["Value 1", "Value 2"]]
                            }
                        ],
                        "order": 1
                    }
                ]
            }
        ]
    }
    # Single-file example data instead of schema
    single_file_example = {
        "metadata": {
            "title": "Single Document Example",
            "source_documents": ["doc_001"],
            "extraction_method": "ai_extraction"
        },
        "sections": [
            {
                "id": "table_1",
                "content_type": "table",
                "elements": [
                    {
                        "headers": ["Column 1", "Column 2"],
                        "rows": [["Value 1", "Value 2"]]
                    }
                ],
                "order": 1
            }
        ]
    }
    if promptAnalysis.get("is_multi_file", False):
-        schema_type = "multi-document"
+        # Multi-file prompt
    else:
        schema_type = "single-document"
    # Build adaptive prompt using AI analysis - match single-file style
    if promptAnalysis.get("is_multi_file", False):
        # Multi-file prompt - use simple example format like single-file
        multi_file_example = {
            "metadata": {
                "title": "REPLACE_WITH_ACTUAL_DOCUMENT_TITLE",
                "splitStrategy": "by_section"
            },
            "documents": [
                {
                    "id": "doc_1",
                    "title": "REPLACE_WITH_ACTUAL_SECTION_TITLE",
                    "filename": "REPLACE_WITH_ACTUAL_FILENAME",
                    "sections": [
                        {
                            "id": "section_1",
                            "content_type": "heading",
                            "elements": [
                                {
                                    "text": "REPLACE_WITH_ACTUAL_HEADING_TEXT",
                                    "level": 1
                                }
                            ],
                            "order": 1
                        },
                        {
                            "id": "section_2",
                            "content_type": "paragraph",
                            "elements": [
                                {
                                    "text": "REPLACE_WITH_ACTUAL_PARAGRAPH_CONTENT"
                                }
                            ],
                            "order": 2
                        }
                    ]
                }
            ]
        }
        adaptive_prompt = f"""
 {userPrompt}
@ -134,16 +142,31 @@ Return only the JSON structure with actual data from the documents. Do not inclu
 Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
 """.strip()
    else:
-        # Single-file prompt - use original style
+        # Single-file prompt - use example data instead of schema
        adaptive_prompt = f"""
 {userPrompt}
-You are extracting structured content from documents and must respond with valid JSON only.
+You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
-IMPORTANT: You must respond with valid JSON only. No additional text, explanations, or formatting outside the JSON structure.
+TASK: Extract the actual content from the document and organize it into structured sections.
-Extract the actual data from the source documents and structure it as JSON with this format:
+REQUIREMENTS:
-{json.dumps(json_schema, indent=2)}
+1. Analyze the document content provided in the context below
 2. Extract all content and organize it into logical sections
 3. Create structured JSON with sections containing the extracted content
 4. Preserve the original structure and data
 OUTPUT FORMAT: Return only valid JSON in this exact structure:
 {json.dumps(single_file_example, indent=2)}
 INSTRUCTIONS:
 - Replace example data with actual content from the document
 - Use actual headings, paragraphs, and text from the document
 - Ensure all content is properly structured
 - Do not use generic placeholder text
 - Extract real content from the documents
 CONTEXT (Document Content):
 Content Types to Extract:
 1. Tables: Extract all rows and columns with proper headers
@ -220,22 +243,53 @@ Consider the user's intent and the most logical way to organize the extracted co
            services.utils.debugLogToFile(f"Generic prompt analysis failed: {str(e)}", "PROMPT_BUILDER")
    # Fallback to single-file prompt
-    from .subJsonSchema import get_document_subJsonSchema
+    example_data = {
-    json_schema = get_document_subJsonSchema()
+        "metadata": {
            "title": "Example Document",
            "author": "AI Assistant",
            "source_documents": ["document_001"],
            "extraction_method": "ai_extraction"
        },
        "sections": [
            {
                "id": "section_001",
                "content_type": "table",
                "elements": [
                    {
                        "headers": ["Column 1", "Column 2", "Column 3"],
                        "rows": [
                            ["Value 1", "Value 2", "Value 3"],
                            ["Value 4", "Value 5", "Value 6"]
                        ]
                    }
                ],
                "order": 1,
                "metadata": {}
            }
        ],
        "summary": "",
        "tags": []
    }
    return f"""
 {userPrompt}
-You are extracting structured content from documents and must respond with valid JSON only.
+You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
-CRITICAL: You must respond with valid JSON only. No additional text, explanations, markdown formatting, code blocks, or any other content outside the JSON structure. Do not use ``` markers or any other formatting.
+TASK: Extract the actual content from the document and organize it into structured sections.
-Extract the actual data from the source documents and structure it as JSON with this format:
+REQUIREMENTS:
-{json.dumps(json_schema, indent=2)}
+1. Analyze the document content provided in the context below
 2. Extract all content and organize it into logical sections
 3. Create structured JSON with sections containing the extracted content
 4. Preserve the original structure and data
 OUTPUT FORMAT: Return only valid JSON in this exact structure:
 {json.dumps(example_data, indent=2)}
 Requirements:
 - Preserve all original data - do not summarize or interpret
- Use the exact JSON schema provided
+- Use the exact JSON format shown above
 - Maintain data integrity and structure
 Content Types to Extract:
@ -286,16 +340,55 @@ async def buildExtractionPrompt(
    from .subJsonSchema import get_document_subJsonSchema
    jsonSchema = get_document_subJsonSchema()
-    # Generic block for JSON extraction - use proper schema instead of hardcoded template
+    # Generic block for JSON extraction - use example data instead of schema
    example_data = {
        "metadata": {
            "title": "Example Document",
            "author": "AI Assistant",
            "source_documents": ["document_001"],
            "extraction_method": "ai_extraction"
        },
        "sections": [
            {
                "id": "section_001",
                "content_type": "table",
                "elements": [
                    {
                        "headers": ["Column 1", "Column 2", "Column 3"],
                        "rows": [
                            ["Value 1", "Value 2", "Value 3"],
                            ["Value 4", "Value 5", "Value 6"]
                        ]
                    }
                ],
                "order": 1,
                "metadata": {}
            }
        ],
        "summary": "",
        "tags": []
    }
    genericIntro = f"""
 {extractionIntent}
-You are extracting structured content from documents and must respond with valid JSON only.
+You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
-CRITICAL: You must respond with valid JSON only. No additional text, explanations, markdown formatting, code blocks, or any other content outside the JSON structure. Do not use ``` markers or any other formatting.
+TASK: Extract the actual content from the document and organize it into structured sections.
-Extract the actual data from the source documents and structure it as JSON with this format:
+REQUIREMENTS:
-{json.dumps(jsonSchema, indent=2)}
+1. Analyze the document content provided in the context below
 2. Extract all content and organize it into logical sections
 3. Create structured JSON with sections containing the extracted content
 4. Preserve the original structure and data
 OUTPUT FORMAT: Return only valid JSON in this exact structure:
 {json.dumps(example_data, indent=2)}
 Requirements:
 - Preserve all original data - do not summarize or interpret
 - Use the exact JSON format shown above
 - Maintain data integrity and structure
 Content Types to Extract:
 1. Tables: Extract all rows and columns with proper headers
@ -317,15 +410,20 @@ Return only the JSON structure with actual data from the documents. Do not inclu
 Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
 DO NOT return a schema description - return actual extracted content in the JSON format shown above.
-""".strip()
+"""
-    # Final assembly
+    # Get format-specific guidelines from renderer
-    finalPrompt = genericIntro
+    formatGuidelines = ""
    try:
        if hasattr(renderer, 'getExtractionGuidelines'):
            formatGuidelines = renderer.getExtractionGuidelines()
    except Exception:
        pass
    # Combine all parts
    finalPrompt = f"{genericIntro}\n\n{formatGuidelines}".strip()
-    # Debug output
+    # Save extraction prompt to debug file - only if debug enabled
    services.utils.debugLogToFile(f"EXTRACTION INTENT: Processed", "PROMPT_BUILDER")
    # Save full extraction prompt to debug file - only if debug enabled
    try:
        debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
        if debug_enabled:
@ -335,8 +433,7 @@ DO NOT return a schema description - return actual extracted content in the JSON
            debug_root = "./test-chat/ai"
            os.makedirs(debug_root, exist_ok=True)
            with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
-                f.write(f"EXTRACTION PROMPT:\n{finalPrompt}\n\n")
+                f.write(finalPrompt)
                f.write(f"EXTRACTION INTENT:\n{extractionIntent}\n")
    except Exception:
        pass
@ -367,24 +464,46 @@ async def buildGenerationPrompt(
        # AI call to generate the appropriate generation prompt
        generationPromptRequest = f"""
-Based on this user request, create a detailed generation prompt for creating a {outputFormat} document.
+You are creating instructions for an AI to generate JSON content in the CANONICAL FORMAT that will be converted to a {outputFormat} document.
 User request: "{safeUserPrompt}"
 Document title: "{title}"
-Output format: {outputFormat}
+Target format: {outputFormat}
-Create a generation prompt that:
+Write clear, detailed instructions that tell the AI how to generate JSON content using the CANONICAL JSON FORMAT. Focus on:
 1. Identifies what content is most important for the user
 2. Specifies how to structure and organize the content
 3. Includes any specific formatting or presentation requirements
 4. Preserves any language requirements
 5. Ensures the document meets the user's needs
-IMPORTANT: Always generate content in STANDARDIZED JSON FORMAT. In your response, include the exact text "PLACEHOLDER_FOR_FORMAT_RULES" where specific format rules will be inserted afterwards automatically.
+1. What content is most important for the user
 2. How to structure and organize the content using the canonical JSON format with 'sections'
 3. Specific formatting requirements for the target format
 4. Language requirements to preserve
 5. How to ensure the JSON content meets the user's needs
-CRITICAL: You MUST start your response with exactly "Generate a {outputFormat} document that:" - do NOT use "docx" or any other format. Use the exact format specified: {outputFormat}
+CRITICAL: The AI MUST generate content using the CANONICAL JSON FORMAT with this exact structure:
 {{
  "metadata": {{
    "title": "Document Title"
  }},
  "sections": [
    {{
      "id": "section_1",
      "content_type": "table",
      "elements": [
        {{
          "headers": ["Column1", "Column2", "Column3"],
          "rows": [
            ["Value1", "Value2", "Value3"],
            ["Value4", "Value5", "Value6"]
          ]
        }}
      ],
      "order": 1
    }}
  ]
 }}
-Return only the generation prompt, starting with "Generate a {outputFormat} document that..."
+The AI should NOT create format-specific structures like "sheets" or "columns" - only use the canonical format with "sections" and "elements".
 Write the instructions as plain text, not JSON. Start with "Generate JSON content that..." and provide clear, actionable instructions for creating structured JSON data in the canonical format.
 """
        # Call AI service to generate the prompt
@ -423,7 +542,7 @@ Return only the generation prompt, starting with "Generate a {outputFormat} docu
        except Exception:
            pass
-        return result if result else f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content. User requirements: {userPrompt}"
+        return result if result else f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content."
    except Exception as e:
        # Fallback on any error - preserve user prompt for language instructions
@ -433,105 +552,104 @@ Return only the generation prompt, starting with "Generate a {outputFormat} docu
 def _getFormatRules(outputFormat: str) -> str:
    """
-    Get format-specific rules for JSON-based generation.
+    Get format-specific rules for the generation prompt.
    Since we now use standardized JSON, all formats follow the same rules.
    """
-    return """
+    format_rules = {
- Generate content in standardized JSON format following the document schema
+        "xlsx": """
- Tables: Use JSON table format with headers and rows arrays
+XLSX Format Rules:
- Lists: Use JSON list format with items array
+- Create tables with clear headers and organized data
- Text: Use JSON paragraph format with text field
+- Use appropriate column widths and formatting
- Headings: Use JSON heading format with level field
+- Include summary information if relevant
- Structure: Follow the document JSON schema exactly
+- Ensure data is properly structured for spreadsheet analysis
-""".strip()
+""",
        "pdf": """
 PDF Format Rules:
 - Create professional document layout
 - Use appropriate headings and sections
 - Include proper spacing and formatting
 - Ensure content is well-organized and readable
 """,
        "docx": """
 DOCX Format Rules:
 - Create professional document layout
 - Use appropriate headings and sections
 - Include proper spacing and formatting
 - Ensure content is well-organized and readable
 """,
        "html": """
 HTML Format Rules:
 - Create clean, semantic HTML structure
 - Use appropriate tags for content organization
 - Include proper styling classes
 - Ensure content is accessible and well-formatted
 """,
        "json": """
 JSON Format Rules:
 - Create well-structured JSON data
 - Use appropriate nesting and organization
 - Include metadata and context information
 - Ensure data is properly formatted and valid
 """,
        "csv": """
 CSV Format Rules:
 - Create clear, organized tabular data
 - Use appropriate headers and data types
 - Ensure proper CSV formatting
 - Include all relevant data in structured format
 """,
        "txt": """
 TXT Format Rules:
 - Create clean, readable text format
 - Use appropriate spacing and organization
 - Include clear headings and sections
 - Ensure content is well-structured and easy to read
 """
    }
    return format_rules.get(outputFormat.lower(), f"""
 {outputFormat.upper()} Format Rules:
 - Create well-structured content appropriate for {outputFormat}
 - Use appropriate formatting and organization
 - Ensure content is clear and professional
 - Include all relevant information in proper format
 """)
 async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None, services=None) -> str:
    """
-    Use AI to extract a rich, structured extraction intent from the user prompt.
+    Parse user prompt to extract the core extraction intent.
    Include language, normalization, structure needs, headers, formats, row strategy, and multi-file guidance.
    """
    if not aiService:
-        # Fallback if no AI service available
+        return f"Extract content from the provided documents and create a {outputFormat} report."
        return "Extract all relevant content from the document according to the user's requirements"
    try:
-        # Protect userPrompt from injection by escaping quotes and newlines
+        analysis_prompt = f"""
-        safeUserPrompt = userPrompt.replace('"', '\\"').replace("'", "\\'").replace('\n', ' ').replace('\r', ' ')
+Analyze this user request and extract the core extraction intent:
        # Rich analysis to derive a complete extraction intent and structure guidance
        extractionPrompt = f"""
 Analyze the user's request and produce a RICH extraction intent. Return ONLY JSON.
-Goals:
+User request: "{userPrompt}"
- Detect language and normalize the request into a full, explicit instruction (no summary; preserve all constraints and details).
+Target format: {outputFormat}
 - Decide if structured data is required; if so, define the target structure precisely (headers, order, formats, row strategy).
 - Identify if multi-file output is appropriate and how to split/files name.
-User request: "{safeUserPrompt}"
+Extract the main intent and requirements for document processing. Focus on:
 1. What content needs to be extracted
 2. How it should be organized
 3. Any specific requirements or preferences
-Return JSON in this exact shape:
+Respond with a clear, concise statement of the extraction intent.
 {{
  "detectedLanguage": "de|en|fr|it|...",
  "normalizedRequest": "Full explicit instruction in detected language",
  "requiresStructuredData": true|false,
  "targetStructure": "table|list|mixed|unstructured",
  "table": {{
    "headers": ["Header1", "Header2", "..."],
    "headerOrderStrict": true|false,
    "rowStrategy": "one_row_per_document|one_row_per_entity|one_row_per_vat_rate|custom",
    "formats": {{
      "dateFormat": "DD.MM.YYYY|YYYY-MM-DD|...",
      "amountDecimals": 2,
      "currencyFormat": "code|symbol",
      "idMasking": "none|last4|custom"
    }}
  }},
  "multiFile": true|false,
  "fileSplitStrategy": "single|per_entity|by_section|by_criteria|custom",
  "fileNamingPattern": "suggested pattern for filenames",
  "constraints": ["List of critical constraints to enforce"],
  "reasoning": "Brief justification (one sentence)"
 }}
 Rules:
 - Preserve user terminology and language in normalizedRequest.
 - If the user listed columns/fields, copy them exactly into table.headers and set headerOrderStrict=true.
 - If the user implies separate rows for rates/entities, set an appropriate rowStrategy (e.g., one_row_per_vat_rate).
 - If no structure is required, set requiresStructuredData=false and targetStructure="unstructured".
 """
        # Call AI service to extract intention
        services.utils.debugLogToFile("DEBUG: Calling AI for extraction intent...", "PROMPT_BUILDER")
        # Import and set proper options for AI call
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL
-        request = AiCallRequest(prompt=extractionPrompt, context="", options=request_options)
+        request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
        response = await aiService.aiObjects.call(request)
        result = response.content if response else ""
        services.utils.debugLogToFile(f"DEBUG: Extraction intent processed", "PROMPT_BUILDER")
        # Try to extract and pretty print JSON
        if result:
            import re, json as _json
            match = re.search(r'\{[\s\S]*\}', result)
            if match:
                try:
                    obj = _json.loads(match.group(0))
                    return _json.dumps(obj, ensure_ascii=False, indent=2)
                except Exception:
                    pass
        # Fallback to previous simple format
        return f"Extract: {safeUserPrompt}"
        if response and response.content:
            return response.content.strip()
        else:
            return f"Extract content from the provided documents and create a {outputFormat} report."
    except Exception as e:
-        # Fallback on any error - preserve user prompt for language instructions
+        services.utils.debugLogToFile(f"Extraction intent analysis failed: {str(e)}", "PROMPT_BUILDER")
-        services.utils.debugLogToFile(f"DEBUG: AI extraction intent failed: {str(e)}", "PROMPT_BUILDER")
+        return f"Extract content from the provided documents and create a {outputFormat} report."
        return f"Extract: {userPrompt}"
--- a/modules/services/serviceNormalization/mainServiceNormalization.py
+++ b/modules/services/serviceNormalization/mainServiceNormalization.py
@ -28,13 +28,17 @@ class NormalizationService:
                continue
            # Extract table data from elements array
            hdrs = []
            rows = []
            for element in section.get("elements", []):
                if isinstance(element, dict) and "headers" in element and "rows" in element:
                    hdrs = element.get("headers") or []
                    rows = element.get("rows") or []
                    break
-            else:
+            
            if not hdrs or not rows:
                continue
            for h in hdrs:
                if not isinstance(h, str):
                    continue
@ -122,13 +126,14 @@ class NormalizationService:
                continue
            # Extract table data from elements array
            sourceHeaders = []
            sourceRows = []
            for element in section.get("elements", []):
                if isinstance(element, dict) and "headers" in element and "rows" in element:
                    sourceHeaders = element.get("headers") or []
                    sourceRows = element.get("rows") or []
                    break
-            else:
+            
                continue
            if not sourceHeaders or not sourceRows:
                continue
--- a/modules/services/serviceWorkflow/mainServiceWorkflow.py
+++ b/modules/services/serviceWorkflow/mainServiceWorkflow.py
@ -78,11 +78,15 @@ class WorkflowService:
    def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
        """Get ChatDocuments from a list of document references using all three formats."""
        try:
-            # Get the current workflow from services (same pattern as setWorkflowContext)
+            workflow = self.services.currentWorkflow
-            workflow = getattr(self.services, 'currentWorkflow', None) or self.workflow
+            
-            if not workflow:
+            # Reload workflow from database to ensure we have all messages
-                logger.error("No workflow available for document list resolution")
+            if hasattr(workflow, 'id'):
-                return []
+                try:
                    workflow = self.getWorkflow(workflow.id)
                    logger.debug(f"Reloaded workflow {workflow.id} with {len(workflow.messages)} messages")
                except Exception as e:
                    logger.warning(f"Could not reload workflow from database: {str(e)}")
            all_documents = []
            for doc_ref in documentList:
@ -418,11 +422,7 @@ class WorkflowService:
    def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
        """Set current workflow context for document generation and routing"""
        try:
-            # Get the current workflow from services
+            workflow = self.services.currentWorkflow
            workflow = getattr(self.services, 'currentWorkflow', None) or self.workflow
            if not workflow:
                logger.error("No workflow available for context setting")
                return
            # Prepare update data
            update_data = {}
@ -529,10 +529,7 @@ class WorkflowService:
    def getDocumentCount(self) -> str:
        """Get document count for task planning (matching old handlingTasks.py logic)"""
        try:
-            # Get the current workflow from services
+            workflow = self.services.currentWorkflow
            workflow = getattr(self.services, 'currentWorkflow', None) or self.workflow
            if not workflow:
                return "No documents available"
            # Count documents from all messages in the workflow (like old system)
            total_docs = 0
@ -551,10 +548,7 @@ class WorkflowService:
    def getWorkflowHistoryContext(self) -> str:
        """Get workflow history context for task planning (matching old handlingTasks.py logic)"""
        try:
-            # Get the current workflow from services
+            workflow = self.services.currentWorkflow
            workflow = getattr(self.services, 'currentWorkflow', None) or self.workflow
            if not workflow:
                return "No previous round context available"
            # Check if there are any previous rounds by looking for "first" messages
            has_previous_rounds = False
--- a/modules/workflows/processing/modes/modeReact.py
+++ b/modules/workflows/processing/modes/modeReact.py
@ -226,7 +226,7 @@ class ReactMode(BaseMode):
        # Get available documents from the current workflow
        try:
-            available_docs = self.services.workflow.getAvailableDocuments(context.workflow)
+            available_docs = self.services.workflow.getAvailableDocuments(self.services.currentWorkflow)
            if not available_docs or available_docs == "No documents available":
                logger.warning("No documents available for validation")
                return
--- a/modules/workflows/processing/shared/placeholderFactory.py
+++ b/modules/workflows/processing/shared/placeholderFactory.py
@ -68,20 +68,12 @@ def extractWorkflowHistory(service: Any, context: Any) -> str:
    """Extract workflow history from context. Maps to {{KEY:WORKFLOW_HISTORY}}
    Reverse-chronological, enriched with message summaries and document labels.
    """
    # Prefer explicit workflow on context; else fall back to services.workflow
    workflow = None
    try:
-        if hasattr(context, 'workflow') and context.workflow:
+        history = getPreviousRoundContext(service, service.currentWorkflow)
            workflow = context.workflow
        elif hasattr(service, 'workflow') and service.workflow:
            workflow = service.workflow
    except Exception:
        workflow = None
    if workflow:
        history = getPreviousRoundContext(service, workflow)
        return history or "No previous workflow rounds available"
-    return "No previous workflow rounds available"
+    except Exception as e:
        logger.error(f"Error getting workflow history: {str(e)}")
        return "No previous workflow rounds available"
 def extractAvailableMethods(service: Any) -> str:
    """Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}"""
@ -390,7 +382,7 @@ def extractLatestRefinementFeedback(context: Any) -> str:
 def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
    """Summary of available documents (count only)."""
    try:
-        documents = service.workflow.getAvailableDocuments(context.workflow)
+        documents = service.workflow.getAvailableDocuments(service.currentWorkflow)
        if documents and documents != "No documents available":
            # Count only actual documents, not list labels
            doc_count = documents.count("docItem:")
@ -403,7 +395,7 @@ def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
 def extractAvailableDocumentsIndex(service: Any, context: Any) -> str:
    """Index of available documents with detailed references for parameter generation."""
    try:
-        return service.workflow.getAvailableDocuments(context.workflow)
+        return service.workflow.getAvailableDocuments(service.currentWorkflow)
    except Exception as e:
        logger.error(f"Error getting document index: {str(e)}")
        return "No documents available"