Merge branch 'int' into feat/chatbot

This commit is contained in:
ValueOn AG 2025-10-15 12:38:42 +02:00 committed by GitHub
commit 57118a633e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
93 changed files with 13774 additions and 3691 deletions

View file

@ -30,3 +30,8 @@ Web_Search_MIN_RESULTS = 1
Web_Crawl_TIMEOUT = 30
Web_Crawl_MAX_RETRIES = 3
Web_Crawl_RETRY_DELAY = 2
# Web Research configuration
Web_Research_MAX_DEPTH = 2
Web_Research_MAX_LINKS_PER_DOMAIN = 4
Web_Research_CRAWL_TIMEOUT_MINUTES = 10

View file

@ -66,14 +66,14 @@ Connector_AiAnthropic_MAX_TOKENS = 2000
# Perplexity AI configuration
Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
Connector_AiPerplexity_API_SECRET = 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
Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQm82Mzk2Q1MwZ0dNcUVBcUtuRDJIcTZkMXVvYnpjM3JEMzJiT1NKSHljX282ZDIyZTJYc09VSTdVNXAtOWU2UXp5S193NTk5dHJsWlFjRjhWektFOG1DVGY4ZUhHTXMzS0RPN1lNcF9nSlVWbW5BZ1hkZDVTejl6bVZNRFVvX29xamJidWRFMmtjQmkyRUQ2RUh6UTN1aWNPSUJBPT0=
Connector_AiPerplexity_MODEL_NAME = sonar
Connector_AiPerplexity_TEMPERATURE = 0.2
Connector_AiPerplexity_MAX_TOKENS = 2000
# Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQk4xYnpmbnItUEU3dHU4eHB5dzVYay1WT012RTRLUWJDTlBILVY5dC1FX3VMNjZmLThrbDRFNWFSNGprY3RRTlpYNGlubVBpNnY3MjNJcGtzVk9PMzRacl9LUlM2RU5vTVVZWHJvaUhWSHVfc1pNR0pfQmI5SEprOG5KdlB1QnQ=
Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
Service_MSFT_TENANT_ID = common
# Google Service configuration
@ -88,3 +88,7 @@ Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhI
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = True
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat

View file

@ -66,14 +66,14 @@ Connector_AiAnthropic_MAX_TOKENS = 2000
# Perplexity AI configuration
Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
Connector_AiPerplexity_API_SECRET = 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
Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQm82Mzk2UWZJdUFhSW8yc3RKc0tKRXphd0xWMkZOVlFpSGZ4SGhFWnk0cTF5VjlKQVZjdS1QSWdkS0pUSWw4OFU5MjUxdTVQel9aeWVIZTZ5TXRuVmFkZG0zWEdTOGdHMHpsTzI0TGlWYURKU1Q0VVpKTlhxUk5FTmN6SUJScDZ3ZldIaUJZcWpaQVRiSEpyQm9tRTNDWk9KTnZBPT0=
Connector_AiPerplexity_MODEL_NAME = sonar
Connector_AiPerplexity_TEMPERATURE = 0.2
Connector_AiPerplexity_MAX_TOKENS = 2000
# Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNzB2M3ZjaE1SVE9ON2FKam9yVURxcHl1Ym5VNVUtS0MyWUpNVXVlaWpWS2U3VVd3em9vQl9lcnVYay03bS04YjNBbDZZNTB4eUtjT3ppQjJjY3dOT0FNLW9LeDhIUU5iaTNqNURUWE5La3kzaHNGcU9yNVI0YjhWZTZRRFktcTk=
Service_MSFT_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
Service_MSFT_TENANT_ID = common
# Google Service configuration
@ -88,3 +88,7 @@ Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = False
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat

View file

@ -66,14 +66,14 @@ Connector_AiAnthropic_MAX_TOKENS = 2000
# Perplexity AI configuration
Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
Connector_AiPerplexity_API_SECRET = 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
Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQm82Mzk2Q1FGRkJEUkI4LXlQbHYzT2RkdVJEcmM4WGdZTWpJTEhoeUF1NW5LUVpJdDBYN3k1WFN4a2FQSWJSQmd0U0xJbzZDTmFFN05FcXl0Z3V1OEpsZjYydV94TXVjVjVXRTRYSWdLMkd5XzZIbFV6emRCZHpuOUpQeThadE5xcDNDVGV1RHJrUEN0c1BBYXctZFNWcFRuVXhRPT0=
Connector_AiPerplexity_MODEL_NAME = sonar
Connector_AiPerplexity_TEMPERATURE = 0.2
Connector_AiPerplexity_MAX_TOKENS = 2000
# Agent Mail configuration
Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
Service_MSFT_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVEhHdlZHU3FNMmhuRGVwaGc3YzIxSjlZNzBCQjlOV2pSYVNXb0t1ZnVwQzZsQzY4cHMtVlZtNF85OEVaV1BMTzdXMmpzaGZpaG1DalJ0bkNPMHA5ZUcwZjNDdGk1TFdxYTJSZnVrVmhhZ2VRUEZxbjJOOGFhWk9EYlY3dmRVTnI=
Service_MSFT_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm83T29rSzdYLTRydXN5V3lQLXhmRjMyQ1FOaGpuek45QllaX1REN2s5aWNIUl81NGlrYlJTeFV0RlRZd0xPcm5uMDM4QlpibHJQbm5XZTlWeWxfcWNVdFpCUHI2amh0MVBnZ21IN2ptSkhWLTVfaHEwNmI5SEtiS05pQmt5eV8yMnhLMEc=
Service_MSFT_TENANT_ID = common
# Google Service configuration
@ -88,3 +88,7 @@ Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNjlJdmFMeERXUUQ
# Feature SyncDelta JIRA configuration
Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pTDhnTVNzRUhScU8wYnZsZk52bHFkSWxLc18xQmtCeC1HbnNwTzVBbXRNTmQzRjZYaGE2MVlCNGtnWDk1T2I5VXVKNHpKU1VRbXEyN2tRWUJnU2ltZE5qZ3lmNEF6Z1hMTTEwZkk2NUNBYjhmVTJEcWpRUW9HNEVpSGFWdjBWQXQ3eUtHUTFJS3U5QWpaeno0RFNhMUxnPT0=
# Debug Configuration
APP_DEBUG_CHAT_WORKFLOW_ENABLED = False
APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat

View file

@ -1,5 +1,6 @@
import logging
import httpx
import os
from typing import Dict, Any, List, Union
from fastapi import HTTPException
from modules.shared.configuration import APP_CONFIG
@ -147,6 +148,11 @@ class AiAnthropic:
# Direct content as string (in older API versions)
content = anthropicResponse["content"]
# Debug logging for empty responses
if not content or content.strip() == "":
logger.warning(f"Anthropic API returned empty content. Full response: {anthropicResponse}")
content = "[Anthropic API returned empty response]"
# Return in OpenAI format
return {
"id": anthropicResponse.get("id", ""),
@ -182,14 +188,27 @@ class AiAnthropic:
The analysis response as text
"""
try:
# Debug logging
logger.info(f"callAiImage called with imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
# Distinguish between file path and binary data
if isinstance(imageData, str):
# Check if it's base64 encoded data or a file path
if len(imageData) > 100 and not os.path.exists(imageData):
# It's likely base64 encoded data
logger.info("Treating imageData as base64 encoded string")
base64Data = imageData
if not mimeType:
mimeType = "image/png"
else:
# It's a file path - import filehandling only when needed
logger.info(f"Treating imageData as file path: {imageData}")
from modules import agentserviceFilemanager as fileHandler
base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
mimeType = mimeType or autoMimeType
else:
# It's binary data
logger.info("Treating imageData as binary data")
import base64
base64Data = base64.b64encode(imageData).decode('utf-8')
# MIME type must be specified for binary data
@ -216,8 +235,16 @@ class AiAnthropic:
# Use the existing callAiBasic function with the Vision model
response = await self.callAiBasic(messages)
# Extract and return content
return response["choices"][0]["message"]["content"]
# Extract and return content with proper error handling
try:
content = response["choices"][0]["message"]["content"]
if content is None or content.strip() == "":
return "[AI returned empty response for image analysis]"
return content
except (KeyError, IndexError, TypeError) as e:
logger.error(f"Error extracting content from AI response: {str(e)}")
logger.error(f"Response structure: {response}")
return f"[Error extracting AI response: {str(e)}]"
except Exception as e:
logger.error(f"Error during image analysis: {str(e)}", exc_info=True)

View file

@ -189,3 +189,82 @@ class AiOpenai:
except Exception as e:
logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
return f"[Error during image analysis: {str(e)}]"
async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> Dict[str, Any]:
    """
    Generate an image using DALL-E 3.

    Args:
        prompt: The text prompt for image generation
        size: Image size (1024x1024, 1792x1024, or 1024x1792)
        quality: Image quality (standard or hd)
        style: Image style (vivid or natural)

    Returns:
        Dictionary with "success" (bool) and either "image_data" (base64 str)
        plus the size/quality/style echoed back, or an "error" message.
        Never raises: all failures are reported via the returned dict.
    """
    try:
        logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'")
        # DALL-E 3 API endpoint
        dalle_url = "https://api.openai.com/v1/images/generations"
        payload = {
            "model": "dall-e-3",
            "prompt": prompt,
            "size": size,
            "quality": quality,
            "style": style,
            "n": 1,
            "response_format": "b64_json"  # Get base64 data directly instead of URLs
        }
        # Use the client as an async context manager so it is always closed,
        # even when the request raises; the previous manual create/aclose
        # leaked the client on any exception from post().
        async with httpx.AsyncClient(
            timeout=120.0,
            headers={
                "Authorization": f"Bearer {self.apiKey}",
                "Content-Type": "application/json"
            }
        ) as dalle_client:
            response = await dalle_client.post(dalle_url, json=payload)
        if response.status_code != 200:
            logger.error(f"DALL-E API error: {response.status_code} - {response.text}")
            return {
                "success": False,
                "error": f"DALL-E API error: {response.status_code} - {response.text}"
            }
        responseJson = response.json()
        if "data" in responseJson and len(responseJson["data"]) > 0:
            image_data = responseJson["data"][0]["b64_json"]
            logger.info(f"Successfully generated image: {len(image_data)} characters")
            return {
                "success": True,
                "image_data": image_data,
                "size": size,
                "quality": quality,
                "style": style
            }
        logger.error("No image data in DALL-E response")
        return {
            "success": False,
            "error": "No image data in DALL-E response"
        }
    except Exception as e:
        logger.error(f"Error during image generation: {str(e)}", exc_info=True)
        return {
            "success": False,
            "error": f"Error during image generation: {str(e)}"
        }

View file

@ -271,6 +271,7 @@ class ConnectorWeb:
include_domains: list[str] | None = None,
exclude_domains: list[str] | None = None,
language: str | None = None,
country: str | None = None,
include_answer: bool | None = None,
include_raw_content: bool | None = None,
) -> list[WebSearchResult]:
@ -290,17 +291,20 @@ class ConnectorWeb:
kwargs["time_range"] = time_range
if topic is not None:
kwargs["topic"] = topic
if include_domains is not None:
if include_domains is not None and len(include_domains) > 0:
kwargs["include_domains"] = include_domains
if exclude_domains is not None:
kwargs["exclude_domains"] = exclude_domains
if language is not None:
kwargs["language"] = language
if country is not None:
kwargs["country"] = country
if include_answer is not None:
kwargs["include_answer"] = include_answer
if include_raw_content is not None:
kwargs["include_raw_content"] = include_raw_content
logger.debug(f"Tavily.search kwargs: {kwargs}")
response = await self.client.search(**kwargs)
return [

View file

@ -135,3 +135,29 @@ class AiCallResponse(BaseModel):
costEstimate: Optional[float] = Field(default=None, description="Estimated cost of the call")
class EnhancedAiCallOptions(AiCallOptions):
    """Enhanced options for improved document processing with chunk mapping."""
    # Parallel processing
    enableParallelProcessing: bool = Field(
        default=True,
        description="Enable parallel processing of chunks"
    )
    # Bounded to 1..20 so callers can neither disable concurrency via 0
    # nor flood the backend with an unbounded fan-out.
    maxConcurrentChunks: int = Field(
        default=5,
        ge=1,
        le=20,
        description="Maximum number of chunks to process concurrently"
    )
    # Chunk mapping
    preserveChunkMetadata: bool = Field(
        default=True,
        description="Preserve chunk metadata during processing"
    )
    # Inserted between chunk results when they are merged into one output.
    chunkSeparator: str = Field(
        default="\n\n---\n\n",
        description="Separator between chunks in merged output"
    )

View file

@ -0,0 +1,130 @@
from typing import Any, Dict, List, Optional, Literal, Union
from pydantic import BaseModel, Field
from datetime import datetime
class DocumentMetadata(BaseModel):
    """Metadata for the entire document."""
    title: str = Field(description="Document title")
    author: Optional[str] = Field(default=None, description="Document author")
    # default_factory=datetime.now: stamped when the model instance is created.
    created_at: datetime = Field(default_factory=datetime.now, description="Creation timestamp")
    source_documents: List[str] = Field(default_factory=list, description="Source document IDs")
    extraction_method: str = Field(default="ai_extraction", description="Method used for extraction")
    version: str = Field(default="1.0", description="Document version")


class TableData(BaseModel):
    """Structured table data."""
    headers: List[str] = Field(description="Table column headers")
    # Each inner list is one row; cell values are kept as strings.
    rows: List[List[str]] = Field(description="Table data rows")
    caption: Optional[str] = Field(default=None, description="Table caption")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Table metadata")


class ListItem(BaseModel):
    """Individual list item with optional sub-items."""
    text: str = Field(description="List item text")
    # Self-referencing type; the forward reference is resolved at module bottom
    # via model_rebuild()/update_forward_refs().
    subitems: Optional[List['ListItem']] = Field(default=None, description="Nested sub-items")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Item metadata")


class BulletList(BaseModel):
    """Bulleted or numbered list."""
    items: List[ListItem] = Field(description="List items")
    list_type: Literal["bullet", "numbered", "checklist"] = Field(default="bullet", description="List type")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="List metadata")


class Paragraph(BaseModel):
    """Text paragraph with optional formatting."""
    text: str = Field(description="Paragraph text")
    formatting: Optional[Dict[str, Any]] = Field(default=None, description="Text formatting (bold, italic, etc.)")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Paragraph metadata")


class Heading(BaseModel):
    """Document heading."""
    text: str = Field(description="Heading text")
    # Constrained to the HTML-style range h1..h6.
    level: int = Field(ge=1, le=6, description="Heading level (1-6)")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Heading metadata")


class CodeBlock(BaseModel):
    """Code block with syntax highlighting."""
    code: str = Field(description="Code content")
    language: Optional[str] = Field(default=None, description="Programming language")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Code block metadata")


class Image(BaseModel):
    """Image with metadata."""
    # Image bytes are stored inline as base64 text, not as a URL or path.
    data: str = Field(description="Base64 encoded image data")
    alt_text: Optional[str] = Field(default=None, description="Alternative text")
    caption: Optional[str] = Field(default=None, description="Image caption")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Image metadata")
class DocumentSection(BaseModel):
    """A section of the document containing one or more content elements."""
    id: str = Field(description="Unique section identifier")
    title: Optional[str] = Field(default=None, description="Section title")
    # NOTE(review): "mixed" presumably marks sections combining several
    # element types — confirm against the extraction prompt/producer.
    content_type: Literal["table", "list", "paragraph", "heading", "code", "image", "mixed"] = Field(description="Primary content type")
    elements: List[Union[TableData, BulletList, Paragraph, Heading, CodeBlock, Image]] = Field(description="Content elements in this section")
    # Position of the section within the document.
    order: int = Field(description="Section order in document")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Section metadata")
class StructuredDocument(BaseModel):
    """Complete structured document in JSON format."""
    metadata: DocumentMetadata = Field(description="Document metadata")
    sections: List[DocumentSection] = Field(description="Document sections")
    summary: Optional[str] = Field(default=None, description="Document summary")
    tags: List[str] = Field(default_factory=list, description="Document tags")

    def get_sections_by_type(self, content_type: str) -> List[DocumentSection]:
        """Get all sections of a specific content type."""
        return [section for section in self.sections if section.content_type == content_type]

    def get_all_tables(self) -> List[TableData]:
        """Get all table data from the document, in section/element order."""
        # Flattened nested append-loop into a comprehension; behavior unchanged.
        return [
            element
            for section in self.sections
            for element in section.elements
            if isinstance(element, TableData)
        ]

    def get_all_lists(self) -> List[BulletList]:
        """Get all lists from the document, in section/element order."""
        return [
            element
            for section in self.sections
            for element in section.elements
            if isinstance(element, BulletList)
        ]
class JsonChunkResult(BaseModel):
    """Result from processing a single chunk with JSON output."""
    chunk_id: str = Field(description="Chunk identifier")
    document_section: DocumentSection = Field(description="Structured content from this chunk")
    processing_time: float = Field(description="Processing time in seconds")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Chunk processing metadata")


class JsonMergeResult(BaseModel):
    """Result from merging multiple JSON chunks."""
    merged_document: StructuredDocument = Field(description="Merged structured document")
    merge_strategy: str = Field(description="Strategy used for merging")
    chunks_processed: int = Field(description="Number of chunks processed")
    merge_time: float = Field(description="Time taken to merge chunks")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Merge process metadata")


# Resolve the self-referencing ListItem.subitems forward reference
# (compatible with Pydantic v1 and v2).
try:
    # Pydantic v2
    ListItem.model_rebuild()
except AttributeError:
    # Pydantic v1
    ListItem.update_forward_refs()

View file

@ -18,6 +18,16 @@ class ContentExtracted(BaseModel):
summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")
class ChunkResult(BaseModel):
    """Preserves the relationship between a chunk and its AI result."""
    # The chunk exactly as it was submitted for processing.
    originalChunk: ContentPart
    # Text returned by the model for this chunk.
    aiResult: str
    # Position of the chunk within its source document — presumably
    # zero-based; confirm against the chunking code.
    chunkIndex: int
    # Identifier of the document the chunk came from.
    documentId: str
    # Seconds spent processing this chunk; 0.0 when not measured.
    processingTime: float = 0.0
    metadata: Dict[str, Any] = Field(default_factory=dict)
class MergeStrategy(BaseModel):
"""Strategy configuration for merging content parts and AI results."""

View file

@ -1,4 +1,5 @@
import logging
import asyncio
from typing import Dict, Any, List, Union, Tuple, Optional
from dataclasses import dataclass
@ -260,6 +261,7 @@ class AiObjects:
if not requiredTags:
requiredTags = OPERATION_TAG_MAPPING.get(options.operationType, [ModelTags.TEXT, ModelTags.CHAT])
# Override priority based on processing mode if not explicitly set
effectivePriority = options.priority
if options.priority == Priority.BALANCED:
@ -268,6 +270,7 @@ class AiObjects:
logger.info(f"Model selection - Operation: {options.operationType}, Required tags: {requiredTags}, Priority: {effectivePriority}")
for name, info in aiModels.items():
logger.info(f"Checking model: {name}, tags: {info.get('tags', [])}, function: {info.get('function', 'unknown')}")
# Check context length
if info["contextLength"] > 0 and totalSize > info["contextLength"] * 0.8:
continue
@ -279,8 +282,11 @@ class AiObjects:
# Check required tags/capabilities
modelTags = info.get("tags", [])
if requiredTags and not any(tag in modelTags for tag in requiredTags):
if requiredTags and not all(tag in modelTags for tag in requiredTags):
logger.info(f" -> Skipping {name}: missing required tags. Has: {modelTags}, needs: {requiredTags}")
continue
else:
logger.info(f" -> {name} passed tag check")
# Check processing mode requirements
if options.processingMode == ProcessingMode.DETAILED and ModelTags.FAST in modelTags:
@ -288,16 +294,24 @@ class AiObjects:
continue
candidates[name] = info
logger.info(f" -> {name} added to candidates")
logger.info(f"Final candidates: {list(candidates.keys())}")
if not candidates:
logger.info("No candidates found, using fallback")
# Fallback based on operation type
if options.operationType == OperationType.IMAGE_ANALYSIS:
logger.info("Using fallback: openai_callAiImage")
return "openai_callAiImage"
elif options.operationType == OperationType.IMAGE_GENERATION:
logger.info("Using fallback: openai_generateImage")
return "openai_generateImage"
elif options.operationType == OperationType.WEB_RESEARCH:
logger.info("Using fallback: perplexity_callAiWithWebSearch")
return "perplexity_callAiWithWebSearch"
else:
logger.info("Using fallback: openai_callAiBasic_gpt35")
return "openai_callAiBasic_gpt35"
# Special handling for planning operations - use Claude for consistency
@ -313,17 +327,60 @@ class AiObjects:
# Select based on priority for other operations
if effectivePriority == Priority.SPEED:
return max(candidates, key=lambda k: candidates[k]["speedRating"])
selected = max(candidates, key=lambda k: candidates[k]["speedRating"])
logger.info(f"Selected by SPEED: {selected}")
return selected
elif effectivePriority == Priority.QUALITY:
return max(candidates, key=lambda k: candidates[k]["qualityRating"])
selected = max(candidates, key=lambda k: candidates[k]["qualityRating"])
logger.info(f"Selected by QUALITY: {selected}")
return selected
elif effectivePriority == Priority.COST:
return min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])
selected = min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])
logger.info(f"Selected by COST: {selected}")
return selected
else: # BALANCED
def balancedScore(name: str) -> float:
info = candidates[name]
return info["qualityRating"] * 0.4 + info["speedRating"] * 0.3 + (10 - info["costPer1kTokens"] * 1000) * 0.3
return max(candidates, key=balancedScore)
selected = max(candidates, key=balancedScore)
logger.info(f"Selected by BALANCED: {selected}")
return selected
def _getFallbackModels(self, operationType: str) -> List[str]:
    """Return the ordered chain of fallback model names for an operation type.

    The first entry is tried first; unknown operation types resolve to the
    GENERAL chain.
    """
    generalChain = [
        "openai_callAiBasic_gpt35",   # fast and reliable
        "openai_callAiBasic",         # high quality
        "anthropic_callAiBasic",      # alternative high quality
        "perplexity_callAiBasic",     # cost effective
    ]
    chainsByOperation = {
        OperationType.GENERAL: generalChain,
        OperationType.IMAGE_ANALYSIS: [
            "openai_callAiImage",     # primary image analysis
            "anthropic_callAiImage",  # alternative image analysis
        ],
        OperationType.IMAGE_GENERATION: [
            "openai_generateImage",   # only image generation model
        ],
        OperationType.WEB_RESEARCH: [
            "perplexity_callAiWithWebSearch",  # primary web research
            "perplexity_callAiBasic",          # alternative with web search
            "openai_callAiBasic",              # fallback to general model
        ],
        OperationType.GENERATE_PLAN: [
            "anthropic_callAiBasic",      # best for planning
            "openai_callAiBasic",         # high quality alternative
            "openai_callAiBasic_gpt35",   # fast fallback
        ],
        OperationType.ANALYSE_CONTENT: [
            "anthropic_callAiBasic",      # best for analysis
            "openai_callAiBasic",         # high quality alternative
            "openai_callAiBasic_gpt35",   # fast fallback
        ],
    }
    return chainsByOperation.get(operationType, generalChain)
def _connectorFor(self, modelName: str):
"""Get the appropriate connector for the model."""
@ -340,7 +397,7 @@ class AiObjects:
raise ValueError(f"Unknown connector type: {connectorType}")
async def call(self, request: AiCallRequest) -> AiCallResponse:
"""Call AI model for text generation."""
"""Call AI model for text generation with fallback mechanism."""
prompt = request.prompt
context = request.context or ""
options = request.options
@ -357,9 +414,6 @@ class AiObjects:
if options.compressContext and len(context.encode("utf-8")) > 70000:
context = maybeTruncate(context, 70000)
# Select model for text generation
modelName = self._selectModel(prompt, context, options)
# Derive generation parameters
temperature = getattr(options, "temperature", None)
if temperature is None:
@ -376,6 +430,15 @@ class AiObjects:
messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
messages.append({"role": "user", "content": prompt})
# Get fallback models for this operation type
fallbackModels = self._getFallbackModels(options.operationType)
# Try primary model first, then fallbacks
lastError = None
for attempt, modelName in enumerate(fallbackModels):
try:
logger.info(f"Attempting AI call with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
connector = self._connectorFor(modelName)
functionName = aiModels[modelName]["function"]
@ -406,29 +469,74 @@ class AiObjects:
else:
raise ValueError(f"Function {functionName} not supported for text generation")
# Estimate cost/tokens
# Success! Estimate cost/tokens and return
totalSize = len((prompt + context).encode("utf-8"))
cost = self._estimateCost(aiModels[modelName], totalSize)
usedTokens = int(totalSize / 4)
logger.info(f"✅ AI call successful with model: {modelName}")
return AiCallResponse(content=content, modelName=modelName, usedTokens=usedTokens, costEstimate=cost)
except Exception as e:
lastError = e
logger.warning(f"❌ AI call failed with model {modelName}: {str(e)}")
# If this is not the last model, try the next one
if attempt < len(fallbackModels) - 1:
logger.info(f"🔄 Trying next fallback model...")
continue
else:
# All models failed
logger.error(f"💥 All {len(fallbackModels)} models failed for operation {options.operationType}")
break
# All fallback attempts failed
errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
logger.error(errorMsg)
raise Exception(errorMsg)
async def callImage(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None, options: AiCallOptions = None) -> str:
"""Call AI model for image analysis."""
"""Call AI model for image analysis with fallback mechanism."""
if options is None:
options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
# Select model for image analysis
modelName = self._selectModel(prompt, "", options)
# Get fallback models for image analysis
fallbackModels = self._getFallbackModels(OperationType.IMAGE_ANALYSIS)
# Try primary model first, then fallbacks
lastError = None
for attempt, modelName in enumerate(fallbackModels):
try:
logger.info(f"Attempting image analysis with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
connector = self._connectorFor(modelName)
functionName = aiModels[modelName]["function"]
if functionName == "callAiImage":
return await connector.callAiImage(prompt, imageData, mimeType)
content = await connector.callAiImage(prompt, imageData, mimeType)
logger.info(f"✅ Image analysis successful with model: {modelName}")
return content
else:
raise ValueError(f"Function {functionName} not supported for image analysis")
except Exception as e:
lastError = e
logger.warning(f"❌ Image analysis failed with model {modelName}: {str(e)}")
# If this is not the last model, try the next one
if attempt < len(fallbackModels) - 1:
logger.info(f"🔄 Trying next fallback model for image analysis...")
continue
else:
# All models failed
logger.error(f"💥 All {len(fallbackModels)} models failed for image analysis")
break
# All fallback attempts failed
errorMsg = f"All AI models failed for image analysis. Last error: {str(lastError)}"
logger.error(errorMsg)
raise Exception(errorMsg)
async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", options: AiCallOptions = None) -> Dict[str, Any]:
"""Generate an image using AI."""
if options is None:
@ -694,7 +802,22 @@ class AiObjects:
logger.warning(f"Failed to extract links from content: {e}")
return []
async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10) -> Dict[str, str]:
def _normalizeUrl(self, url: str) -> str:
"""Normalize URL to handle variations that should be considered duplicates."""
if not url:
return url
# Remove trailing slashes and fragments
url = url.rstrip('/')
if '#' in url:
url = url.split('#')[0]
# Handle common URL variations
url = url.replace('http://', 'https://') # Normalize protocol
return url
async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10, global_processed_urls: Optional[set] = None) -> Dict[str, str]:
"""
Recursively crawl URLs up to specified depth.
@ -703,19 +826,28 @@ class AiObjects:
max_depth: Maximum depth to crawl (1=main pages only, 2=main+sub-pages, etc.)
extract_depth: Tavily extract depth setting
max_per_domain: Maximum URLs per domain per level
global_processed_urls: Optional global set to track processed URLs across sessions
Returns:
Dictionary mapping URL -> content for all crawled pages
"""
logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")
# URL index to track all processed URLs
# URL index to track all processed URLs (local + global)
processed_urls = set()
if global_processed_urls is not None:
# Use global index if provided, otherwise create local one
processed_urls = global_processed_urls
logger.info(f"Using global URL index with {len(processed_urls)} already processed URLs")
else:
logger.info("Using local URL index for this crawl session")
all_content = {}
# Current level URLs to process
current_level_urls = urls.copy()
try:
for depth in range(1, max_depth + 1):
logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
@ -724,18 +856,21 @@ class AiObjects:
next_level_urls = []
for url in current_level_urls:
if url in processed_urls:
logger.debug(f"URL {url} already processed, skipping")
# Normalize URL for duplicate checking
normalized_url = self._normalizeUrl(url)
if normalized_url in processed_urls:
logger.debug(f"URL {url} (normalized: {normalized_url}) already processed, skipping")
continue
try:
logger.info(f"Processing URL at depth {depth}: {url}")
logger.debug(f"Total processed URLs so far: {len(processed_urls)}")
# Read page content
content = await self.readPage(url, extract_depth)
if content:
all_content[url] = content
processed_urls.add(url)
processed_urls.add(normalized_url)
logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
# Get URLs from this page for next level
@ -749,18 +884,21 @@ class AiObjects:
# Add new URLs to next level (avoiding already processed ones)
new_urls_count = 0
for new_url in filtered_urls:
if new_url not in processed_urls:
normalized_new_url = self._normalizeUrl(new_url)
if normalized_new_url not in processed_urls:
next_level_urls.append(new_url)
new_urls_count += 1
else:
logger.debug(f"URL {new_url} (normalized: {normalized_new_url}) already processed, skipping")
logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
else:
logger.warning(f"✗ No content extracted from {url}")
processed_urls.add(url) # Mark as processed to avoid retry
processed_urls.add(normalized_url) # Mark as processed to avoid retry
except Exception as e:
logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
processed_urls.add(url) # Mark as processed to avoid retry
processed_urls.add(normalized_url) # Mark as processed to avoid retry
# Prepare for next iteration
current_level_urls = next_level_urls
@ -772,6 +910,15 @@ class AiObjects:
break
logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
logger.info(f"Total URLs processed (including skipped): {len(processed_urls)}")
logger.info(f"Unique URLs found: {len(all_content)}")
return all_content
except asyncio.TimeoutError:
logger.warning(f"Crawling timed out, returning partial results: {len(all_content)} pages crawled so far")
return all_content
except Exception as e:
logger.error(f"Crawling failed with error: {e}, returning partial results: {len(all_content)} pages crawled so far")
return all_content
async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> str:

View file

@ -571,7 +571,9 @@ class ChatObjects:
actionName=createdMessage.get("actionName")
)
# Debug: Store message and documents for debugging TODO REMOVE
# Debug: Store message and documents for debugging - only if debug enabled
debug_enabled = APP_CONFIG.get("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
if debug_enabled:
self._storeDebugMessageAndDocuments(chat_message)
return chat_message
@ -1052,8 +1054,11 @@ class ChatObjects:
def _storeDebugMessageAndDocuments(self, message: ChatMessage) -> None:
"""
Store message and documents for debugging purposes in fileshare.
Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/documents
Store message and documents (metadata and file bytes) for debugging purposes.
Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/
- message.json, message_text.txt
- document_###_metadata.json
- document_###_<original_filename> (actual file bytes)
Args:
message: ChatMessage object to store
@ -1157,6 +1162,26 @@ class ChatObjects:
logger.info(f"Debug: Stored document metadata for {doc.fileName}")
# Also store the actual file bytes next to metadata for debugging
try:
# Lazy import to avoid circular deps at module load
from modules.interfaces import interfaceDbComponentObjects as comp
componentInterface = comp.getInterface(self.currentUser)
file_bytes = componentInterface.getFileData(doc.fileId)
if file_bytes:
# Build a safe filename preserving original name
safe_name = doc.fileName or f"document_{i+1:03d}"
# Avoid path traversal
safe_name = os.path.basename(safe_name)
doc_file_path = os.path.join(label_folder, f"document_{i+1:03d}_" + safe_name)
with open(doc_file_path, "wb") as df:
df.write(file_bytes)
logger.info(f"Debug: Stored document file bytes: {doc_file_path} ({len(file_bytes)} bytes)")
else:
logger.warning(f"Debug: No file bytes returned for fileId {doc.fileId}")
except Exception as e:
logger.error(f"Debug: Failed to store document file for {doc.fileName} (fileId {doc.fileId}): {e}")
logger.info(f"Debug: Stored message and documents in {message_path}")
except Exception as e:

View file

@ -95,8 +95,8 @@ async def update_prompt(
detail=f"Prompt with ID {promptId} not found"
)
# Convert Prompt to dict for interface
update_data = promptData.dict()
# Convert Prompt to dict for interface, excluding the id field
update_data = promptData.dict(exclude={'id'})
# Update prompt
updatedPrompt = managementInterface.updatePrompt(promptId, update_data)

View file

@ -14,7 +14,7 @@ from pydantic import BaseModel
# Import auth modules
from modules.security.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM
from modules.security.jwtService import createAccessToken, createRefreshToken, setAccessTokenCookie, setRefreshTokenCookie
from modules.security.jwtService import createAccessToken, createRefreshToken, setAccessTokenCookie, setRefreshTokenCookie, clearAccessTokenCookie, clearRefreshTokenCookie
from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface
from modules.datamodels.datamodelUam import User, UserInDB, AuthAuthority, UserPrivilege
from modules.datamodels.datamodelSecurity import Token
@ -263,8 +263,7 @@ async def read_user_me(
@limiter.limit("60/minute")
async def refresh_token(
request: Request,
response: Response,
currentUser: User = Depends(getCurrentUser)
response: Response
) -> Dict[str, Any]:
"""Refresh access token using refresh token from cookie"""
try:
@ -283,12 +282,27 @@ async def refresh_token(
except jwt.JWTError:
raise HTTPException(status_code=401, detail="Invalid refresh token")
# Get user information from refresh token payload
user_id = payload.get("userId")
if not user_id:
raise HTTPException(status_code=401, detail="Invalid refresh token - missing user ID")
# Get user from database using the user ID from refresh token
try:
app_interface = getRootInterface()
current_user = app_interface.getUser(user_id)
if not current_user:
raise HTTPException(status_code=401, detail="User not found")
except Exception as e:
logger.error(f"Failed to get user from database: {str(e)}")
raise HTTPException(status_code=500, detail="Failed to validate user")
# Create new token data
token_data = {
"sub": currentUser.username,
"mandateId": str(currentUser.mandateId),
"userId": str(currentUser.id),
"authenticationAuthority": currentUser.authenticationAuthority
"sub": current_user.username,
"mandateId": str(current_user.mandateId),
"userId": str(current_user.id),
"authenticationAuthority": current_user.authenticationAuthority
}
# Create new access token + set cookie
@ -365,15 +379,18 @@ async def logout(request: Request, response: Response, currentUser: User = Depen
# Don't fail if audit logging fails
pass
# Clear httpOnly cookies
response.delete_cookie(key="auth_token", httponly=True, samesite="strict")
response.delete_cookie(key="refresh_token", httponly=True, samesite="strict")
return JSONResponse({
# Create the JSON response first
json_response = JSONResponse({
"message": "Successfully logged out - cookies cleared",
"revokedTokens": revoked
})
# Clear httpOnly cookies on the response we're actually returning
clearAccessTokenCookie(json_response)
clearRefreshTokenCookie(json_response)
return json_response
except Exception as e:
logger.error(f"Error during logout: {str(e)}")
raise HTTPException(

View file

@ -17,6 +17,11 @@ ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
REFRESH_TOKEN_EXPIRE_DAYS = int(APP_CONFIG.get("APP_REFRESH_TOKEN_EXPIRY", "7"))
# Cookie security settings - use secure cookies based on whether API uses HTTPS
# Cookies must have secure=True on HTTPS sites, secure=False on HTTP sites
APP_API_URL = APP_CONFIG.get("APP_API_URL", "http://localhost:8000")
USE_SECURE_COOKIES = APP_API_URL.startswith("https://") if APP_API_URL else False
def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> Tuple[str, "datetime"]:
"""Create a JWT access token and return (token, expiresAt)."""
@ -52,8 +57,9 @@ def setAccessTokenCookie(response: Response, token: str, expiresDelta: Optional[
key="auth_token",
value=token,
httponly=True,
secure=True,
secure=USE_SECURE_COOKIES, # Only secure in production (HTTPS)
samesite="strict",
path="/",
max_age=maxAge
)
@ -64,9 +70,46 @@ def setRefreshTokenCookie(response: Response, token: str) -> None:
key="refresh_token",
value=token,
httponly=True,
secure=True,
secure=USE_SECURE_COOKIES, # Only secure in production (HTTPS)
samesite="strict",
path="/",
max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60
)
def clearAccessTokenCookie(response: Response) -> None:
    """
    Clear the access token cookie by setting it to expire immediately.

    Uses both raw header manipulation and FastAPI's delete_cookie for maximum
    browser compatibility. The fallback now mirrors the attributes used by
    setAccessTokenCookie (httponly/samesite/secure) so the deletion cookie is
    consistent with how the cookie was originally set.

    Args:
        response: The response object the expired cookie headers are written to.
    """
    # Build secure flag based on environment
    secure_flag = "; Secure" if USE_SECURE_COOKIES else ""
    # Primary method: Raw Set-Cookie header for guaranteed deletion
    response.headers.append(
        "Set-Cookie",
        f"auth_token=deleted; Path=/; Max-Age=0; Expires=Thu, 01 Jan 1970 00:00:00 GMT; HttpOnly{secure_flag}; SameSite=Strict"
    )
    # Fallback: FastAPI's built-in method, with attributes matching the
    # original set_cookie call so the deletion cookie is unambiguous.
    response.delete_cookie(
        key="auth_token",
        path="/",
        httponly=True,
        secure=USE_SECURE_COOKIES,
        samesite="strict",
    )
def clearRefreshTokenCookie(response: Response) -> None:
    """
    Clear the refresh token cookie by setting it to expire immediately.

    Uses both raw header manipulation and FastAPI's delete_cookie for maximum
    browser compatibility. The fallback now mirrors the attributes used by
    setRefreshTokenCookie (httponly/samesite/secure) so the deletion cookie is
    consistent with how the cookie was originally set.

    Args:
        response: The response object the expired cookie headers are written to.
    """
    # Build secure flag based on environment
    secure_flag = "; Secure" if USE_SECURE_COOKIES else ""
    # Primary method: Raw Set-Cookie header for guaranteed deletion
    response.headers.append(
        "Set-Cookie",
        f"refresh_token=deleted; Path=/; Max-Age=0; Expires=Thu, 01 Jan 1970 00:00:00 GMT; HttpOnly{secure_flag}; SameSite=Strict"
    )
    # Fallback: FastAPI's built-in method, with attributes matching the
    # original set_cookie call so the deletion cookie is unambiguous.
    response.delete_cookie(
        key="refresh_token",
        path="/",
        httponly=True,
        secure=USE_SECURE_COOKIES,
        samesite="strict",
    )

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,596 @@
import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
from modules.interfaces.interfaceAiObjects import AiObjects
logger = logging.getLogger(__name__)
class SubCoreAi:
"""Core AI operations including image analysis, text generation, and planning calls."""
def __init__(self, services, aiObjects):
    """Initialize core AI operations.

    Args:
        services: Service center instance for accessing other services
            (used here for debug logging and config lookups).
        aiObjects: Initialized AiObjects instance that performs the actual
            model calls.
    """
    # Service locator shared across the service layer.
    self.services = services
    # Low-level AI interface; model selection and calls are delegated to it.
    self.aiObjects = aiObjects
# AI Processing Call
async def callAi(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]] = None,
    placeholders: Optional[List[PromptPlaceholder]] = None,
    options: Optional[AiCallOptions] = None,
    outputFormat: Optional[str] = None,
    title: Optional[str] = None,
    documentProcessor=None,
    documentGenerator=None
) -> Union[str, Dict[str, Any]]:
    """
    Unified AI call interface that automatically routes to appropriate handler.

    Routing order: document generation (when outputFormat and a generator are
    supplied) -> planning (no documents + GENERATE_PLAN operation) -> plain
    text call (optionally via the document processor).

    Args:
        prompt: The main prompt for the AI call
        documents: Optional list of documents to process
        placeholders: Optional list of placeholder replacements for planning calls
        options: AI call configuration options
        outputFormat: Optional output format (html, pdf, docx, txt, md, json, csv, xlsx) for document generation
        title: Optional title for generated documents
        documentProcessor: Document processing service instance
        documentGenerator: Document generation service instance

    Returns:
        AI response as string, or dict with documents if outputFormat is specified

    Raises:
        Exception: If all available models fail
    """
    if options is None:
        options = AiCallOptions()
    # Normalize placeholders from List[PromptPlaceholder]
    placeholders_dict: Dict[str, str] = {}
    placeholders_meta: Dict[str, bool] = {}
    if placeholders:
        placeholders_dict = {p.label: p.content for p in placeholders}
        # summaryAllowed marks placeholders whose content may be shrunk later.
        placeholders_meta = {p.label: bool(getattr(p, 'summaryAllowed', False)) for p in placeholders}
    # Auto-determine call type based on documents and operation type
    call_type = self._determineCallType(documents, options.operationType)
    options.callType = call_type
    try:
        # Build the full prompt that will be sent to AI (debug copy only;
        # each branch below builds its own prompt for the actual call).
        if placeholders:
            full_prompt = prompt
            for p in placeholders:
                placeholder = f"{{{{KEY:{p.label}}}}}"
                full_prompt = full_prompt.replace(placeholder, p.content)
        else:
            full_prompt = prompt
        self._writeAiResponseDebug(
            label='ai_prompt_debug',
            content=full_prompt,
            partIndex=1,
            modelName=None,
            continuation=False
        )
    except Exception:
        # Best-effort debug write; never block the call path.
        pass
    # Handle document generation with specific output format
    if outputFormat and documentGenerator:
        result = await documentGenerator.callAiWithDocumentGeneration(prompt, documents, options, outputFormat, title)
        # Log AI response for debugging
        try:
            if isinstance(result, dict) and 'content' in result:
                self._writeAiResponseDebug(
                    label='ai_document_generation',
                    content=result['content'],
                    partIndex=1,
                    modelName=None,  # Document generation doesn't return model info
                    continuation=False
                )
        except Exception:
            pass
        return result
    if call_type == "planning":
        result = await self._callAiPlanning(prompt, placeholders_dict, placeholders_meta, options)
        # Log AI response for debugging
        try:
            self._writeAiResponseDebug(
                label='ai_planning',
                content=result or "",
                partIndex=1,
                modelName=None,  # Planning doesn't return model info
                continuation=False
            )
        except Exception:
            pass
        return result
    else:
        # Set processDocumentsIndividually from the legacy parameter if not set in options
        if options.processDocumentsIndividually is None and documents:
            options.processDocumentsIndividually = False  # Default to batch processing
        # For text calls, we need to build the full prompt with placeholders here
        # since _callAiText doesn't handle placeholders directly
        if placeholders_dict:
            full_prompt = self._buildPromptWithPlaceholders(prompt, placeholders_dict)
        else:
            full_prompt = prompt
        if documentProcessor and documents:
            result = await documentProcessor.callAiText(full_prompt, documents, options)
        else:
            # Fallback to direct AI call if no document processor available
            request = AiCallRequest(
                prompt=full_prompt,
                context="",
                options=options
            )
            response = await self.aiObjects.call(request)
            result = response.content
        # Log AI response for debugging (additional logging for text calls)
        try:
            self._writeAiResponseDebug(
                label='ai_text_main',
                content=result or "",
                partIndex=1,
                modelName=None,  # Text calls already log internally
                continuation=False
            )
        except Exception:
            pass
        return result
# AI Image Analysis
async def readImage(
    self,
    prompt: str,
    imageData: Union[str, bytes],
    mimeType: str = None,
    options: Optional[AiCallOptions] = None,
) -> str:
    """Call AI for image analysis using interface.callImage().

    Args:
        prompt: Instruction describing what to extract from the image.
        imageData: Image payload as bytes or str (presumably base64 or a
            data URL when str — TODO confirm against AiObjects.callImage).
        mimeType: Optional MIME type hint forwarded to the backend.
        options: Call options; operationType is always forced to IMAGE_ANALYSIS.

    Returns:
        The model's textual answer, or an "Error: ..." string on failure.
        This method never raises; all exceptions become error strings.
    """
    try:
        # Check if imageData is valid
        if not imageData:
            error_msg = "No image data provided"
            self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
            logger.error(f"Error in AI image analysis: {error_msg}")
            return f"Error: {error_msg}"
        self.services.utils.debugLogToFile(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}", "AI_SERVICE")
        logger.info(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
        # Always use IMAGE_ANALYSIS operation type for image processing
        if options is None:
            options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
        else:
            # Override the operation type to ensure image analysis
            options.operationType = OperationType.IMAGE_ANALYSIS
        self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE")
        logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}")
        result = await self.aiObjects.callImage(prompt, imageData, mimeType, options)
        # Debug the result
        self.services.utils.debugLogToFile(f"Raw AI result type: {type(result)}, value: {repr(result)}", "AI_SERVICE")
        # Check if result is valid (empty or whitespace-only counts as failure)
        if not result or (isinstance(result, str) and not result.strip()):
            error_msg = f"No response from AI image analysis (result: {repr(result)})"
            self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
            logger.error(f"Error in AI image analysis: {error_msg}")
            return f"Error: {error_msg}"
        # NOTE(review): the conditional expression is the first argument, so a
        # short result is logged without the "callImage returned:" prefix.
        self.services.utils.debugLogToFile(f"callImage returned: {result[:200]}..." if len(result) > 200 else result, "AI_SERVICE")
        logger.info(f"callImage returned: {result[:200]}..." if len(result) > 200 else result)
        return result
    except Exception as e:
        self.services.utils.debugLogToFile(f"Error in AI image analysis: {str(e)}", "AI_SERVICE")
        logger.error(f"Error in AI image analysis: {str(e)}")
        return f"Error: {str(e)}"
# AI Image Generation
async def generateImage(
    self,
    prompt: str,
    size: str = "1024x1024",
    quality: str = "standard",
    style: str = "vivid",
    options: Optional[AiCallOptions] = None,
) -> Dict[str, Any]:
    """Generate an image with the configured AI backend.

    Delegates directly to ``aiObjects.generateImage``. Any exception is
    converted into a ``{"success": False, "error": ...}`` payload instead of
    propagating to the caller.
    """
    try:
        result = await self.aiObjects.generateImage(
            prompt, size, quality, style, options
        )
    except Exception as exc:
        logger.error(f"Error in AI image generation: {str(exc)}")
        return {"success": False, "error": str(exc)}
    return result
def _determineCallType(self, documents: Optional[List[ChatDocument]], operation_type: str) -> str:
    """
    Decide which pipeline a request belongs to.

    A request is a "planning" call only when it carries no documents AND its
    operation type is GENERATE_PLAN; every other combination is a plain
    "text" call.
    """
    has_documents = bool(documents)
    wants_plan = operation_type == OperationType.GENERATE_PLAN
    return "planning" if (wants_plan and not has_documents) else "text"
async def _callAiPlanning(
    self,
    prompt: str,
    placeholders: Optional[Dict[str, str]],
    placeholdersMeta: Optional[Dict[str, bool]],
    options: AiCallOptions
) -> str:
    """
    Handle planning calls with placeholder system and selective summarization.

    Args:
        prompt: Prompt template containing {{KEY:label}} / {{label}} markers.
        placeholders: label -> content substitutions.
        placeholdersMeta: label -> summaryAllowed flag; only labels flagged
            True may be shrunk when the prompt exceeds the model budget.
        options: Call options; compressPrompt enables the reduction pass.

    Returns:
        The model's response content.
    """
    # Build full prompt with placeholders; if too large, summarize summaryAllowed placeholders proportionally
    effective_placeholders = placeholders or {}
    full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
    if options.compressPrompt and placeholdersMeta:
        # Determine model capacity; on failure, fall back to the current size
        # (which disables reduction since current_bytes == max_bytes).
        try:
            caps = self._getModelCapabilitiesForContent(full_prompt, None, options)
            max_bytes = caps.get("maxContextBytes", len(full_prompt.encode("utf-8")))
        except Exception:
            max_bytes = len(full_prompt.encode("utf-8"))
        current_bytes = len(full_prompt.encode("utf-8"))
        if current_bytes > max_bytes:
            # Compute total bytes contributed by allowed placeholders (approximate by content length)
            allowed_labels = [l for l, allow in placeholdersMeta.items() if allow]
            allowed_sizes = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels}
            total_allowed = sum(allowed_sizes.values())
            overage = current_bytes - max_bytes
            if total_allowed > 0 and overage > 0:
                # Target total for allowed after reduction
                target_allowed = max(total_allowed - overage, 0)
                # Global ratio to apply across allowed placeholders
                ratio = target_allowed / total_allowed if total_allowed > 0 else 1.0
                ratio = max(0.0, min(1.0, ratio))
                reduced: Dict[str, str] = {}
                for label, content in effective_placeholders.items():
                    if label in allowed_labels and isinstance(content, str) and len(content) > 0:
                        old_len = len(content)
                        # Reduce by proportional ratio on characters (fallback if empty)
                        reduction_factor = ratio if old_len > 0 else 1.0
                        reduced[label] = self._reduceText(content, reduction_factor)
                    else:
                        # Non-reducible placeholders pass through untouched.
                        reduced[label] = content
                effective_placeholders = reduced
                full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
                # If still slightly over, perform a second-pass fine adjustment with updated ratio
                current_bytes = len(full_prompt.encode("utf-8"))
                if current_bytes > max_bytes and total_allowed > 0:
                    overage2 = current_bytes - max_bytes
                    # Recompute allowed sizes after first reduction
                    allowed_sizes2 = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels}
                    total_allowed2 = sum(allowed_sizes2.values())
                    if total_allowed2 > 0 and overage2 > 0:
                        target_allowed2 = max(total_allowed2 - overage2, 0)
                        ratio2 = target_allowed2 / total_allowed2
                        ratio2 = max(0.0, min(1.0, ratio2))
                        reduced2: Dict[str, str] = {}
                        for label, content in effective_placeholders.items():
                            if label in allowed_labels and isinstance(content, str) and len(content) > 0:
                                old_len = len(content)
                                reduction_factor = ratio2 if old_len > 0 else 1.0
                                reduced2[label] = self._reduceText(content, reduction_factor)
                            else:
                                reduced2[label] = content
                        effective_placeholders = reduced2
                        full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
    # Make AI call using AiObjects (let it handle model selection)
    request = AiCallRequest(
        prompt=full_prompt,
        context="",  # Context is already included in the prompt
        options=options
    )
    response = await self.aiObjects.call(request)
    try:
        logger.debug(f"AI model selected (planning): {getattr(response, 'modelName', 'unknown')}")
    except Exception:
        pass
    return response.content
async def _callAiDirect(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions,
    documentProcessor=None
) -> Dict[str, Any]:
    """
    Call AI directly with prompt and documents for JSON output.
    Used for multi-file generation - uses the existing generation pipeline.

    Args:
        prompt: Prompt forwarded to the processor or the direct AI call.
        documents: Documents handed to the processor (may be None).
        options: AI call configuration options.
        documentProcessor: Optional processor; when absent a plain AI call is
            wrapped into a minimal single-section document structure.

    Returns:
        A multi-file dict: {"metadata": ..., "documents": [...]} — single-file
        results (with top-level "sections") are converted on the way out.
    """
    # Use the existing generation pipeline that already works
    # This ensures proper document processing and content extraction
    logger.info(f"Using existing generation pipeline for {len(documents) if documents else 0} documents")
    if documentProcessor:
        # Process documents with JSON merging using the existing pipeline
        result = await documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
    else:
        # Fallback to simple AI call
        request = AiCallRequest(
            prompt=prompt,
            context="",
            options=options
        )
        response = await self.aiObjects.call(request)
        # Wrap the raw text in the minimal single-file document shape.
        result = {"metadata": {"title": "AI Response"}, "sections": [{"id": "section_1", "content_type": "paragraph", "elements": [{"text": response.content}]}]}
    # Convert single-file result to multi-file format if needed
    if "sections" in result and "documents" not in result:
        logger.info("Converting single-file result to multi-file format")
        # This is a single-file result, convert it to multi-file format
        return {
            "metadata": result.get("metadata", {"title": "Converted Document"}),
            "documents": [{
                "id": "doc_1",
                "title": result.get("metadata", {}).get("title", "Document"),
                "filename": "document.txt",
                "sections": result.get("sections", [])
            }]
        }
    return result
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
    """
    Get model capabilities for content processing, including appropriate size limits for chunking.

    Estimates total content size (prompt bytes + declared document sizes),
    then scans the registered aiModels for the best candidate:
      - prefer the largest capable model whose context covers >= 80% of the content,
      - otherwise fall back to the largest capable model overall,
      - otherwise use a GPT-4o-sized default.

    Args:
        prompt: Prompt text; its UTF-8 size counts toward the estimate.
        documents: Optional documents; fileSize values are added to the estimate.
        options: Call options; operationType filters models by capability.

    Returns:
        Dict with byte budgets: maxContextBytes, textChunkSize, imageChunkSize.
    """
    # Estimate total content size
    prompt_size = len(prompt.encode('utf-8'))
    document_size = 0
    if documents:
        # Rough estimate of document content size
        for doc in documents:
            document_size += doc.fileSize or 0
    total_size = prompt_size + document_size
    # Use AiObjects' model registry to pick the best model for this content size.
    from modules.interfaces.interfaceAiObjects import aiModels
    best_model = None            # largest model satisfying the 80% fit rule
    best_context_length = 0
    fallback_model = None        # largest capable model overall
    fallback_context_length = 0
    for model_name, model_info in aiModels.items():
        context_length = model_info.get("contextLength", 0)
        # Skip models with no context length
        if context_length == 0:
            continue
        # Check if model supports the operation type
        capabilities = model_info.get("capabilities", [])
        if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
            continue
        elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
            continue
        elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
            continue
        elif "text_generation" not in capabilities:
            continue
        # Track the largest model that can handle the content without chunking.
        if context_length >= total_size * 0.8 and context_length > best_context_length:
            best_model = model_info
            best_context_length = context_length
        # Independently track the largest capable model as a fallback.
        # (Bug fix: the old elif-chain latched onto the FIRST capable model
        # as the fallback and could then reject a genuinely fitting model
        # whose context was smaller than that fallback's.)
        if context_length > fallback_context_length:
            fallback_model = model_info
            fallback_context_length = context_length
    if best_model is None:
        best_model = fallback_model
    # Fallback to a reasonable default if no model found
    if best_model is None:
        best_model = {
            "contextLength": 128000,  # GPT-4o default
            "llmName": "gpt-4o"
        }
    # Calculate appropriate sizes
    # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
    context_length_bytes = int(best_model["contextLength"] * 4)
    max_context_bytes = int(context_length_bytes * 0.9)  # 90% of context length
    text_chunk_size = int(max_context_bytes * 0.7)       # 70% of max context for text chunks
    image_chunk_size = int(max_context_bytes * 0.8)      # 80% of max context for image chunks
    logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
    logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
    logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
    return {
        "maxContextBytes": max_context_bytes,
        "textChunkSize": text_chunk_size,
        "imageChunkSize": image_chunk_size
    }
def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]:
    """
    Return candidate models for an operation.

    When an AiObjects instance is attached, model selection is delegated to
    it downstream and an empty list is returned here; otherwise a single
    hardcoded default model is offered as a fallback.
    """
    if getattr(self, 'aiObjects', None):
        # AiObjects performs its own model selection; nothing to advertise.
        return []
    # No AiObjects available: offer one conservative default model.
    fallback_capabilities = ["text", "reasoning"] if operation_type == "planning" else ["text"]
    return [
        ModelCapabilities(
            name="default",
            maxTokens=4000,
            capabilities=fallback_capabilities,
            costPerToken=0.001,
            processingTime=1.0,
            isAvailable=True
        )
    ]
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
"""
Build full prompt by replacing placeholders with their content.
Uses the new {{KEY:placeholder}} format.
"""
if not placeholders:
return prompt
full_prompt = prompt
for placeholder, content in placeholders.items():
# Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
return full_prompt
def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None:
    """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled.

    Args:
        label: Short tag identifying the call site; becomes part of the filename.
        content: Raw text to persist; None is written as an empty file.
        partIndex: 1-based part index for multi-part responses.
        modelName: Optional model name, sanitized and appended to the filename.
        continuation: Optional flag recorded as cont_true/cont_false in the name.

    Best-effort: any failure (config lookup, filesystem) is swallowed so the
    debug write can never break the main call path.
    """
    try:
        # Check if debug logging is enabled
        debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
        if not debug_enabled:
            return
        import os
        from datetime import datetime, UTC
        # Base dir: gateway/test-chat/ai (go up 4 levels from this file)
        # .../gateway/modules/services/serviceAi/subCoreAi.py -> up to gateway root
        gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
        outDir = os.path.join(gatewayDir, 'test-chat', 'ai')
        os.makedirs(outDir, exist_ok=True)
        # Millisecond-precision UTC timestamp keeps filenames unique and sortable.
        ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
        suffix = []
        if partIndex is not None:
            suffix.append(f"part{partIndex}")
        if continuation is not None:
            suffix.append(f"cont_{str(continuation).lower()}")
        if modelName:
            # Sanitize the model name so it is filesystem-safe.
            safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName)
            suffix.append(safeModel)
        suffixStr = ('_' + '_'.join(suffix)) if suffix else ''
        fname = f"{ts}_{label}{suffixStr}.txt"
        fpath = os.path.join(outDir, fname)
        with open(fpath, 'w', encoding='utf-8') as f:
            f.write(content or '')
    except Exception:
        # Do not raise; best-effort debug write
        pass
def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
    """
    Estimate whether the text would overflow the model's token budget.

    Uses the rough 4-characters-per-token heuristic and shrinks the model
    limit by ``safety_margin`` before comparing.
    """
    budget = int(model.maxTokens * (1 - safety_margin))
    approx_tokens = len(text) // 4
    return approx_tokens > budget
def _reducePlanningPrompt(
    self,
    full_prompt: str,
    placeholders: Optional[Dict[str, str]],
    model: ModelCapabilities,
    options: AiCallOptions
) -> str:
    """
    Reduce planning prompt size by summarizing placeholders while preserving
    the prompt structure.

    Without placeholders the whole prompt is truncated to 70%. With
    placeholders, only long values (> 1000 chars) are reduced and the prompt
    is rebuilt with the trimmed substitutions.
    """
    if not placeholders:
        return self._reduceText(full_prompt, 0.7)
    # Trim only long placeholder values; short ones pass through untouched.
    trimmed: Dict[str, str] = {}
    for label, content in placeholders.items():
        trimmed[label] = (
            self._reduceText(content, 0.7) if len(content) > 1000 else content
        )
    return self._buildPromptWithPlaceholders(full_prompt, trimmed)
def _reduceTextPrompt(
    self,
    prompt: str,
    context: str,
    model: ModelCapabilities,
    options: AiCallOptions
) -> str:
    """
    Reduce text prompt size using typeGroup-aware chunking and merging.

    In compress mode both prompt and context are shrunk proportionally toward
    70% of the model budget; otherwise only the context is reduced so the
    prompt itself stays intact.

    Args:
        prompt: Instruction prompt (kept verbatim unless compressPrompt is set).
        context: Supplementary text appended after the prompt.
        model: Model whose maxTokens bounds the combined size.
        options: Carries safetyMargin and compressPrompt settings.

    Returns:
        The (possibly reduced) prompt, with context appended after a blank
        line when any context remains.
    """
    max_size = int(model.maxTokens * (1 - options.safetyMargin))
    if options.compressPrompt:
        # Reduce both prompt and context proportionally.
        target_size = max_size
        current_size = len(prompt) + len(context)
        # Bug fix: guard against ZeroDivisionError when both are empty.
        if current_size > 0:
            reduction_factor = (target_size * 0.7) / current_size
            if reduction_factor < 1.0:
                prompt = self._reduceText(prompt, reduction_factor)
                context = self._reduceText(context, reduction_factor)
    else:
        # Only reduce context, preserve prompt integrity
        max_context_size = max_size - len(prompt)
        # Bug fix: require non-empty context (avoids division by zero when the
        # prompt alone exceeds the budget) and clamp the budget to >= 0 so an
        # oversized prompt cannot yield a negative factor (which sliced the
        # context from the wrong end).
        if context and len(context) > max_context_size:
            reduction_factor = max(max_context_size, 0) / len(context)
            context = self._reduceText(context, reduction_factor)
    return prompt + "\n\n" + context if context else prompt
def _extractTextFromContentParts(self, extracted_content) -> str:
"""
Extract text content from ExtractionService ContentPart objects.
"""
if not extracted_content or not hasattr(extracted_content, 'parts'):
return ""
text_parts = []
for part in extracted_content.parts:
if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
if hasattr(part, 'data') and part.data:
text_parts.append(part.data)
return "\n\n".join(text_parts)
def _reduceText(self, text: str, reduction_factor: float) -> str:
"""
Reduce text size by the specified factor.
"""
if reduction_factor >= 1.0:
return text
target_length = int(len(text) * reduction_factor)
return text[:target_length] + "... [reduced]"

View file

@ -0,0 +1,804 @@
import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
logger = logging.getLogger(__name__)
class SubDocumentGeneration:
"""Document generation operations including single-file and multi-file generation."""
def __init__(self, services, aiObjects, documentProcessor):
    """Initialize document generation service.

    Args:
        services: Service center instance for accessing other services
        aiObjects: Initialized AiObjects instance used for model calls
        documentProcessor: Document processing service instance used for
            per-chunk / JSON document handling
    """
    # Shared service locator.
    self.services = services
    # Low-level AI interface for direct calls.
    self.aiObjects = aiObjects
    # Handles chunked document processing and JSON merging.
    self.documentProcessor = documentProcessor
async def callAiWithDocumentGeneration(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions,
    outputFormat: str,
    title: Optional[str]
) -> Dict[str, Any]:
    """
    Handle AI calls with document generation in specific output format.

    The prompt is first analyzed for intent; a multi-file intent routes to
    the multi-file pipeline, everything else to single-file generation.

    Args:
        prompt: The main prompt for the AI call
        documents: Optional list of documents to process
        options: AI call configuration options
        outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
        title: Optional title for generated documents

    Returns:
        Dict with generated documents and metadata; on failure, an error
        envelope with ``success=False`` mirroring the success shape.
    """
    try:
        # Use AI to analyze prompt intent
        analysis = await self._analyzePromptIntent(prompt, self)
        logger.info(f"Prompt analysis result: {analysis}")
        if analysis.get("is_multi_file", False):
            return await self._callAiWithMultiFileGeneration(
                prompt, documents, options, outputFormat, title, analysis
            )
        return await self._callAiWithSingleFileGeneration(
            prompt, documents, options, outputFormat, title
        )
    except Exception as e:
        logger.error(f"Error in document generation: {str(e)}")
        # Best-effort error envelope with the same keys as a success result.
        return {
            "success": False,
            "error": str(e),
            "content": "",
            "rendered_content": "",
            "mime_type": "text/plain",
            "filename": f"error_{outputFormat}",
            "format": outputFormat,
            "title": title or "Error",
            "documents": []
        }
    async def _callAiWithSingleFileGeneration(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]],
        options: AiCallOptions,
        outputFormat: str,
        title: Optional[str],
        generationPrompt: Optional[str] = None
    ) -> Dict[str, Any]:
        """Handle single-file document generation (existing functionality).

        Pipeline: format-specific extraction prompt -> structured JSON
        extraction from the documents -> optional AI enhancement of that
        JSON -> rendering to the requested output format.

        Args:
            prompt: User request driving extraction and enhancement.
            documents: Optional source documents to extract content from.
            options: AI call options forwarded to the JSON extraction step.
            outputFormat: Target format/extension (e.g. "docx", "pdf", "html").
            title: Optional document title; defaults to "AI Generated Document"
                and may be overwritten by the AI-provided metadata title below.
            generationPrompt: NOTE(review): unused on entry — it is
                unconditionally reassigned below when ``prompt`` is set;
                confirm whether any caller passes it, otherwise remove.

        Returns:
            Dict with success flag, the extracted JSON ("content"), the
            rendered output ("rendered_content"), mime type, filename,
            format, title and a one-element "documents" list.

        Raises:
            Exception: Re-raised (after logging) on any pipeline failure,
                including an extraction result without a "sections" key.
        """
        try:
            # Get format-specific extraction prompt from generation service
            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
            generation_service = GenerationService(self.services)
            # Use default title if not provided
            if not title:
                title = "AI Generated Document"
            # Get format-specific extraction prompt
            extractionPrompt = await generation_service.getExtractionPrompt(
                outputFormat=outputFormat,
                userPrompt=prompt,
                title=title,
                aiService=self
            )
            # Process documents with format-specific prompt using JSON mode
            # This ensures structured JSON output instead of text
            aiResponseJson = await self._callAiJson(extractionPrompt, documents, options)
            # Validate JSON response
            if not isinstance(aiResponseJson, dict) or "sections" not in aiResponseJson:
                raise Exception("AI response is not valid JSON document structure")
            # Emit raw extracted data as a chat message attachment before rendering
            # (best-effort: failures only log a warning)
            try:
                await self._postRawDataChatMessage(aiResponseJson, label="raw_extraction_single")
            except Exception:
                logger.warning("Failed to emit raw extraction chat message (single-file)")
            # Generate filename from document metadata
            parsedFilename = None
            try:
                if aiResponseJson.get("metadata", {}).get("title"):
                    title = aiResponseJson["metadata"]["title"]
                    # Clean title for filename
                    import re
                    parsed = re.sub(r"[^a-zA-Z0-9._-]", "-", title)
                    parsed = re.sub(r"-+", "-", parsed).strip('-')
                    if parsed:
                        parsedFilename = f"{parsed}.{outputFormat}"
            except Exception:
                parsedFilename = None
            # Use AI generation to enhance the extracted JSON before rendering
            enhancedContent = aiResponseJson  # Default to original
            if prompt:
                try:
                    from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
                    # Get generation prompt (overwrites the unused parameter of the same name)
                    generationPrompt = await generation_service.getGenerationPrompt(
                        outputFormat=outputFormat,
                        userPrompt=prompt,
                        title=title,
                        aiService=self
                    )
                    # Prepare the AI call
                    request_options = AiCallOptions()
                    request_options.operationType = OperationType.GENERAL
                    # Create context with the extracted JSON content
                    import json
                    context = f"Extracted JSON content:\n{json.dumps(aiResponseJson, indent=2)}"
                    request = AiCallRequest(
                        prompt=generationPrompt,
                        context=context,
                        options=request_options
                    )
                    # Call AI to enhance the content
                    response = await self.aiObjects.call(request)
                    if response and response.content:
                        # Parse the AI response as JSON
                        try:
                            import re
                            result = response.content.strip()
                            # Extract JSON from markdown if present
                            # (fenced ```json block first, then bare fences)
                            json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
                            if json_match:
                                result = json_match.group(1).strip()
                            elif result.startswith('```json'):
                                result = re.sub(r'^```json\s*', '', result)
                                result = re.sub(r'\s*```$', '', result)
                            elif result.startswith('```'):
                                result = re.sub(r'^```\s*', '', result)
                                result = re.sub(r'\s*```$', '', result)
                            # Try to parse JSON
                            enhancedContent = json.loads(result)
                            logger.info(f"AI enhanced JSON content successfully")
                        except json.JSONDecodeError as e:
                            logger.warning(f"AI generation returned invalid JSON: {str(e)}, using original content")
                            enhancedContent = aiResponseJson
                    else:
                        logger.warning("AI generation returned empty response, using original content")
                        enhancedContent = aiResponseJson
                except Exception as e:
                    logger.warning(f"AI generation failed: {str(e)}, using original content")
                    enhancedContent = aiResponseJson
            # Render the enhanced JSON content
            renderedContent, mimeType = await generation_service.renderReport(
                extractedContent=enhancedContent,
                outputFormat=outputFormat,
                title=title,
                userPrompt=prompt,
                aiService=self
            )
            # Generate meaningful filename (use AI-provided if valid, else fallback)
            from datetime import datetime, UTC
            timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
            if parsedFilename and parsedFilename.lower().endswith(f".{outputFormat.lower()}"):
                filename = parsedFilename
            else:
                safeTitle = ''.join(c if c.isalnum() else '-' for c in (title or 'document')).strip('-')
                filename = f"{safeTitle or 'document'}-{timestamp}.{outputFormat}"
            # Return structured result with document information
            # NOTE(review): "content" carries the pre-enhancement extraction
            # (aiResponseJson) while "rendered_content" is produced from
            # enhancedContent — confirm this asymmetry is intentional.
            return {
                "success": True,
                "content": aiResponseJson,  # Structured JSON document
                "rendered_content": renderedContent,  # Formatted content
                "mime_type": mimeType,
                "filename": filename,
                "format": outputFormat,
                "title": title,
                "documents": [{
                    "documentName": filename,
                    "documentData": renderedContent,
                    "mimeType": mimeType
                }],
                "is_multi_file": False
            }
        except Exception as e:
            logger.error(f"Error in single-file document generation: {str(e)}")
            raise
    async def _callAiWithMultiFileGeneration(
        self,
        prompt: str,
        documents: Optional[List[ChatDocument]],
        options: AiCallOptions,
        outputFormat: str,
        title: Optional[str],
        prompt_analysis: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Handle multi-file document generation using AI analysis.

        Pipeline: adaptive extraction prompt (built from ``prompt_analysis``)
        -> per-chunk JSON extraction -> structure validation (falls back to
        single-file generation on mismatch or on any exception) -> for each
        extracted document: section transformation, optional AI enhancement
        with two-stage JSON repair, rendering -> optional debug dump.

        Args:
            prompt: User request driving extraction and enhancement.
            documents: Optional source documents to extract content from.
            options: AI call options forwarded to the extraction pipeline.
            outputFormat: Target format/extension (e.g. "docx", "pdf", "html").
            title: Optional overall title; defaults to "AI Generated Documents".
            prompt_analysis: Result of _analyzePromptIntent (is_multi_file,
                strategy, criteria, ...).

        Returns:
            Dict with success flag, the raw extraction ("content"), one
            rendered entry per file in "documents", is_multi_file=True and
            the split strategy; or the single-file result when falling back.
        """
        try:
            # Get multi-file extraction prompt based on AI analysis
            from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
            generation_service = GenerationService(self.services)
            # Use default title if not provided
            if not title:
                title = "AI Generated Documents"
            # Get adaptive extraction prompt
            extraction_prompt = await generation_service.getAdaptiveExtractionPrompt(
                outputFormat=outputFormat,
                userPrompt=prompt,
                title=title,
                promptAnalysis=prompt_analysis,
                aiService=self
            )
            logger.info(f"Adaptive extraction prompt length: {len(extraction_prompt)} characters")
            logger.debug(f"Adaptive extraction prompt preview: {extraction_prompt[:500]}...")
            # Process with adaptive JSON schema - use the existing pipeline but with adaptive prompt
            logger.info(f"Using adaptive prompt with existing pipeline: {len(extraction_prompt)} chars")
            logger.debug(f"Processing documents: {len(documents) if documents else 0} documents")
            # Use the existing pipeline but replace the prompt with our adaptive one
            # This ensures proper document processing while using the multi-file prompt
            ai_response = await self.documentProcessor.processDocumentsPerChunkJsonWithPrompt(documents, extraction_prompt, options)
            logger.info(f"AI response type: {type(ai_response)}")
            logger.info(f"AI response keys: {list(ai_response.keys()) if isinstance(ai_response, dict) else 'Not a dict'}")
            logger.debug(f"AI response preview: {str(ai_response)[:500]}...")
            # Validate response structure
            if not self._validateResponseStructure(ai_response, prompt_analysis):
                # Fallback to single-file if multi-file fails
                logger.warning(f"Multi-file processing failed - Invalid response structure. Expected multi-file but got: {list(ai_response.keys()) if isinstance(ai_response, dict) else type(ai_response)}")
                logger.warning(f"Prompt analysis: {prompt_analysis}")
                logger.warning("Falling back to single-file generation")
                return await self._callAiWithSingleFileGeneration(
                    prompt, documents, options, outputFormat, title
                )
            # Emit raw extracted data as a chat message attachment before transformation/rendering
            try:
                await self._postRawDataChatMessage(ai_response, label="raw_extraction_multi")
            except Exception:
                logger.warning("Failed to emit raw extraction chat message (multi-file)")
            # Process multiple documents
            generated_documents = []
            for i, doc_data in enumerate(ai_response.get("documents", [])):
                # Transform AI-generated sections to renderer-compatible format
                transformed_sections = []
                for section in doc_data.get("sections", []):
                    # Convert AI format to renderer format
                    transformed_section = {
                        "id": section.get("id", f"section_{len(transformed_sections) + 1}"),
                        "content_type": section.get("content_type", "paragraph"),
                        "elements": section.get("elements", []),
                        "order": section.get("order", len(transformed_sections) + 1)
                    }
                    # Extract text from elements for simple text-based sections
                    if section.get("content_type") in ["paragraph", "heading"]:
                        text_parts = []
                        for element in section.get("elements", []):
                            if "text" in element:
                                text_parts.append(element["text"])
                        # Add text to the first element or create a new one
                        if transformed_section["elements"]:
                            transformed_section["elements"][0]["text"] = "\n".join(text_parts)
                        else:
                            transformed_section["elements"] = [{"text": "\n".join(text_parts)}]
                    transformed_sections.append(transformed_section)
                # Create complete document structure for rendering
                # NOTE(review): doc_data["title"] is accessed without .get();
                # an extracted document missing "title" raises KeyError and
                # triggers the single-file fallback — confirm that is intended.
                complete_document = {
                    "metadata": {
                        "title": doc_data["title"],
                        "source_document": "multi_file_generation",
                        "document_id": doc_data.get("id", f"doc_{i+1}"),
                        "filename": doc_data.get("filename", f"document_{i+1}"),
                        "split_strategy": prompt_analysis.get("strategy", "custom")
                    },
                    "sections": transformed_sections,
                    "summary": f"Generated document: {doc_data['title']}",
                    "tags": ["multi_file", "ai_generated"]
                }
                # Use AI generation to enhance the extracted JSON before rendering
                enhancedContent = complete_document  # Default to original
                if prompt:
                    try:
                        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
                        # Get generation prompt
                        generationPrompt = await generation_service.getGenerationPrompt(
                            outputFormat=outputFormat,
                            userPrompt=prompt,
                            title=doc_data["title"],
                            aiService=self
                        )
                        # Prepare the AI call
                        request_options = AiCallOptions()
                        request_options.operationType = OperationType.GENERAL
                        # Create context with the extracted JSON content
                        import json
                        context = f"Extracted JSON content:\n{json.dumps(complete_document, indent=2)}"
                        request = AiCallRequest(
                            prompt=generationPrompt,
                            context=context,
                            options=request_options
                        )
                        # Call AI to enhance the content
                        response = await self.aiObjects.call(request)
                        if response and response.content:
                            # Parse the AI response as JSON
                            try:
                                import re
                                result = response.content.strip()
                                # Extract JSON from markdown if present
                                json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
                                if json_match:
                                    result = json_match.group(1).strip()
                                elif result.startswith('```json'):
                                    result = re.sub(r'^```json\s*', '', result)
                                    result = re.sub(r'\s*```$', '', result)
                                elif result.startswith('```'):
                                    result = re.sub(r'^```\s*', '', result)
                                    result = re.sub(r'\s*```$', '', result)
                                # Try to parse JSON
                                enhancedContent = json.loads(result)
                                logger.info(f"AI enhanced JSON content successfully")
                            except json.JSONDecodeError as e:
                                logger.warning(f"AI generation returned invalid JSON: {str(e)}, attempting to repair...")
                                # Try to repair common JSON issues (regex-based first)
                                try:
                                    repaired_result = self._repairJson(result)
                                    enhancedContent = json.loads(repaired_result)
                                    logger.info(f"Successfully repaired JSON content")
                                except (json.JSONDecodeError, Exception) as repair_error:
                                    logger.warning(f"JSON repair failed: {str(repair_error)}, trying AI repair...")
                                    # Try AI-powered JSON repair as last resort
                                    try:
                                        ai_repaired = await self._repairJsonWithAI(result)
                                        enhancedContent = json.loads(ai_repaired)
                                        logger.info(f"AI successfully repaired JSON content")
                                    except Exception as ai_repair_error:
                                        logger.warning(f"AI JSON repair also failed: {str(ai_repair_error)}, using original content")
                                        enhancedContent = complete_document
                        else:
                            logger.warning("AI generation returned empty response, using original content")
                            enhancedContent = complete_document
                    except Exception as e:
                        logger.warning(f"AI generation failed: {str(e)}, using original content")
                        enhancedContent = complete_document
                # Render the enhanced JSON content
                rendered_content, mime_type = await generation_service.renderReport(
                    extractedContent=enhancedContent,
                    outputFormat=outputFormat,
                    title=doc_data["title"],
                    userPrompt=prompt,
                    aiService=self
                )
                # Generate proper filename with correct extension
                base_filename = doc_data.get("filename", f"document_{i+1}")
                # Remove any existing extension and add the correct one
                if '.' in base_filename:
                    base_filename = base_filename.rsplit('.', 1)[0]
                # Add proper extension based on output format
                if outputFormat.lower() == "docx":
                    filename = f"{base_filename}.docx"
                elif outputFormat.lower() == "pdf":
                    filename = f"{base_filename}.pdf"
                elif outputFormat.lower() == "html":
                    filename = f"{base_filename}.html"
                else:
                    filename = f"{base_filename}.{outputFormat}"
                generated_documents.append({
                    "documentName": filename,
                    "documentData": rendered_content,
                    "mimeType": mime_type
                })
            # Save debug files for multi-file generation - only if debug enabled
            debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
            if debug_enabled:
                try:
                    import os
                    from datetime import datetime, UTC
                    ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
                    debug_root = "./test-chat/ai"
                    debug_dir = os.path.join(debug_root, f"multifile_output_{ts}")
                    os.makedirs(debug_dir, exist_ok=True)
                    # Save metadata
                    with open(os.path.join(debug_dir, "metadata.txt"), "w", encoding="utf-8") as f:
                        f.write(f"title: {title}\n")
                        f.write(f"format: {outputFormat}\n")
                        f.write(f"documents_count: {len(generated_documents)}\n")
                        f.write(f"split_strategy: {prompt_analysis.get('strategy', 'custom')}\n")
                        f.write(f"prompt_analysis: {prompt_analysis}\n")
                    # Save each generated document
                    # (reuses loop variable name `i`; safe here because the
                    # generation loop above has already completed)
                    for i, doc in enumerate(generated_documents):
                        doc_filename = doc["documentName"]
                        doc_data = doc["documentData"]
                        doc_mime = doc["mimeType"]
                        # Determine file extension
                        if outputFormat.lower() == "docx":
                            file_ext = ".docx"
                        elif outputFormat.lower() == "pdf":
                            file_ext = ".pdf"
                        elif outputFormat.lower() == "html":
                            file_ext = ".html"
                        else:
                            file_ext = f".{outputFormat}"
                        # Save the rendered document
                        output_path = os.path.join(debug_dir, f"document_{i+1}_{doc_filename}")
                        if file_ext in ['.md', '.txt', '.html', '.json', '.csv']:
                            # Text-based formats
                            with open(output_path, 'w', encoding='utf-8') as f:
                                f.write(doc_data)
                        else:
                            # Binary formats - decode from base64 if needed
                            try:
                                import base64
                                doc_bytes = base64.b64decode(doc_data)
                                with open(output_path, 'wb') as f:
                                    f.write(doc_bytes)
                            except Exception:
                                # If not base64, save as text
                                with open(output_path, 'w', encoding='utf-8') as f:
                                    f.write(doc_data)
                        logger.info(f"💾 Debug: Saved multi-file document {i+1}: {output_path}")
                    logger.info(f"💾 Debug: Multi-file output saved to: {debug_dir}")
                except Exception as e:
                    logger.warning(f"Failed to save multi-file debug output: {e}")
            return {
                "success": True,
                "content": ai_response,
                "rendered_content": None,  # Not applicable for multi-file
                "mime_type": None,  # Not applicable for multi-file
                "filename": None,  # Not applicable for multi-file
                "format": outputFormat,
                "title": title,
                "documents": generated_documents,
                "is_multi_file": True,
                "split_strategy": prompt_analysis.get("strategy", "custom")
            }
        except Exception as e:
            logger.error(f"Error in multi-file document generation: {str(e)}")
            # Fallback to single-file
            return await self._callAiWithSingleFileGeneration(
                prompt, documents, options, outputFormat, title
            )
async def _callAiJson(
self,
prompt: str,
documents: Optional[List[ChatDocument]],
options: AiCallOptions
) -> Dict[str, Any]:
"""
Handle AI calls with document processing for JSON output.
Returns structured JSON document instead of text.
"""
# Process documents with JSON merging
return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]:
"""Use AI to analyze user prompt and determine processing requirements."""
if not ai_service:
return {"is_multi_file": False, "strategy": "single", "criteria": None}
try:
analysis_prompt = f"""
Analyze this user request and determine if it requires multiple file output or single file output.
User request: "{prompt}"
Respond with JSON only in this exact format:
{{
"is_multi_file": true/false,
"strategy": "single|per_entity|by_section|by_criteria|custom",
"criteria": "description of how to split content",
"file_naming_pattern": "suggested pattern for filenames",
"reasoning": "brief explanation of the analysis"
}}
Consider:
- Does the user want separate files for different entities (customers, products, etc.)?
- Does the user want to split content into multiple documents?
- What would be the most logical way to organize the content?
- What language is the request in? (analyze in the original language)
Return only the JSON response.
"""
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
response = await ai_service.aiObjects.call(request)
if response and response.content:
import json
import re
# Extract JSON from response
result = response.content.strip()
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if json_match:
result = json_match.group(0)
analysis = json.loads(result)
return analysis
else:
return {"is_multi_file": False, "strategy": "single", "criteria": None}
except Exception as e:
logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file")
return {"is_multi_file": False, "strategy": "single", "criteria": None}
def _validateResponseStructure(self, response: Dict[str, Any], prompt_analysis: Dict[str, Any]) -> bool:
"""Validate that AI response matches the expected structure."""
try:
if not isinstance(response, dict):
logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
return False
# Check for multi-file structure
if prompt_analysis.get("is_multi_file", False):
has_documents = "documents" in response
is_documents_list = isinstance(response.get("documents"), list)
logger.info(f"Multi-file validation: has_documents={has_documents}, is_documents_list={is_documents_list}")
if has_documents and is_documents_list:
logger.info(f"Multi-file validation passed: {len(response['documents'])} documents found")
else:
logger.warning(f"Multi-file validation failed: documents key present={has_documents}, documents is list={is_documents_list}")
logger.warning(f"Available keys: {list(response.keys())}")
return has_documents and is_documents_list
else:
has_sections = "sections" in response
is_sections_list = isinstance(response.get("sections"), list)
logger.info(f"Single-file validation: has_sections={has_sections}, is_sections_list={is_sections_list}")
return has_sections and is_sections_list
except Exception as e:
logger.warning(f"Response validation failed with exception: {str(e)}")
return False
    async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
        """
        Create a ChatMessage with the extracted raw JSON attached as a file so the user
        has access to the data even if downstream processing fails.

        Args:
            payload: JSON-serializable extraction result to attach.
            label: Prefix for the attachment filename and the message's
                documentsLabel field.

        Best-effort: every failure (serialization, storage, message or
        document creation) is swallowed so the generation pipeline is
        never interrupted by this side channel.
        """
        try:
            services = self.services
            workflow = services.currentWorkflow
            # Serialize payload
            import json as _json
            from datetime import datetime, UTC
            ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
            content_text = _json.dumps(payload, ensure_ascii=False, indent=2)
            content_bytes = content_text.encode('utf-8')
            # Store as file via component storage
            file_name = f"{label}_{ts}.json"
            file_item = services.interfaceDbComponent.createFile(
                name=file_name,
                mimeType="application/json",
                content=content_bytes
            )
            services.interfaceDbComponent.createFileData(file_item.id, content_bytes)
            # Lookup file info for ChatDocument (fall back to local values
            # if the lookup returns nothing)
            file_info = services.workflow.getFileInfo(file_item.id)
            doc = ChatDocument(
                messageId="",  # set after message creation
                fileId=file_item.id,
                fileName=file_info.get("fileName", file_name) if file_info else file_name,
                fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes),
                mimeType=file_info.get("mimeType", "application/json") if file_info else "application/json"
            )
            # Create message referencing the file
            messageData = {
                "workflowId": workflow.id,
                "role": "assistant",
                "message": "Raw extraction data saved",
                "status": "data",
                "sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1,
                "publishedAt": services.utils.getUtcTimestamp(),
                "documentsLabel": label,
                "documents": []
            }
            message = services.workflow.createMessage(messageData)
            if not message:
                return
            # Persist ChatDocument with messageId
            doc.messageId = message.id
            services.interfaceDbChat.createDocument(doc.to_dict())
            # Update message to include document
            try:
                if not message.documents:
                    message.documents = []
                message.documents.append(doc)
                services.workflow.updateMessage(message.id, {"documents": [d.to_dict() for d in message.documents]})
            except Exception:
                pass
        except Exception:
            # Non-fatal; ignore if storage or chat creation fails
            return
def _repairJson(self, json_string: str) -> str:
"""Repair common JSON syntax errors efficiently for large JSON."""
try:
import re
import json
# Remove any leading/trailing whitespace
json_string = json_string.strip()
# For large JSON, skip substring extraction and go straight to targeted repairs
logger.info(f"Attempting JSON repair for {len(json_string)} characters...")
# Try to parse first to see what specific error we get
try:
json.loads(json_string)
return json_string # Already valid
except json.JSONDecodeError as e:
error_msg = str(e)
logger.info(f"JSON error: {error_msg}")
# Apply targeted fixes based on the specific error
if "Expecting ',' delimiter" in error_msg:
# Fix missing commas between array elements
json_string = re.sub(r'\]\s*\[', '], [', json_string)
json_string = re.sub(r'\}\s*\{', '}, {', json_string)
# Fix missing commas between object properties
json_string = re.sub(r'("\s*:\s*[^,}]+)\s*(")', r'\1, \2', json_string)
if "Expecting value" in error_msg:
# Fix missing values (replace empty with null)
json_string = re.sub(r':\s*,', ': null,', json_string)
json_string = re.sub(r':\s*}', ': null}', json_string)
if "Expecting property name" in error_msg:
# Fix unquoted property names
json_string = re.sub(r'(\w+):', r'"\1":', json_string)
# Fix trailing commas before closing brackets/braces
json_string = re.sub(r',(\s*[}\]])', r'\1', json_string)
# Fix missing closing brackets/braces (only if reasonable)
open_braces = json_string.count('{')
close_braces = json_string.count('}')
open_brackets = json_string.count('[')
close_brackets = json_string.count(']')
# Only add missing brackets if the difference is small (avoid runaway)
if 0 < (open_braces - close_braces) <= 5:
missing_braces = open_braces - close_braces
json_string += '}' * missing_braces
if 0 < (open_brackets - close_brackets) <= 5:
missing_brackets = open_brackets - close_brackets
json_string += ']' * missing_brackets
# Try to parse again
try:
json.loads(json_string)
logger.info("JSON repair successful")
return json_string
except json.JSONDecodeError:
logger.warning("JSON repair failed - will try AI repair")
return json_string
except Exception as e:
logger.warning(f"JSON repair failed: {str(e)}")
return json_string
async def _repairJsonWithAI(self, malformed_json: str) -> str:
"""Use AI to repair malformed JSON efficiently for large files."""
try:
# Limit JSON size for AI processing (max 50KB to avoid token limits)
max_json_size = 50000
json_to_repair = malformed_json
if len(malformed_json) > max_json_size:
logger.warning(f"JSON too large ({len(malformed_json)} chars), truncating to {max_json_size} chars for AI repair")
# Try to find a good truncation point (end of a complete object/array)
truncate_at = max_json_size
for i in range(max_json_size, max(0, max_json_size - 1000), -1):
if malformed_json[i] in ['}', ']']:
truncate_at = i + 1
break
json_to_repair = malformed_json[:truncate_at] + "..."
repair_prompt = f"""
You are a JSON repair expert. Fix the following malformed JSON and return ONLY the corrected JSON, no explanations.
Malformed JSON:
{json_to_repair}
Return only the valid JSON:
"""
# Use AI to repair the JSON
repaired_json = await self.services.ai.callAi(
prompt=repair_prompt,
documents=None,
options={
"process_type": "text",
"operation_type": "generate_content",
"priority": "speed",
"max_cost": 0.01
}
)
# Clean up the response (remove any markdown formatting)
repaired_json = repaired_json.strip()
if repaired_json.startswith('```json'):
repaired_json = repaired_json[7:]
if repaired_json.endswith('```'):
repaired_json = repaired_json[:-3]
repaired_json = repaired_json.strip()
# Validate the repaired JSON
import json
json.loads(repaired_json)
logger.info("AI JSON repair successful")
return repaired_json
except Exception as e:
logger.warning(f"AI JSON repair failed: {str(e)}")
return malformed_json

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,316 @@
import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelAi import ModelCapabilities, AiCallOptions
logger = logging.getLogger(__name__)
class SubUtilities:
    """Utility functions for text processing, debugging, and helper operations.

    Holds only a reference to the service center; all methods are helpers
    used by the AI service for tracing, size estimation, prompt reduction
    and model-capability lookups.
    """

    def __init__(self, services):
        """Initialize utilities service.

        Args:
            services: Service center instance for accessing other services
        """
        self.services = services

    def _writeTraceLog(self, contextText: str, data: Any) -> None:
        """Write raw data to the central trace log file without truncation.

        No-op unless the module logger is at DEBUG level or lower. All
        failures are swallowed so tracing can never break the caller.

        Args:
            contextText: Short label written before the payload.
            data: Text, dict or list payload; dict/list is pretty-printed.
        """
        try:
            import os
            import json
            from datetime import datetime, UTC
            # Only write if logger is in debug mode
            if logger.level > logging.DEBUG:
                return
            # Get log directory from configuration via service center if possible
            logDir = None
            try:
                logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
            except Exception:
                pass
            if not logDir:
                logDir = "./"
            if not os.path.isabs(logDir):
                # Make a relative path resolve against the gateway directory
                # (four levels up from this file)
                gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                logDir = os.path.join(gatewayDir, logDir)
            os.makedirs(logDir, exist_ok=True)
            traceFile = os.path.join(logDir, "log_trace.log")
            timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
            traceEntry = f"[{timestamp}] {contextText}\n" + ("=" * 80) + "\n"
            if data is None:
                traceEntry += "No data provided\n"
            else:
                # Prefer exact text; if dict/list, pretty print JSON
                try:
                    if isinstance(data, (dict, list)):
                        traceEntry += f"JSON Data:\n{json.dumps(data, indent=2, ensure_ascii=False)}\n"
                    else:
                        text = str(data)
                        traceEntry += f"Text Data:\n{text}\n"
                except Exception:
                    traceEntry += f"Data (fallback): {str(data)}\n"
            traceEntry += ("=" * 80) + "\n\n"
            with open(traceFile, "a", encoding="utf-8") as f:
                f.write(traceEntry)
        except Exception:
            # Swallow to avoid recursive logging issues
            pass

    def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None:
        """Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled.

        Args:
            label: Logical name of the response (used in the filename).
            content: Raw response text; None is written as an empty file.
            partIndex: 1-based part number for multi-part responses.
            modelName: Optional model identifier, sanitized for filenames.
            continuation: Optional flag marking continuation responses.
        """
        try:
            # Check if debug logging is enabled
            debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
            if not debug_enabled:
                return
            import os
            from datetime import datetime, UTC
            # Base dir: gateway/test-chat/ai (go up 4 levels from this file)
            # .../gateway/modules/services/serviceAi/subUtilities.py -> up to gateway root
            gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
            outDir = os.path.join(gatewayDir, 'test-chat', 'ai')
            os.makedirs(outDir, exist_ok=True)
            ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
            suffix = []
            if partIndex is not None:
                suffix.append(f"part{partIndex}")
            if continuation is not None:
                suffix.append(f"cont_{str(continuation).lower()}")
            if modelName:
                safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName)
                suffix.append(safeModel)
            suffixStr = ('_' + '_'.join(suffix)) if suffix else ''
            fname = f"{ts}_{label}{suffixStr}.txt"
            fpath = os.path.join(outDir, fname)
            with open(fpath, 'w', encoding='utf-8') as f:
                f.write(content or '')
        except Exception:
            # Do not raise; best-effort debug write
            pass

    def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
        """Check if text exceeds the model token limit with a safety margin.

        Uses a rough 4-characters-per-token estimate; no tokenizer is run.
        """
        # Simple character-based estimation (4 chars per token)
        estimated_tokens = len(text) // 4
        max_tokens = int(model.maxTokens * (1 - safety_margin))
        return estimated_tokens > max_tokens

    def _reduceText(self, text: str, reduction_factor: float) -> str:
        """Reduce text size by the specified factor.

        A factor >= 1.0 returns the text unchanged; otherwise the text is
        truncated to ``len(text) * reduction_factor`` characters and marked
        with a "... [reduced]" suffix.
        """
        if reduction_factor >= 1.0:
            return text
        target_length = int(len(text) * reduction_factor)
        return text[:target_length] + "... [reduced]"

    def _extractTextFromContentParts(self, extracted_content) -> str:
        """Extract text content from ExtractionService ContentPart objects.

        Only parts whose typeGroup is text/table/structure contribute; the
        parts' data fields are joined with blank lines. Returns "" for
        missing input or input without a ``parts`` attribute.
        """
        if not extracted_content or not hasattr(extracted_content, 'parts'):
            return ""
        text_parts = []
        for part in extracted_content.parts:
            if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
                if hasattr(part, 'data') and part.data:
                    text_parts.append(part.data)
        return "\n\n".join(text_parts)

    def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
        """Build full prompt by replacing placeholders with their content.

        Supports both the legacy {{placeholder}} form and the newer
        {{KEY:placeholder}} form; unknown placeholders are left untouched.
        """
        if not placeholders:
            return prompt
        full_prompt = prompt
        for placeholder, content in placeholders.items():
            # Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
            full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
            full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
        return full_prompt

    def _reducePlanningPrompt(
        self,
        full_prompt: str,
        placeholders: Optional[Dict[str, str]],
        model: ModelCapabilities,
        options: AiCallOptions
    ) -> str:
        """Reduce planning prompt size by shrinking placeholder content while preserving the prompt template.

        NOTE(review): ``model`` and ``options`` are currently unused here;
        they are kept for interface parity with the other reduction helpers.
        """
        if not placeholders:
            return self._reduceText(full_prompt, 0.7)
        # Reduce placeholders while preserving prompt
        reduced_placeholders = {}
        for placeholder, content in placeholders.items():
            if len(content) > 1000:  # Only reduce long content
                reduced_placeholders[placeholder] = self._reduceText(content, 0.7)
            else:
                reduced_placeholders[placeholder] = content
        return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders)

    def _reduceTextPrompt(
        self,
        prompt: str,
        context: str,
        model: ModelCapabilities,
        options: AiCallOptions
    ) -> str:
        """Reduce a prompt+context pair to fit the model's budget.

        With options.compressPrompt both prompt and context shrink toward
        70% of the budget; otherwise only the context is reduced so the
        prompt text stays intact. NOTE(review): model.maxTokens is compared
        against character counts here — confirm the intended token/char
        conversion is consistent with _exceedsTokenLimit.
        """
        max_size = int(model.maxTokens * (1 - options.safetyMargin))
        if options.compressPrompt:
            # Reduce both prompt and context
            target_size = max_size
            current_size = len(prompt) + len(context)
            # Guard against division by zero when both inputs are empty
            if current_size > 0:
                reduction_factor = (target_size * 0.7) / current_size
                if reduction_factor < 1.0:
                    prompt = self._reduceText(prompt, reduction_factor)
                    context = self._reduceText(context, reduction_factor)
        else:
            # Only reduce context, preserve prompt integrity
            max_context_size = max_size - len(prompt)
            # `context` truthiness guard also prevents division by zero when
            # the prompt alone already exceeds the budget (negative max size)
            if context and len(context) > max_context_size:
                reduction_factor = max_context_size / len(context)
                context = self._reduceText(context, reduction_factor)
        return (prompt + "\n\n" + context) if context else prompt

    async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str:
        """Compress content to at most ``targetSize`` bytes (UTF-8).

        Services must not call AI connectors directly, so this performs a
        plain byte-safe truncation. The previously built (and never used)
        AI compression prompt was removed as dead code; ``contentType`` is
        kept for interface compatibility.
        """
        if len(content.encode("utf-8")) <= targetSize:
            return content
        try:
            # Byte-safe truncation: cut the encoded bytes, then decode while
            # ignoring any partial multi-byte sequence at the cut point.
            data = content.encode("utf-8")
            return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]"
        except Exception as e:
            logger.warning(f"AI compression failed, using truncation: {str(e)}")
            return content[:targetSize] + "... [truncated]"

    def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List], options: AiCallOptions) -> Dict[str, int]:
        """
        Get model capabilities for content processing, including appropriate size limits for chunking.

        Returns:
            Dict with maxContextBytes, textChunkSize and imageChunkSize
            (all in bytes, derived from the selected model's context length
            via the rough 1-token-≈-4-characters estimate).
        """
        # Estimate total content size
        prompt_size = len(prompt.encode('utf-8'))
        document_size = 0
        if documents:
            # Rough estimate of document content size
            for doc in documents:
                document_size += getattr(doc, 'fileSize', 0) or 0
        total_size = prompt_size + document_size
        # Use AiObjects to select the best model for this content size
        # We'll simulate the model selection by checking available models
        from modules.interfaces.interfaceAiObjects import aiModels
        # Find the best model for this content size and operation
        best_model = None
        best_context_length = 0
        for model_name, model_info in aiModels.items():
            context_length = model_info.get("contextLength", 0)
            # Skip models with no context length or too small for content
            if context_length == 0:
                continue
            # Check if model supports the operation type
            capabilities = model_info.get("capabilities", [])
            from modules.datamodels.datamodelAi import OperationType
            if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
                continue
            elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
                continue
            elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
                continue
            elif "text_generation" not in capabilities:
                continue
            # Prefer models that can handle the content without chunking, but allow chunking if needed
            if context_length >= total_size * 0.8:  # 80% of content size
                if context_length > best_context_length:
                    best_model = model_info
                    best_context_length = context_length
            elif best_model is None:  # Fallback to largest available model
                if context_length > best_context_length:
                    best_model = model_info
                    best_context_length = context_length
        # Fallback to a reasonable default if no model found
        if best_model is None:
            best_model = {
                "contextLength": 128000,  # GPT-4o default
                "llmName": "gpt-4o"
            }
        # Calculate appropriate sizes
        # Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
        context_length_bytes = int(best_model["contextLength"] * 4)
        max_context_bytes = int(context_length_bytes * 0.9)  # 90% of context length
        text_chunk_size = int(max_context_bytes * 0.7)  # 70% of max context for text chunks
        image_chunk_size = int(max_context_bytes * 0.8)  # 80% of max context for image chunks
        logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
        logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
        logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
        return {
            "maxContextBytes": max_context_bytes,
            "textChunkSize": text_chunk_size,
            "imageChunkSize": image_chunk_size
        }

    def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]:
        """
        Get models capable of handling the specific operation with capability filtering.

        Returns an empty list when AiObjects is available (model selection is
        delegated to it), otherwise a single hard-coded default model.
        """
        # Use the actual AI objects model selection instead of hardcoded default
        if hasattr(self, 'aiObjects') and self.aiObjects:
            # Let AiObjects handle the model selection
            return []
        else:
            # Fallback to default model if AiObjects not available
            default_model = ModelCapabilities(
                name="default",
                maxTokens=4000,
                capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"],
                costPerToken=0.001,
                processingTime=1.0,
                isAvailable=True
            )
            return [default_model]

View file

@ -0,0 +1,384 @@
import logging
from typing import Dict, Any, List, Optional, Tuple, Union
from modules.datamodels.datamodelWeb import (
WebResearchRequest,
WebResearchActionResult,
WebResearchDocumentData,
WebResearchActionDocument,
WebSearchResultItem,
)
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class SubWebResearch:
    """Web research operations including search, crawling, and analysis."""
    def __init__(self, services, aiObjects):
        """Initialize web research service.
        Args:
            services: Service center instance for accessing other services
            aiObjects: Initialized AiObjects instance
        """
        self.services = services
        self.aiObjects = aiObjects
    async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
        """Perform web research using interface functions.

        Pipeline:
            1. Determine main URLs: caller-provided, or an AI-optimized web search.
            2. AI-filter the URL list for relevance to the user's prompt.
            3. Recursively crawl the selected sites with configurable depth,
               per-domain link limit and an overall timeout.

        Args:
            request: WebResearchRequest with the user prompt, optional seed URLs
                and search/crawl options.
        Returns:
            WebResearchActionResult with one JSON document holding the raw
            crawled content, or success=False with an error message.
        """
        try:
            logger.info("WEB RESEARCH STARTED")
            logger.info(f"User Query: {request.user_prompt}")
            logger.info(f"Max Results: {request.max_results}, Max Pages: {request.options.max_pages}")
            # Global URL index to track all processed URLs across the entire research session
            global_processed_urls = set()
            # Step 1: Find relevant websites - either provided URLs or AI-determined main URLs
            logger.info("=== STEP 1: INITIAL MAIN URLS LIST ===")
            if request.urls:
                # Use provided URLs as initial main URLs
                websites = request.urls
                logger.info(f"Using provided URLs ({len(websites)}):")
                for i, url in enumerate(websites, 1):
                    logger.info(f" {i}. {url}")
            else:
                # Use AI to determine main URLs based on user's intention
                logger.info(f"AI analyzing user intent: '{request.user_prompt}'")
                # Use AI to generate optimized Tavily search query and search parameters
                query_optimizer_prompt = f"""You are a search query optimizer.
USER QUERY: {request.user_prompt}
Your task: Create a search query and parameters for the USER QUERY given.
RULES:
1. The search query MUST be related to the user query above
2. Extract key terms from the user query
3. Determine appropriate country/language based on the query context
4. Keep search query short (2-6 words)
Return ONLY this JSON format:
{{
"user_prompt": "search query based on user query above",
"country": "Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries)",
"language": "language_code_or_null",
"topic": "general|news|academic_or_null",
"time_range": "d|w|m|y_or_null",
"selection_strategy": "single|multiple|specific_page",
"selection_criteria": "what URLs to prioritize",
"expected_url_patterns": ["pattern1", "pattern2"],
"estimated_result_count": number
}}"""
                # Get AI response for query optimization
                from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions
                ai_request = AiCallRequest(
                    prompt=query_optimizer_prompt,
                    options=AiCallOptions()
                )
                ai_response_obj = await self.aiObjects.call(ai_request)
                ai_response = ai_response_obj.content
                logger.debug(f"AI query optimizer response: {ai_response}")
                # Parse AI response to extract search query
                import json
                try:
                    # Clean the response by removing markdown code blocks
                    cleaned_response = ai_response.strip()
                    if cleaned_response.startswith('```json'):
                        cleaned_response = cleaned_response[7:]  # Remove ```json
                    if cleaned_response.endswith('```'):
                        cleaned_response = cleaned_response[:-3]  # Remove ```
                    cleaned_response = cleaned_response.strip()
                    query_data = json.loads(cleaned_response)
                    search_query = query_data.get("user_prompt", request.user_prompt)
                    ai_country = query_data.get("country")
                    ai_language = query_data.get("language")
                    ai_topic = query_data.get("topic")
                    ai_time_range = query_data.get("time_range")
                    selection_strategy = query_data.get("selection_strategy", "multiple")
                    selection_criteria = query_data.get("selection_criteria", "relevant URLs")
                    expected_patterns = query_data.get("expected_url_patterns", [])
                    estimated_count = query_data.get("estimated_result_count", request.max_results)
                    logger.info(f"AI optimized search query: '{search_query}'")
                    logger.info(f"Selection strategy: {selection_strategy}")
                    logger.info(f"Selection criteria: {selection_criteria}")
                    logger.info(f"Expected URL patterns: {expected_patterns}")
                    logger.info(f"Estimated result count: {estimated_count}")
                except json.JSONDecodeError:
                    logger.warning("Failed to parse AI response as JSON, using original query")
                    search_query = request.user_prompt
                    ai_country = None
                    ai_language = None
                    ai_topic = None
                    ai_time_range = None
                    selection_strategy = "multiple"
                    # BUGFIX: these were previously left undefined on this path, making
                    # the include_domains block below fail with a NameError that was
                    # silently swallowed by its try/except.
                    selection_criteria = "relevant URLs"
                    expected_patterns = []
                    estimated_count = request.max_results
                # Perform the web search with AI-determined parameters
                search_kwargs = {
                    "query": search_query,
                    "max_results": request.max_results,
                    "search_depth": request.options.search_depth,
                    "auto_parameters": False  # Use explicit parameters
                }
                # Add parameters only if they have valid values
                def _normalizeCountry(c: Optional[str]) -> Optional[str]:
                    """Map country codes/aliases to the full English name Tavily expects."""
                    if not c:
                        return None
                    s = str(c).strip()
                    if not s or s.lower() in ['null', 'none', 'undefined']:
                        return None
                    # Map common codes to full English names when easy to do without extra deps
                    mapping = {
                        'ch': 'Switzerland', 'che': 'Switzerland',
                        'de': 'Germany', 'ger': 'Germany', 'deu': 'Germany',
                        'at': 'Austria', 'aut': 'Austria',
                        # BUGFIX: key was misspelled 'uni ted states' and could never match
                        'us': 'United States', 'usa': 'United States', 'united states': 'United States',
                        'uk': 'United Kingdom', 'gb': 'United Kingdom', 'gbr': 'United Kingdom'
                    }
                    key = s.lower()
                    if key in mapping:
                        return mapping[key]
                    # If looks like full name, capitalize first letter only (Tavily accepts English names)
                    return s
                norm_ai_country = _normalizeCountry(ai_country)
                norm_req_country = _normalizeCountry(request.options.country)
                if norm_ai_country:
                    search_kwargs["country"] = norm_ai_country
                elif norm_req_country:
                    search_kwargs["country"] = norm_req_country
                if ai_language and ai_language not in ['null', '', 'none', 'undefined']:
                    search_kwargs["language"] = ai_language
                elif request.options.language and request.options.language not in ['null', '', 'none', 'undefined']:
                    search_kwargs["language"] = request.options.language
                if ai_topic and ai_topic in ['general', 'news', 'academic']:
                    search_kwargs["topic"] = ai_topic
                elif request.options.topic and request.options.topic in ['general', 'news', 'academic']:
                    search_kwargs["topic"] = request.options.topic
                if ai_time_range and ai_time_range in ['d', 'w', 'm', 'y']:
                    search_kwargs["time_range"] = ai_time_range
                elif request.options.time_range and request.options.time_range in ['d', 'w', 'm', 'y']:
                    search_kwargs["time_range"] = request.options.time_range
                # Constrain by expected domains if provided by AI
                try:
                    include_domains = []
                    for p in expected_patterns or []:
                        if not isinstance(p, str):
                            continue
                        # Extract bare domain from pattern or URL
                        import re
                        m = re.search(r"(?:https?://)?([^/\s]+)", p.strip())
                        if m:
                            domain = m.group(1).lower()
                            # strip leading www.
                            if domain.startswith('www.'):
                                domain = domain[4:]
                            include_domains.append(domain)
                    # Deduplicate
                    if include_domains:
                        seen = set()
                        uniq = []
                        for d in include_domains:
                            if d not in seen:
                                seen.add(d)
                                uniq.append(d)
                        search_kwargs["include_domains"] = uniq
                except Exception:
                    # Best-effort only: a malformed pattern must not abort the search.
                    pass
                # Log the parameters being used
                logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}, include_domains={search_kwargs.get('include_domains', [])}")
                search_results = await self.aiObjects.search_websites(**search_kwargs)
                logger.debug(f"Web search returned {len(search_results)} results:")
                for i, result in enumerate(search_results, 1):
                    logger.debug(f" {i}. {result.url} - {result.title}")
                # Deduplicate while preserving order
                seen = set()
                search_urls = []
                for r in search_results:
                    u = str(r.url)
                    if u not in seen:
                        seen.add(u)
                        search_urls.append(u)
                logger.info(f"After initial deduplication: {len(search_urls)} unique URLs from {len(search_results)} search results")
                if not search_urls:
                    logger.error("No relevant websites found")
                    return WebResearchActionResult(success=False, error="No relevant websites found")
                # Now use AI to determine the main URLs based on user's intention
                logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent")
                # Create a prompt for AI to identify main URLs based on user's intention
                ai_prompt = f"""
Select the most relevant URLs from these search results:
{chr(10).join([f"{i+1}. {url}" for i, url in enumerate(search_urls)])}
Return only the URLs that are most relevant for the user's query.
One URL per line.
"""
                # Create AI call request
                ai_request = AiCallRequest(
                    prompt=ai_prompt,
                    options=AiCallOptions()
                )
                ai_response_obj = await self.aiObjects.call(ai_request)
                ai_response = ai_response_obj.content
                logger.debug(f"AI response for main URL selection: {ai_response}")
                # Parse AI response to extract URLs
                websites = []
                for line in ai_response.strip().split('\n'):
                    line = line.strip()
                    if line and ('http://' in line or 'https://' in line):
                        # Extract URL from the line
                        for word in line.split():
                            if word.startswith('http://') or word.startswith('https://'):
                                websites.append(word.rstrip('.,;'))
                                break
                if not websites:
                    logger.warning("AI did not identify any main URLs, using first few search results")
                    websites = search_urls[:3]  # Fallback to first 3 search results
                # Deduplicate while preserving order
                pre_dedup_count = len(websites)  # BUGFIX: remember count before dedup for accurate logging
                seen = set()
                unique_websites = []
                for url in websites:
                    if url not in seen:
                        seen.add(url)
                        unique_websites.append(url)
                websites = unique_websites
                logger.info(f"After AI selection deduplication: {len(websites)} unique URLs from {pre_dedup_count} AI-selected URLs")
                logger.info(f"AI selected {len(websites)} main URLs (after deduplication):")
                for i, url in enumerate(websites, 1):
                    logger.info(f" {i}. {url}")
            # Step 2: Smart website selection using AI interface
            logger.info("=== STEP 2: FILTERED URL LIST BY USER PROMPT'S INTENTION ===")
            logger.info(f"AI analyzing {len(websites)} URLs for relevance to: '{request.user_prompt}'")
            selectedWebsites, aiResponse = await self.aiObjects.selectRelevantWebsites(websites, request.user_prompt)
            logger.debug(f"AI Response: {aiResponse}")
            logger.debug(f"AI selected {len(selectedWebsites)} most relevant URLs:")
            for i, url in enumerate(selectedWebsites, 1):
                logger.debug(f" {i}. {url}")
            # Show which were filtered out
            filtered_out = [url for url in websites if url not in selectedWebsites]
            if filtered_out:
                logger.debug(f"Filtered out {len(filtered_out)} less relevant URLs:")
                for i, url in enumerate(filtered_out, 1):
                    logger.debug(f" {i}. {url}")
            # Step 3+4+5: Recursive crawling with configurable depth
            # Get configuration parameters
            max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
            max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
            crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10"))
            crawl_timeout_seconds = crawl_timeout_minutes * 60
            # Use the configured max_depth or the request's pages_search_depth, whichever is smaller
            effective_depth = min(max_depth, request.options.pages_search_depth)
            logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {effective_depth}) ===")
            logger.info(f"Starting recursive crawl of {len(selectedWebsites)} main websites...")
            logger.info(f"Search depth: {effective_depth} levels (max configured: {max_depth})")
            logger.info(f"Max links per domain: {max_links_per_domain}")
            logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes")
            # Use recursive crawling with URL index to avoid duplicates
            import asyncio
            try:
                allContent = await asyncio.wait_for(
                    self.aiObjects.crawlRecursively(
                        urls=selectedWebsites,
                        max_depth=effective_depth,
                        extract_depth=request.options.extract_depth,
                        max_per_domain=max_links_per_domain,
                        global_processed_urls=global_processed_urls
                    ),
                    timeout=crawl_timeout_seconds
                )
                logger.info(f"Crawling completed within timeout: {len(allContent)} pages crawled")
            except asyncio.TimeoutError:
                logger.warning(f"Crawling timed out after {crawl_timeout_minutes} minutes, using partial results")
                # NOTE(review): asyncio.wait_for cancels the inner task on timeout,
                # so partial results collected inside crawlRecursively are NOT
                # reachable here; the empty dict below makes the run fail with a
                # clear error instead of pretending partial data exists.
                allContent = {}
            if not allContent:
                logger.error("Could not extract content from any websites")
                return WebResearchActionResult(success=False, error="Could not extract content from any websites")
            logger.info("=== WEB RESEARCH COMPLETED ===")
            logger.info(f"Successfully crawled {len(allContent)} URLs total")
            logger.info(f"Crawl depth: {effective_depth} levels")
            # Create simple result with raw content
            sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites]
            # Get all additional links (all URLs except main ones)
            additional_links = [url for url in allContent.keys() if url not in selectedWebsites]
            # Combine all content into a single result
            combinedContent = ""
            for url, content in allContent.items():
                combinedContent += f"\n\n=== {url} ===\n{content}\n"
            documentData = WebResearchDocumentData(
                user_prompt=request.user_prompt,
                websites_analyzed=len(allContent),
                additional_links_found=len(additional_links),
                analysis_result=combinedContent,  # Raw content, no analysis
                sources=sources,
                additional_links=additional_links,
                individual_content=allContent,  # Individual URL -> content mapping
                debug_info={
                    "crawl_depth": effective_depth,
                    "max_configured_depth": max_depth,
                    "max_links_per_domain": max_links_per_domain,
                    "crawl_timeout_minutes": crawl_timeout_minutes,
                    "total_urls_crawled": len(allContent),
                    "main_urls": len(selectedWebsites),
                    "additional_urls": len(additional_links)
                }
            )
            document = WebResearchActionDocument(
                documentName=f"web_research_{request.user_prompt[:50]}.json",
                documentData=documentData,
                mimeType="application/json"
            )
            return WebResearchActionResult(
                success=True,
                documents=[document],
                resultLabel="web_research_results"
            )
        except Exception as e:
            logger.error(f"Error in web research: {str(e)}")
            return WebResearchActionResult(success=False, error=str(e))

View file

@ -7,9 +7,29 @@ from ..subRegistry import Extractor
class BinaryExtractor(Extractor):
"""
Fallback extractor for unsupported file types.
This extractor handles any file type that doesn't match other extractors.
It encodes the file as base64 and marks it as binary data.
Supported formats:
- All file types (fallback)
- MIME types: application/octet-stream (default)
- File extensions: All (fallback)
"""
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Always claim the file: this extractor is the catch-all fallback."""
        return True
    def getSupportedExtensions(self) -> list[str]:
        """Return supported extensions; empty list means any (fallback)."""
        return []  # Accepts all extensions as fallback
    def getSupportedMimeTypes(self) -> list[str]:
        """Return supported MIME types; empty list means any (fallback)."""
        return []  # Accepts all MIME types as fallback
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
mimeType = context.get("mimeType") or "application/octet-stream"
return [ContentPart(

View file

@ -6,9 +6,26 @@ from ..subRegistry import Extractor
class CsvExtractor(Extractor):
"""
Extractor for CSV files.
Supported formats:
- MIME types: text/csv
- File extensions: .csv
- Special handling: Treats as table data
"""
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True for a text/csv MIME type or a .csv file extension."""
        return mimeType == "text/csv" or (fileName or "").lower().endswith(".csv")
    def getSupportedExtensions(self) -> list[str]:
        """Return the file extensions handled by this extractor."""
        return [".csv"]
    def getSupportedMimeTypes(self) -> list[str]:
        """Return the MIME types handled by this extractor."""
        return ["text/csv"]
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
fileName = context.get("fileName")
mimeType = context.get("mimeType") or "text/csv"

View file

@ -7,6 +7,16 @@ from ..subRegistry import Extractor
class DocxExtractor(Extractor):
"""
Extractor for Microsoft Word documents.
Supported formats:
- MIME types: application/vnd.openxmlformats-officedocument.wordprocessingml.document
- File extensions: .docx
- Special handling: Extracts paragraphs and tables (converts tables to CSV)
- Dependencies: python-docx
"""
    def __init__(self):
        # Deferred-import flags: _load() imports python-docx on first use and
        # records whether the dependency is available.
        self._loaded = False
        self._haveLibs = False
@ -25,6 +35,14 @@ class DocxExtractor(Extractor):
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True for the .docx MIME type or a .docx file extension."""
        return mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" or (fileName or "").lower().endswith(".docx")
    def getSupportedExtensions(self) -> list[str]:
        """Return the file extensions handled by this extractor."""
        return [".docx"]
    def getSupportedMimeTypes(self) -> list[str]:
        """Return the MIME types handled by this extractor."""
        return ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
self._load()
parts: List[ContentPart] = []

View file

@ -7,9 +7,27 @@ from ..subRegistry import Extractor
class HtmlExtractor(Extractor):
"""
Extractor for HTML files.
Supported formats:
- MIME types: text/html
- File extensions: .html, .htm
- Special handling: Uses BeautifulSoup for parsing
- Dependencies: beautifulsoup4
"""
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True for a text/html MIME type or an .html/.htm extension."""
        return mimeType == "text/html" or (fileName or "").lower().endswith((".html", ".htm"))
    def getSupportedExtensions(self) -> list[str]:
        """Return the file extensions handled by this extractor."""
        return [".html", ".htm"]
    def getSupportedMimeTypes(self) -> list[str]:
        """Return the MIME types handled by this extractor."""
        return ["text/html"]
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
mimeType = context.get("mimeType") or "text/html"
text = fileBytes.decode("utf-8", errors="replace")

View file

@ -0,0 +1,75 @@
from typing import Any, Dict, List
import base64
import logging
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
logger = logging.getLogger(__name__)
class ImageExtractor(Extractor):
    """
    Extractor for image files.
    Supported formats:
    - MIME types: image/jpeg, image/png, image/gif, image/webp, image/bmp, image/tiff
    - File extensions: .jpg, .jpeg, .png, .gif, .webp, .bmp, .tiff
    - Special handling: GIF files are converted to PNG during extraction
    """
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True when the MIME type or file extension marks an image."""
        return ((mimeType or "").startswith("image/") or
                (fileName or "").lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff")))
    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"]
    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return ["image/jpeg", "image/png", "image/gif", "image/webp", "image/bmp", "image/tiff"]
    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Wrap the image in a single base64-encoded ContentPart.

        GIF input is converted to a static RGB PNG (animation dropped); any
        other format is passed through unchanged. On conversion failure the
        original GIF bytes are kept.
        """
        mimeType = context.get("mimeType") or "image/unknown"
        fileName = context.get("fileName", "")
        # Convert GIF to PNG during extraction
        if mimeType.lower() == "image/gif":
            # BUGFIX: capture the original size before fileBytes is replaced,
            # otherwise the log below reports the converted size twice.
            original_size = len(fileBytes)
            try:
                from PIL import Image
                import io
                # Open GIF and convert to PNG
                with Image.open(io.BytesIO(fileBytes)) as img:
                    # Convert to RGB (removes animation)
                    if img.mode in ('RGBA', 'LA', 'P'):
                        img = img.convert('RGB')
                    # Save as PNG in memory
                    png_buffer = io.BytesIO()
                    img.save(png_buffer, format='PNG')
                    png_data = png_buffer.getvalue()
                # Update mimeType and fileBytes
                mimeType = "image/png"
                fileBytes = png_data
                logger.info(f"GIF converted to PNG during extraction: {fileName}, original={original_size} bytes, converted={len(png_data)} bytes")
            except Exception as e:
                logger.warning(f"GIF conversion failed during extraction for {fileName}: {str(e)}, using original")
                # Keep original GIF data if conversion fails
        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="image",
            typeGroup="image",
            mimeType=mimeType,
            data=base64.b64encode(fileBytes).decode("utf-8"),
            metadata={"size": len(fileBytes)}
        )]

View file

@ -7,9 +7,26 @@ from ..subRegistry import Extractor
class JsonExtractor(Extractor):
"""
Extractor for JSON files.
Supported formats:
- MIME types: application/json
- File extensions: .json
- Special handling: Validates JSON format, falls back to text if invalid
"""
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True for an application/json MIME type or a .json extension."""
        return mimeType == "application/json" or (fileName or "").lower().endswith(".json")
    def getSupportedExtensions(self) -> list[str]:
        """Return the file extensions handled by this extractor."""
        return [".json"]
    def getSupportedMimeTypes(self) -> list[str]:
        """Return the MIME types handled by this extractor."""
        return ["application/json"]
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
mimeType = context.get("mimeType") or "application/json"
text = fileBytes.decode("utf-8", errors="replace")

View file

@ -8,6 +8,16 @@ from ..subRegistry import Extractor
class PdfExtractor(Extractor):
"""
Extractor for PDF files.
Supported formats:
- MIME types: application/pdf
- File extensions: .pdf
- Special handling: Extracts text per page and embedded images
- Dependencies: PyPDF2, PyMuPDF (fitz)
"""
    def __init__(self):
        # Deferred-import flags: _load() imports the PDF libraries on first use
        # and records whether the dependencies are available.
        self._loaded = False
        self._haveLibs = False
@ -27,6 +37,14 @@ class PdfExtractor(Extractor):
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True for an application/pdf MIME type or a .pdf extension."""
        return mimeType == "application/pdf" or (fileName or "").lower().endswith(".pdf")
    def getSupportedExtensions(self) -> list[str]:
        """Return the file extensions handled by this extractor."""
        return [".pdf"]
    def getSupportedMimeTypes(self) -> list[str]:
        """Return the MIME types handled by this extractor."""
        return ["application/pdf"]
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
self._load()
parts: List[ContentPart] = []

View file

@ -0,0 +1,225 @@
import logging
import base64
from typing import List, Dict, Any, Optional
from modules.datamodels.datamodelExtraction import ContentPart, ContentExtracted
from ..subRegistry import Extractor
logger = logging.getLogger(__name__)
class PptxExtractor(Extractor):
    """
    Extractor for PowerPoint files.
    Supported formats:
    - MIME types: application/vnd.openxmlformats-officedocument.presentationml.presentation, application/vnd.ms-powerpoint
    - File extensions: .pptx, .ppt
    - Special handling: Extracts slide content, tables, and images
    - Dependencies: python-pptx
    """
    def __init__(self):
        # Deferred-import flags: _load() imports python-pptx on first use and
        # records whether the dependency is available.
        self._loaded = False
        self._haveLibs = False
    def _load(self):
        """Import python-pptx on first call; record availability in _haveLibs."""
        if self._loaded:
            return
        self._loaded = True
        try:
            global Presentation
            from pptx import Presentation
            self._haveLibs = True
        except Exception:
            self._haveLibs = False
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True when the MIME type or extension marks a PowerPoint file."""
        return (mimeType in [
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint"
        ]) or (fileName or "").lower().endswith((".pptx", ".ppt"))
    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".pptx", ".ppt"]
    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return [
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint"
        ]
    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """
        Extract content from PowerPoint files.
        Args:
            fileBytes: Raw file data as bytes
            context: Context dictionary with file information
        Returns:
            List of ContentPart objects with extracted content: an overview
            part first, then per-slide image and text parts; a single error
            part when python-pptx is missing or parsing fails.
        """
        self._load()
        if not self._haveLibs:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return [ContentPart(
                id="error",
                label="PowerPoint Extraction Error",
                typeGroup="text",
                mimeType="text/plain",
                data="Error: python-pptx library not installed",
                metadata={"error": True, "error_message": "python-pptx library not installed"}
            )]
        try:
            import io
            # Load presentation from bytes
            presentation = Presentation(io.BytesIO(fileBytes))
            parts = []
            slide_index = 0
            # Extract content from each slide
            for slide in presentation.slides:
                slide_index += 1
                slide_content = []
                # Extract text from slide
                for shape in slide.shapes:
                    if hasattr(shape, "text") and shape.text.strip():
                        slide_content.append(shape.text.strip())
                # Extract table data
                for shape in slide.shapes:
                    if shape.has_table:
                        table = shape.table
                        table_data = []
                        for row in table.rows:
                            row_data = []
                            for cell in row.cells:
                                row_data.append(cell.text.strip())
                            table_data.append(row_data)
                        if table_data:
                            # Convert table to markdown format
                            table_md = self._table_to_markdown(table_data)
                            slide_content.append(table_md)
                # Extract images
                for shape in slide.shapes:
                    if shape.shape_type == 13:  # MSO_SHAPE_TYPE.PICTURE
                        try:
                            image = shape.image
                            image_bytes = image.blob
                            image_b64 = base64.b64encode(image_bytes).decode('utf-8')
                            # Create image part
                            # NOTE(review): mimeType is hard-coded to PNG, but the
                            # blob may be JPEG or another format — confirm whether
                            # the real content type should be propagated here.
                            image_part = ContentPart(
                                id=f"slide_{slide_index}_image_{len(parts)}",
                                label=f"Slide {slide_index} Image",
                                typeGroup="image",
                                mimeType="image/png",  # Default to PNG
                                data=image_b64,
                                metadata={
                                    "slide_number": slide_index,
                                    "shape_type": "image",
                                    "extracted_from": "powerpoint"
                                }
                            )
                            parts.append(image_part)
                        except Exception as e:
                            logger.warning(f"Failed to extract image from slide {slide_index}: {str(e)}")
                # Create slide content part
                if slide_content:
                    slide_text = f"# Slide {slide_index}\n\n" + "\n\n".join(slide_content)
                    slide_part = ContentPart(
                        id=f"slide_{slide_index}",
                        label=f"Slide {slide_index} Content",
                        typeGroup="structure",
                        mimeType="text/plain",
                        data=slide_text,
                        metadata={
                            "slide_number": slide_index,
                            "content_type": "slide",
                            "extracted_from": "powerpoint",
                            "text_length": len(slide_text)
                        }
                    )
                    parts.append(slide_part)
            # Create presentation overview
            file_name = context.get("fileName", "presentation.pptx")
            overview_text = f"# PowerPoint Presentation: {file_name}\n\n"
            overview_text += f"**Total Slides:** {len(presentation.slides)}\n\n"
            overview_text += f"**Content Parts:** {len(parts)}\n\n"
            # Add slide summaries
            for i, slide in enumerate(presentation.slides, 1):
                slide_text_parts = []
                for shape in slide.shapes:
                    if hasattr(shape, "text") and shape.text.strip():
                        slide_text_parts.append(shape.text.strip())
                if slide_text_parts:
                    overview_text += f"## Slide {i}\n"
                    overview_text += "\n".join(slide_text_parts[:3])  # First 3 text elements
                    overview_text += "\n\n"
            # Create overview part
            overview_part = ContentPart(
                id="presentation_overview",
                label="Presentation Overview",
                typeGroup="text",
                mimeType="text/plain",
                data=overview_text,
                metadata={
                    "content_type": "overview",
                    "extracted_from": "powerpoint",
                    "total_slides": len(presentation.slides),
                    "text_length": len(overview_text)
                }
            )
            parts.insert(0, overview_part)  # Insert at beginning
            return parts
        except Exception as e:
            logger.error(f"Error extracting PowerPoint content: {str(e)}")
            return [ContentPart(
                id="error",
                label="PowerPoint Extraction Error",
                typeGroup="text",
                mimeType="text/plain",
                data=f"Error extracting PowerPoint content: {str(e)}",
                metadata={"error": True, "error_message": str(e)}
            )]
    def _table_to_markdown(self, table_data: List[List[str]]) -> str:
        """Convert table data to markdown format (first row becomes the header)."""
        if not table_data:
            return ""
        markdown_lines = []
        # Header row
        if table_data:
            header = "| " + " | ".join(table_data[0]) + " |"
            markdown_lines.append(header)
            # Separator row
            separator = "| " + " | ".join(["---"] * len(table_data[0])) + " |"
            markdown_lines.append(separator)
        # Data rows
        for row in table_data[1:]:
            data_row = "| " + " | ".join(row) + " |"
            markdown_lines.append(data_row)
        return "\n".join(markdown_lines)

View file

@ -0,0 +1,56 @@
from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart
from ..subUtils import makeId
from ..subRegistry import Extractor
class SqlExtractor(Extractor):
    """
    Extractor for SQL files.
    Supported formats:
    - MIME types: text/x-sql, application/sql
    - File extensions: .sql, .ddl, .dml, .dcl, .tcl
    - Special handling: Treats as structured text with SQL syntax
    """
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True for SQL MIME types or SQL-family file extensions."""
        lowered_name = (fileName or "").lower()
        return (mimeType in ("text/x-sql", "application/sql") or
                lowered_name.endswith((".sql", ".ddl", ".dml", ".dcl", ".tcl")))
    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".sql", ".ddl", ".dml", ".dcl", ".tcl"]
    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return ["text/x-sql", "application/sql"]
    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Decode the SQL script and wrap it in a single 'structure' ContentPart.

        Metadata records size, line count and which common SQL statement
        keywords appear anywhere in the (upper-cased) script text.
        """
        fileName = context.get("fileName")
        mimeType = context.get("mimeType") or "text/x-sql"
        script = fileBytes.decode("utf-8", errors="replace")
        upper_script = script.upper()  # scan once instead of per keyword
        metadata = {
            "size": len(fileBytes),
            "file_type": "sql",
            "line_count": len(script.splitlines()),
        }
        for keyword in ("select", "insert", "update", "delete", "create", "drop"):
            metadata[f"has_{keyword}"] = keyword.upper() in upper_script
        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="main",
            typeGroup="structure",
            mimeType=mimeType,
            data=script,
            metadata=metadata
        )]

View file

@ -0,0 +1,103 @@
from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart
from ..subUtils import makeId
from ..subRegistry import Extractor
class TextExtractor(Extractor):
    """
    Extractor for plain text, source-code, configuration, and data files.

    Detection matches any ``text/*`` MIME type, or one of the file
    extensions in ``_EXTENSIONS``. Extraction decodes the bytes as UTF-8
    (replacing undecodable sequences) and emits a single text ContentPart.
    """

    # Single source of truth for extension-based detection and for
    # getSupportedExtensions(); previously the same list was duplicated in
    # detect() and getSupportedExtensions(), which risked drift.
    _EXTENSIONS = (
        # Basic text files
        ".txt", ".md", ".log", ".rtf", ".tex", ".rst", ".adoc", ".org", ".pod",
        # Programming languages
        ".java", ".js", ".jsx", ".ts", ".tsx", ".py", ".rb", ".go", ".rs", ".cpp", ".c", ".h", ".hpp", ".cc", ".cxx",
        ".cs", ".php", ".swift", ".kt", ".scala", ".clj", ".hs", ".ml", ".fs", ".vb", ".dart", ".r", ".m", ".pl", ".sh",
        # Web technologies
        ".html", ".htm", ".css", ".scss", ".sass", ".less", ".vue", ".svelte",
        # Configuration files
        ".config", ".ini", ".cfg", ".conf", ".properties", ".yaml", ".yml", ".toml", ".json", ".xml",
        # Scripts and automation
        # NOTE(review): ".com" usually denotes a binary DOS executable, not text — confirm it belongs here.
        ".bat", ".ps1", ".psm1", ".psd1", ".vbs", ".wsf", ".cmd", ".com",
        # Data files
        ".csv", ".tsv", ".tab", ".dat", ".data",
        # Documentation (man-page section suffixes and friends)
        ".man", ".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".n", ".l", ".m", ".r", ".t", ".x", ".y", ".z",
        # Other text formats
        ".diff", ".patch", ".gitignore", ".dockerignore", ".editorconfig", ".gitattributes",
        ".env", ".env.local", ".env.development", ".env.production", ".env.test",
        ".lock", ".lockb", ".lockfile", ".pkg-lock", ".yarn-lock",
    )

    # Declared MIME types, used by getSupportedMimeTypes() for registry
    # auto-registration; detect() additionally accepts any "text/*" type.
    _MIME_TYPES = (
        "text/plain", "text/markdown", "text/x-python", "text/x-java-source",
        "text/javascript", "text/x-javascript", "text/typescript", "text/x-typescript",
        "text/x-c", "text/x-c++", "text/x-csharp", "text/x-php", "text/x-ruby",
        "text/x-go", "text/x-rust", "text/x-scala", "text/x-swift", "text/x-kotlin",
        "text/x-sql", "text/x-sh", "text/x-shellscript", "text/x-yaml", "text/x-toml",
        "text/x-ini", "text/x-config", "text/x-properties", "text/x-log",
        "text/html", "text/css", "text/x-scss", "text/x-sass", "text/x-less",
        "text/xml", "text/csv", "text/tab-separated-values", "text/rtf",
        "text/x-tex", "text/x-rst", "text/x-asciidoc", "text/x-org",
        "application/x-yaml", "application/x-toml", "application/x-ini",
        "application/x-config", "application/x-properties", "application/x-log",
    )

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Return True for any text/* MIME type or a known text file extension."""
        if mimeType and mimeType.startswith("text/"):
            return True
        if fileName:
            # str.endswith accepts a tuple of suffixes, so one call covers all.
            return fileName.lower().endswith(self._EXTENSIONS)
        return False

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions (with leading dot)."""
        return list(self._EXTENSIONS)

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return list(self._MIME_TYPES)

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """
        Decode the file as UTF-8 (undecodable bytes replaced) into one text part.

        Args:
            fileBytes: Raw file content.
            context: Extraction context; only "mimeType" is read here.

        Returns:
            A single-element list with one text ContentPart; metadata records
            the original byte size.
        """
        mimeType = context.get("mimeType") or "text/plain"
        data = fileBytes.decode("utf-8", errors="replace")
        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="main",
            typeGroup="text",
            mimeType=mimeType,
            data=data,
            metadata={"size": len(fileBytes)},
        )]

View file

@ -8,6 +8,16 @@ from ..subRegistry import Extractor
class XlsxExtractor(Extractor):
"""
Extractor for Microsoft Excel spreadsheets.
Supported formats:
- MIME types: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
- File extensions: .xlsx, .xlsm
- Special handling: Extracts all sheets as CSV data
- Dependencies: openpyxl
"""
def __init__(self):
self._loaded = False
self._haveLibs = False
@ -27,6 +37,14 @@ class XlsxExtractor(Extractor):
mt = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
return mimeType == mt or (fileName or "").lower().endswith((".xlsx", ".xlsm"))
def getSupportedExtensions(self) -> list[str]:
    """File extensions (with leading dot) accepted by this extractor: Excel workbooks."""
    extensions = [".xlsx", ".xlsm"]
    return extensions
def getSupportedMimeTypes(self) -> list[str]:
    """MIME types accepted by this extractor (OOXML spreadsheet)."""
    mimeTypes = ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]
    return mimeTypes
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
self._load()
parts: List[ContentPart] = []

View file

@ -7,9 +7,26 @@ from ..subRegistry import Extractor
class XmlExtractor(Extractor):
"""
Extractor for XML files.
Supported formats:
- MIME types: application/xml
- File extensions: .xml, .rss, .atom
- Special handling: Uses ElementTree for parsing
"""
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
    """Match XML content either by exact MIME type or by an XML-family extension."""
    if mimeType == "application/xml":
        return True
    lowered = (fileName or "").lower()
    return lowered.endswith((".xml", ".rss", ".atom"))
def getSupportedExtensions(self) -> list[str]:
    """File extensions (with leading dot) handled by this extractor."""
    extensions = [".xml", ".rss", ".atom"]
    return extensions
def getSupportedMimeTypes(self) -> list[str]:
    """MIME types handled by this extractor."""
    mimeTypes = ["application/xml"]
    return mimeTypes
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
mimeType = context.get("mimeType") or "application/xml"
text = fileBytes.decode("utf-8", errors="replace")

View file

@ -1,25 +0,0 @@
from typing import Any, Dict, List
import base64
from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor
class ImageExtractor(Extractor):
    """Extractor for image files: wraps the raw bytes as one base64-encoded part."""

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Handle any file whose MIME type belongs to the image/ family."""
        return (mimeType or "").startswith("image/")

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Return a single base64-encoded image ContentPart; metadata keeps the byte size."""
        resolvedMime = context.get("mimeType") or "image/unknown"
        encoded = base64.b64encode(fileBytes).decode("utf-8")
        part = ContentPart(
            id=makeId(),
            parentId=None,
            label="image",
            typeGroup="image",
            mimeType=resolvedMime,
            data=encoded,
            metadata={"size": len(fileBytes)},
        )
        return [part]

View file

@ -1,26 +0,0 @@
from typing import Any, Dict, List
from modules.datamodels.datamodelExtraction import ContentPart
from ..subUtils import makeId
from ..subRegistry import Extractor
class TextExtractor(Extractor):
    """Extractor for plain-text and Markdown content."""

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Only exact plain-text or Markdown MIME types are accepted."""
        return mimeType in ("text/plain", "text/markdown")

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """Decode the bytes as UTF-8 (errors replaced) into a single text part."""
        resolvedMime = context.get("mimeType") or "text/plain"
        decoded = fileBytes.decode("utf-8", errors="replace")
        part = ContentPart(
            id=makeId(),
            parentId=None,
            label="main",
            typeGroup="text",
            mimeType=resolvedMime,
            data=decoded,
            metadata={"size": len(fileBytes)},
        )
        return [part]

View file

@ -67,10 +67,12 @@ class ExtractionService:
if part.metadata:
logger.debug(f" Metadata: {part.metadata}")
# Attach document id to parts if missing
# Attach document id and MIME type to parts if missing
for p in ec.parts:
if "documentId" not in p.metadata:
p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
if "documentMimeType" not in p.metadata:
p.metadata["documentMimeType"] = documentData["mimeType"]
# Log chunking information
chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]

View file

@ -0,0 +1,209 @@
"""
Intelligent Token-Aware Merger for optimizing AI calls based on LLM token limits.
"""
from typing import List, Dict, Any, Tuple
import logging
from modules.datamodels.datamodelExtraction import ContentPart
from .subUtils import makeId
logger = logging.getLogger(__name__)
class IntelligentTokenAwareMerger:
    """
    Intelligent merger that groups chunks based on LLM token limits to minimize AI calls.

    Strategy:
    1. Estimate a token count for each chunk
    2. Group chunks to maximize token usage without exceeding limits
    3. Keep chunks from the same document/type together for semantic coherence
    4. Minimize the total number of AI calls
    """

    def __init__(self, model_capabilities: Dict[str, Any]):
        """
        Args:
            model_capabilities: Optional keys:
                - maxTokens: model token limit (default 4000)
                - safetyMargin: fraction of maxTokens reserved as headroom (default 0.1)
                - charsPerToken: rough characters-per-token ratio (default 4)
        """
        self.max_tokens = model_capabilities.get("maxTokens", 4000)
        self.safety_margin = model_capabilities.get("safetyMargin", 0.1)
        # Token budget actually usable after reserving the safety margin.
        self.effective_max_tokens = int(self.max_tokens * (1 - self.safety_margin))
        self.chars_per_token = model_capabilities.get("charsPerToken", 4)  # Rough estimation

    def merge_chunks_intelligently(self, chunks: "List[ContentPart]", prompt: str = "") -> "List[ContentPart]":
        """
        Merge chunks intelligently based on token limits.

        Args:
            chunks: List of ContentPart chunks to merge
            prompt: AI prompt to account for in token calculation

        Returns:
            List of optimally merged ContentPart objects
        """
        if not chunks:
            return chunks
        logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}")
        # Reserve room for the prompt itself before packing content.
        prompt_tokens = self._estimate_tokens(prompt)
        available_tokens = self.effective_max_tokens - prompt_tokens
        logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}")
        # Group chunks by document and type so merges never mix documents.
        grouped_chunks = self._group_chunks_by_document_and_type(chunks)
        merged_parts = []
        for group_key, group_chunks in grouped_chunks.items():
            logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)")
            group_merged = self._merge_group_optimally(group_chunks, available_tokens)
            merged_parts.extend(group_merged)
        logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(merged_parts)} parts")
        return merged_parts

    def _group_chunks_by_document_and_type(self, chunks: "List[ContentPart]") -> "Dict[str, List[ContentPart]]":
        """Group chunks by (documentId, typeGroup) key for semantic coherence."""
        groups: "Dict[str, List[ContentPart]]" = {}
        for chunk in chunks:
            # Chunks with no documentId metadata all fall into one "unknown" bucket.
            doc_id = chunk.metadata.get("documentId", "unknown")
            group_key = f"{doc_id}_{chunk.typeGroup}"
            groups.setdefault(group_key, []).append(chunk)
        return groups

    def _merge_group_optimally(self, chunks: "List[ContentPart]", available_tokens: int) -> "List[ContentPart]":
        """Greedily pack a group's chunks into as few merged parts as possible."""
        if not chunks:
            return []
        # Sort chunks by estimated size (smallest first) for denser packing.
        # NOTE(review): sorting discards the original chunk order within the
        # group — confirm downstream consumers don't rely on document order.
        sorted_chunks = sorted(chunks, key=lambda c: self._estimate_tokens(c.data))
        merged_parts = []
        current_group = []
        current_tokens = 0
        for chunk in sorted_chunks:
            chunk_tokens = self._estimate_tokens(chunk.data)
            # A chunk at >= 90% of the available budget is processed alone.
            if chunk_tokens >= available_tokens * 0.9:
                # Finalize any in-progress group first.
                if current_group:
                    merged_parts.append(self._create_merged_part(current_group, current_tokens))
                    current_group = []
                    current_tokens = 0
                merged_parts.append(chunk)
                logger.debug(f"🔍 Large chunk processed individually: {chunk_tokens} tokens")
                continue
            # If adding this chunk would overflow the budget, close the group.
            if current_tokens + chunk_tokens > available_tokens and current_group:
                merged_parts.append(self._create_merged_part(current_group, current_tokens))
                current_group = [chunk]
                current_tokens = chunk_tokens
            else:
                current_group.append(chunk)
                current_tokens += chunk_tokens
        # Finalize the trailing group, if any.
        if current_group:
            merged_parts.append(self._create_merged_part(current_group, current_tokens))
        logger.info(f"📦 Group merged: {len(chunks)} → {len(merged_parts)} parts")
        return merged_parts

    def _create_merged_part(self, chunks: "List[ContentPart]", total_tokens: int) -> "ContentPart":
        """Combine several chunks into one ContentPart, recording merge provenance in metadata."""
        if len(chunks) == 1:
            return chunks[0]  # Nothing to merge for a single chunk.
        combined_data = self._combine_chunk_data(chunks)
        # Use the first chunk's metadata/identity fields as the base.
        base_chunk = chunks[0]
        merged_metadata = base_chunk.metadata.copy()
        merged_metadata.update({
            "merged": True,
            "originalChunkCount": len(chunks),
            "totalTokens": total_tokens,
            "originalChunkIds": [c.id for c in chunks],
            "size": len(combined_data.encode('utf-8'))
        })
        merged_part = ContentPart(
            id=makeId(),
            parentId=base_chunk.parentId,
            label=f"merged_{len(chunks)}_chunks",
            typeGroup=base_chunk.typeGroup,
            mimeType=base_chunk.mimeType,
            data=combined_data,
            metadata=merged_metadata
        )
        logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens")
        return merged_part

    def _combine_chunk_data(self, chunks: "List[ContentPart]") -> str:
        """Join chunk payloads with a separator suited to the content type."""
        if not chunks:
            return ""
        # Tables get a distinct separator; text and everything else share one.
        if chunks[0].typeGroup == "table":
            separator = "\n\n[TABLE BREAK]\n\n"
        else:
            separator = "\n\n---\n\n"
        return separator.join([chunk.data for chunk in chunks])

    def _estimate_tokens(self, text: str) -> int:
        """Estimate the token count of *text* (ceiling of chars / chars_per_token)."""
        if not text:
            return 0
        # Round UP: floor division under-counted short texts (e.g. 3 chars -> 0
        # tokens), which could let a packed group exceed the real token budget.
        return -(-len(text) // self.chars_per_token)

    def calculate_optimization_stats(self, original_chunks: "List[ContentPart]", merged_parts: "List[ContentPart]") -> Dict[str, Any]:
        """Calculate optimization statistics comparing original vs merged AI-call counts."""
        original_calls = len(original_chunks)
        optimized_calls = len(merged_parts)
        reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0
        # Split chunks into "large" (processed alone) and "small" (mergeable).
        large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9]
        small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9]
        # Theoretical floor: large chunks alone, assuming ~3 small chunks per call.
        theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3)
        theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0
        return {
            "original_ai_calls": original_calls,
            "optimized_ai_calls": optimized_calls,
            "reduction_percent": round(reduction_percent, 1),
            "cost_savings": f"{reduction_percent:.1f}%",
            # NOTE(review): the empty-string fallback when optimized_calls == 0
            # looks like a placeholder (possibly a lost "∞") — confirm intent.
            "efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "",
            "analysis": {
                "large_chunks": len(large_chunks),
                "small_chunks": len(small_chunks),
                "theoretical_min_calls": theoretical_min_calls,
                "theoretical_reduction": round(theoretical_reduction, 1),
                "optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low"
            }
        }

View file

@ -3,11 +3,13 @@ import logging
import os
from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
from modules.shared.configuration import APP_CONFIG
from .subUtils import makeId
from .subRegistry import ExtractorRegistry, ChunkerRegistry
from .merging.text_merger import TextMerger
from .merging.table_merger import TableMerger
from .merging.default_merger import DefaultMerger
from .merging.mergerText import TextMerger
from .merging.mergerTable import TableMerger
from .merging.mergerDefault import DefaultMerger
from .subMerger import IntelligentTokenAwareMerger
logger = logging.getLogger(__name__)
@ -84,16 +86,25 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]
logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")
logger.debug(f"runExtraction - non_chunk_parts: {len(non_chunk_parts)}, chunk_parts: {len(chunk_parts)}")
# Apply intelligent merging for small text parts
if non_chunk_parts:
# Count text parts
text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
if len(text_parts) > 5: # If we have many small text parts, merge them
logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)
# Combine non-chunk parts with chunk parts (chunks stay separate)
parts = non_chunk_parts + chunk_parts
logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
# DEBUG: dump parts and chunks to files TODO TO REMOVE
logger.debug(f"runExtraction - Final parts: {len(parts)} (chunks: {len(chunk_parts)})")
# DEBUG: dump parts and chunks to files - only if debug enabled
try:
debug_enabled = APP_CONFIG.get("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
if debug_enabled:
base_dir = "./test-chat/ai"
os.makedirs(base_dir, exist_ok=True)
@ -146,13 +157,22 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
kept: List[ContentPart] = []
remaining: List[ContentPart] = []
for p in parts:
logger.debug(f"Starting poolAndLimit with {len(parts)} parts, maxSize={maxSize}")
for i, p in enumerate(parts):
size = int(p.metadata.get("size", 0) or 0)
# Show first 50 characters of text content for debugging
content_preview = p.data[:50].replace('\n', '\\n') if p.data else ""
logger.debug(f"Part {i}: {p.typeGroup} - {size} bytes - '{content_preview}...' (current: {current})")
if current + size <= maxSize:
kept.append(p)
current += size
logger.debug(f"Part {i} kept (total: {current})")
else:
remaining.append(p)
logger.debug(f"Part {i} moved to remaining")
logger.debug(f"Kept: {len(kept)}, Remaining: {len(remaining)}")
# If we have remaining parts and chunking is allowed, try chunking
if remaining and chunkAllowed:
@ -160,12 +180,15 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
logger.debug(f"Remaining parts to chunk: {len(remaining)}")
logger.debug(f"Max size limit: {maxSize} bytes")
logger.debug(f"Current size used: {current} bytes")
logger.debug(f"Chunking {len(remaining)} remaining parts")
for p in remaining:
if p.typeGroup in ("text", "table", "structure", "image"):
if p.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
logger.debug(f"Chunking {p.typeGroup} part: {len(p.data)} chars")
logger.debug(f"Chunking {p.typeGroup} part with {len(p.data)} chars")
chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
logger.debug(f"Created {len(chunks)} chunks")
logger.debug(f"Created {len(chunks)} chunks")
chunks_added = 0
for ch in chunks:
@ -197,12 +220,18 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")
# Apply intelligent merging for small text parts
if non_chunk_parts:
# Count text parts
text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
if len(text_parts) > 5: # If we have many small text parts, merge them
logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)
# Combine non-chunk parts with chunk parts (chunks stay separate)
kept = non_chunk_parts + chunk_parts
logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
# Re-check size after merging
@ -211,11 +240,30 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
# Apply size limit to merged parts
kept = _applySizeLimit(kept, maxSize)
logger.debug(f"poolAndLimit returning {len(kept)} parts")
return kept
def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[ContentPart]:
"""Apply merging strategy to parts."""
"""Apply merging strategy to parts with intelligent token-aware merging."""
logger.debug(f"_applyMerging called with {len(parts)} parts")
# Check if intelligent merging is enabled
if strategy.get("useIntelligentMerging", False):
model_capabilities = strategy.get("modelCapabilities", {})
subMerger = IntelligentTokenAwareMerger(model_capabilities)
# Use intelligent merging for all parts
merged = subMerger.merge_chunks_intelligently(parts, strategy.get("prompt", ""))
# Calculate and log optimization stats
stats = subMerger.calculate_optimization_stats(parts, merged)
logger.info(f"🧠 Intelligent merging stats: {stats}")
logger.debug(f"Intelligent merging: {stats['original_ai_calls']}{stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")
return merged
# Fallback to traditional merging
textMerger = TextMerger()
tableMerger = TableMerger()
defaultMerger = DefaultMerger()
@ -226,18 +274,29 @@ def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[Co
structureParts = [p for p in parts if p.typeGroup == "structure"]
otherParts = [p for p in parts if p.typeGroup not in ("text", "table", "structure")]
logger.debug(f"Grouped - text: {len(textParts)}, table: {len(tableParts)}, structure: {len(structureParts)}, other: {len(otherParts)}")
merged: List[ContentPart] = []
if textParts:
merged.extend(textMerger.merge(textParts, strategy))
textMerged = textMerger.merge(textParts, strategy)
logger.debug(f"TextMerger merged {len(textParts)} parts into {len(textMerged)} parts")
merged.extend(textMerged)
if tableParts:
merged.extend(tableMerger.merge(tableParts, strategy))
tableMerged = tableMerger.merge(tableParts, strategy)
logger.debug(f"TableMerger merged {len(tableParts)} parts into {len(tableMerged)} parts")
merged.extend(tableMerged)
if structureParts:
# For now, treat structure like text
merged.extend(textMerger.merge(structureParts, strategy))
structureMerged = textMerger.merge(structureParts, strategy)
logger.debug(f"StructureMerger merged {len(structureParts)} parts into {len(structureMerged)} parts")
merged.extend(structureMerged)
if otherParts:
merged.extend(defaultMerger.merge(otherParts, strategy))
otherMerged = defaultMerger.merge(otherParts, strategy)
logger.debug(f"DefaultMerger merged {len(otherParts)} parts into {len(otherMerged)} parts")
merged.extend(otherMerged)
logger.debug(f"_applyMerging returning {len(merged)} parts")
return merged

View file

@ -1,15 +1,38 @@
from typing import Any, Dict, Optional
import logging
from modules.datamodels.datamodelExtraction import ContentPart
logger = logging.getLogger(__name__)
class Extractor:
    """
    Base class for all document extractors.

    Subclasses are expected to override:
    - detect(): decide whether this extractor can handle a given file
    - getSupportedExtensions() / getSupportedMimeTypes(): declare formats
      so the registry can auto-register the extractor
    - extract(): turn raw file bytes into ContentPart objects
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """The base implementation never matches; subclasses override."""
        return False

    def getSupportedExtensions(self) -> list[str]:
        """File extensions (including the leading dot); none by default."""
        return []

    def getSupportedMimeTypes(self) -> list[str]:
        """MIME types this extractor supports; none by default."""
        return []

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> list[ContentPart]:
        """Extract content parts from the file bytes; must be overridden."""
        raise NotImplementedError
class Chunker:
def chunk(self, part: ContentPart, options: Dict[str, Any]) -> list[Dict[str, Any]]:
@ -20,51 +43,86 @@ class ExtractorRegistry:
def __init__(self):
self._map: Dict[str, Extractor] = {}
self._fallback: Optional[Extractor] = None
# Register built-ins
self._auto_discover_extractors()
def _auto_discover_extractors(self):
"""Auto-discover and register all extractors from the extractors directory."""
try:
from .formats.text_extractor import TextExtractor
from .formats.csv_extractor import CsvExtractor
from .formats.json_extractor import JsonExtractor
from .formats.xml_extractor import XmlExtractor
from .formats.html_extractor import HtmlExtractor
from .formats.pdf_extractor import PdfExtractor
from .formats.docx_extractor import DocxExtractor
from .formats.xlsx_extractor import XlsxExtractor
from .formats.image_extractor import ImageExtractor
from .formats.binary_extractor import BinaryExtractor
self.register("text/plain", TextExtractor())
self.register("text/markdown", TextExtractor())
self.register("text/csv", CsvExtractor())
self.register("application/json", JsonExtractor())
self.register("application/xml", XmlExtractor())
self.register("text/html", HtmlExtractor())
self.register("application/pdf", PdfExtractor())
self.register("application/vnd.openxmlformats-officedocument.wordprocessingml.document", DocxExtractor())
self.register("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", XlsxExtractor())
# images
self.register("image/jpeg", ImageExtractor())
self.register("image/png", ImageExtractor())
self.register("image/gif", ImageExtractor())
# extension fallbacks
self.register("txt", TextExtractor())
self.register("md", TextExtractor())
self.register("csv", CsvExtractor())
self.register("json", JsonExtractor())
self.register("xml", XmlExtractor())
self.register("html", HtmlExtractor())
self.register("htm", HtmlExtractor())
self.register("pdf", PdfExtractor())
self.register("docx", DocxExtractor())
self.register("xlsx", XlsxExtractor())
self.register("xlsm", XlsxExtractor())
# fallback
self.setFallback(BinaryExtractor())
print(f"✅ ExtractorRegistry: Successfully registered {len(self._map)} extractors")
import os
import importlib
from pathlib import Path
# Get the extractors directory
current_dir = Path(__file__).parent
extractors_dir = current_dir / "extractors"
if not extractors_dir.exists():
logger.error(f"Extractors directory not found: {extractors_dir}")
return
# Import all extractor modules
extractor_modules = []
for file_path in extractors_dir.glob("extractor*.py"):
if file_path.name == "__init__.py":
continue
module_name = file_path.stem
try:
# Import the module
module = importlib.import_module(f".{module_name}", package="modules.services.serviceExtraction.extractors")
# Find all extractor classes in the module
for attr_name in dir(module):
attr = getattr(module, attr_name)
if (isinstance(attr, type) and
issubclass(attr, Extractor) and
attr != Extractor and
not attr_name.startswith('_')):
# Create instance and auto-register
extractor_instance = attr()
self._auto_register_extractor(extractor_instance)
extractor_modules.append(attr_name)
except Exception as e:
print(f"❌ ExtractorRegistry: Failed to register extractors: {str(e)}")
logger.warning(f"Failed to import {module_name}: {str(e)}")
continue
# Set fallback extractor
try:
from .extractors.extractorBinary import BinaryExtractor
self.setFallback(BinaryExtractor())
except Exception as e:
logger.warning(f"Failed to set fallback extractor: {str(e)}")
logger.info(f"ExtractorRegistry: Auto-discovered and registered {len(extractor_modules)} extractor classes: {', '.join(extractor_modules)}")
logger.info(f"ExtractorRegistry: Total registered formats: {len(self._map)}")
except Exception as e:
logger.error(f"ExtractorRegistry: Failed to auto-discover extractors: {str(e)}")
import traceback
traceback.print_exc()
def _auto_register_extractor(self, extractor: Extractor):
    """Register one extractor under every MIME type and extension it declares."""
    try:
        # MIME-type keys, exactly as declared.
        for mime_type in extractor.getSupportedMimeTypes():
            self.register(mime_type, extractor)
            logger.debug(f"Registered MIME type: {mime_type} → {extractor.__class__.__name__}")
        # Extension keys are stored without the leading dot.
        for ext in extractor.getSupportedExtensions():
            ext_key = ext.lstrip('.')
            self.register(ext_key, extractor)
            logger.debug(f"Registered extension: .{ext_key} → {extractor.__class__.__name__}")
    except Exception as e:
        logger.error(f"Failed to auto-register {extractor.__class__.__name__}: {str(e)}")
def register(self, key: str, extractor: Extractor):
    """Map a MIME-type or bare-extension key to an extractor instance (last registration wins)."""
    self._map[key] = extractor
@ -81,6 +139,43 @@ class ExtractorRegistry:
return self._map[ext]
return self._fallback
def getAllSupportedFormats(self) -> Dict[str, Dict[str, list[str]]]:
    """
    Collect the formats declared by every registered extractor.

    Returns:
        {
            "extensions": {registry_key: [".ext1", ".ext2", ...], ...},
            "mime_types": {registry_key: ["mime/type1", ...], ...}
        }
        The fallback extractor (if set) appears under the key "fallback".
    """
    formats = {"extensions": {}, "mime_types": {}}
    for key, extractor in self._map.items():
        # Only record non-empty declarations from extractors that expose them.
        if hasattr(extractor, 'getSupportedExtensions'):
            declared_exts = extractor.getSupportedExtensions()
            if declared_exts:
                formats["extensions"][key] = declared_exts
        if hasattr(extractor, 'getSupportedMimeTypes'):
            declared_mimes = extractor.getSupportedMimeTypes()
            if declared_mimes:
                formats["mime_types"][key] = declared_mimes
    # The fallback is reported even when its declarations are empty.
    fallback = self._fallback
    if fallback and hasattr(fallback, 'getSupportedExtensions'):
        formats["extensions"]["fallback"] = fallback.getSupportedExtensions()
    if fallback and hasattr(fallback, 'getSupportedMimeTypes'):
        formats["mime_types"]["fallback"] = fallback.getSupportedMimeTypes()
    return formats
class ChunkerRegistry:
def __init__(self):
@ -88,17 +183,19 @@ class ChunkerRegistry:
self._noop = Chunker()
# Register default chunkers
try:
from .chunking.text_chunker import TextChunker
from .chunking.table_chunker import TableChunker
from .chunking.structure_chunker import StructureChunker
# Skip ImageChunker for now to avoid PIL import hang
# from .chunking.image_chunker import ImageChunker
from .chunking.chunkerText import TextChunker
from .chunking.chunkerTable import TableChunker
from .chunking.chunkerStructure import StructureChunker
from .chunking.chunkerImage import ImageChunker
self.register("text", TextChunker())
self.register("table", TableChunker())
self.register("structure", StructureChunker())
# self.register("image", ImageChunker())
self.register("image", ImageChunker())
# Use text chunker for container and binary content
self.register("container", TextChunker())
self.register("binary", TextChunker())
except Exception as e:
print(f"❌ ChunkerRegistry: Failed to register chunkers: {str(e)}")
logger.error(f"ChunkerRegistry: Failed to register chunkers: {str(e)}")
import traceback
traceback.print_exc()

View file

@ -1,6 +1,7 @@
import logging
import uuid
from typing import Any, Dict, List, Optional
import json
from typing import Any, Dict, List, Optional, Union, Tuple
from datetime import datetime, UTC
import re
from modules.shared.timezoneUtils import get_utc_timestamp
@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
class GenerationService:
def __init__(self, serviceCenter=None):
# Directly use interfaces from the provided service center (no self.service calls)
self.serviceCenter = serviceCenter
self.services = serviceCenter
self.interfaceDbComponent = getattr(serviceCenter, 'interfaceDbComponent', None) if serviceCenter else None
self.interfaceDbChat = getattr(serviceCenter, 'interfaceDbChat', None) if serviceCenter else None
self.workflow = getattr(serviceCenter, 'workflow', None) if serviceCenter else None
@ -296,101 +297,237 @@ class GenerationService:
'workflowId': 'unknown'
}
async def renderReport(self, extracted_content: str, output_format: str, title: str) -> tuple[str, str]:
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
"""
Render extracted content to the specified output format.
Render extracted JSON content to the specified output format.
Args:
extracted_content: Content extracted by AI using format-specific prompt
output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
extractedContent: Structured JSON document from AI extraction
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
title: Report title
userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation
Returns:
tuple: (rendered_content, mime_type)
"""
try:
# DEBUG: dump renderer input to diagnose JSON+HTML mixtures TODO REMOVE
# Validate JSON input
if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary")
if "sections" not in extractedContent:
raise ValueError("extractedContent must contain 'sections' field")
# DEBUG: Log renderer input metadata only (no verbose JSON) - only if debug enabled
try:
debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
if debug_enabled:
import os
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
debug_root = "./test-chat/ai"
debug_dir = os.path.join(debug_root, f"render_input_{ts}")
os.makedirs(debug_dir, exist_ok=True)
with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
f.write(f"title: {title}\nformat: {output_format}\nlength: {len(extracted_content or '')}\nstarts_with_brace: {str(extracted_content.strip().startswith('{') if extracted_content else False)}\n")
with open(os.path.join(debug_dir, "extracted_content.txt"), "w", encoding="utf-8") as f:
f.write(extracted_content or "")
f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
f.write(f"content_size: {len(str(extractedContent))} characters\n")
f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n")
except Exception:
pass
# Get the appropriate renderer for the format
renderer = self._getFormatRenderer(output_format)
renderer = self._getFormatRenderer(outputFormat)
if not renderer:
raise ValueError(f"Unsupported output format: {output_format}")
raise ValueError(f"Unsupported output format: {outputFormat}")
# Render the content
rendered_content, mime_type = await renderer.render(extracted_content, title)
# Render the JSON content directly (AI generation handled by main service)
renderedContent, mimeType = await renderer.render(extractedContent, title, userPrompt, aiService)
# DEBUG: dump rendered output
try:
import os
with open(os.path.join(debug_dir, "rendered_output.txt"), "w", encoding="utf-8") as f:
f.write(rendered_content or "")
f.write(renderedContent or "")
except Exception:
pass
logger.info(f"Successfully rendered report to {output_format} format: {len(rendered_content)} characters")
return rendered_content, mime_type
logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")
return renderedContent, mimeType
except Exception as e:
logger.error(f"Error rendering report to {output_format}: {str(e)}")
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
raise
def getExtractionPrompt(self, output_format: str, user_prompt: str, title: str) -> str:
async def getAdaptiveExtractionPrompt(
    self,
    outputFormat: str,
    userPrompt: str,
    title: str,
    promptAnalysis: Dict[str, Any],
    aiService=None
) -> str:
    """Build an extraction prompt tuned to the prior AI analysis of the request.

    Thin async delegator: all assembly logic lives in
    subPromptBuilder.buildAdaptiveExtractionPrompt; this method only forwards
    its arguments plus the shared services container.
    """
    # Local import keeps the builder dependency lazy.
    from .subPromptBuilder import buildAdaptiveExtractionPrompt

    builderKwargs = {
        "outputFormat": outputFormat,
        "userPrompt": userPrompt,
        "title": title,
        "promptAnalysis": promptAnalysis,
        "aiService": aiService,
        "services": self.services,
    }
    return await buildAdaptiveExtractionPrompt(**builderKwargs)
async def getGenerationPrompt(
    self,
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None
) -> str:
    """Produce the prompt used to enhance already-extracted JSON content.

    Delegates to subPromptBuilder.buildGenerationPrompt, forwarding the
    caller's arguments together with the shared services container.
    """
    # Local import keeps the builder dependency lazy.
    from .subPromptBuilder import buildGenerationPrompt

    return await buildGenerationPrompt(
        outputFormat=outputFormat,
        userPrompt=userPrompt,
        title=title,
        aiService=aiService,
        services=self.services,
    )
async def getGenericExtractionPrompt(
    self,
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None
) -> str:
    """Return the generic extraction prompt shared by single- and multi-file flows.

    Pure delegation to subPromptBuilder.buildGenericExtractionPrompt with the
    shared services container appended.
    """
    # Local import keeps the builder dependency lazy.
    from .subPromptBuilder import buildGenericExtractionPrompt

    return await buildGenericExtractionPrompt(
        outputFormat=outputFormat,
        userPrompt=userPrompt,
        title=title,
        aiService=aiService,
        services=self.services,
    )
async def getExtractionPrompt(self, outputFormat: str, userPrompt: str, title: str, aiService=None) -> str:
    """
    Get the format-specific extraction prompt for AI content extraction.

    Defect fixed: this span contained interleaved old/new unified-diff lines
    (duplicate renderer lookups, mixed snake_case/camelCase variables); this
    is the coherent new-side implementation.

    Args:
        outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
        userPrompt: User's original prompt for report generation
        title: Report title
        aiService: AI service instance for intent extraction

    Returns:
        str: Format-specific prompt for AI extraction

    Raises:
        ValueError: If no renderer is registered for ``outputFormat``.
    """
    try:
        # Get the appropriate renderer for the format
        renderer = self._getFormatRenderer(outputFormat)
        if not renderer:
            raise ValueError(f"Unsupported output format: {outputFormat}")
        # Build centralized prompt with generic rules + format-specific guidelines
        from .subPromptBuilder import buildExtractionPrompt
        extractionPrompt = await buildExtractionPrompt(
            outputFormat=outputFormat,
            renderer=renderer,
            userPrompt=userPrompt,
            title=title,
            aiService=aiService,
            services=self.services
        )
        logger.info(f"Generated {outputFormat}-specific extraction prompt: {len(extractionPrompt)} characters")
        return extractionPrompt
    except Exception as e:
        logger.error(f"Error getting extraction prompt for {outputFormat}: {str(e)}")
        raise
async def renderAdaptiveReport(
    self,
    extractedContent: Dict[str, Any],
    outputFormat: str,
    title: str,
    userPrompt: str = None,
    aiService=None,
    isMultiFile: bool = False
) -> Union[Tuple[str, str], List[Dict[str, Any]]]:
    """Dispatch rendering to the single- or multi-file pipeline.

    The multi-file path is taken only when the caller asks for it AND the
    extracted payload actually carries a "documents" collection; everything
    else goes through the single-file path.
    """
    useMultiFile = isMultiFile and "documents" in extractedContent
    handler = self._renderMultiFileReport if useMultiFile else self._renderSingleFileReport
    return await handler(extractedContent, outputFormat, title, userPrompt, aiService)
async def _renderMultiFileReport(
    self,
    extractedContent: Dict[str, Any],
    outputFormat: str,
    title: str,
    userPrompt: str = None,
    aiService=None
) -> List[Dict[str, Any]]:
    """Render every entry in ``extractedContent["documents"]`` independently.

    Each document is rendered through the format renderer; entries are
    silently skipped when no renderer exists for ``outputFormat``. Returns a
    list of dicts with filename/content/mime_type/title keys.
    """
    results: List[Dict[str, Any]] = []
    for document in extractedContent.get("documents", []):
        # Renderer is resolved per document, mirroring the original behavior.
        formatRenderer = self._getFormatRenderer(outputFormat)
        if not formatRenderer:
            continue
        content, mimeType = await formatRenderer.render(
            extractedContent={"sections": document["sections"]},
            title=document["title"],
            userPrompt=userPrompt,
            aiService=aiService
        )
        results.append({
            "filename": document["filename"],
            "content": content,
            "mime_type": mimeType,
            "title": document["title"],
        })
    return results
async def _renderSingleFileReport(
    self,
    extractedContent: Dict[str, Any],
    outputFormat: str,
    title: str,
    userPrompt: str = None,
    aiService=None
) -> Tuple[str, str]:
    """Render a single-document report by delegating to renderReport.

    Kept as a separate method so the adaptive dispatcher has symmetrical
    single/multi entry points; carries no logic of its own.
    """
    return await self.renderReport(
        extractedContent,
        outputFormat,
        title,
        userPrompt,
        aiService,
    )
def _getFormatRenderer(self, output_format: str):
"""Get the appropriate renderer for the specified format using auto-discovery."""
try:
from .renderers.registry import get_renderer
renderer = get_renderer(output_format)
renderer = get_renderer(output_format, services=self.services)
if renderer:
return renderer
# Fallback to text renderer if no specific renderer found
logger.warning(f"No renderer found for format {output_format}, falling back to text")
fallback_renderer = get_renderer('text')
fallback_renderer = get_renderer('text', services=self.services)
if fallback_renderer:
return fallback_renderer

View file

@ -1,72 +0,0 @@
"""
Centralized prompt builder for document generation across formats.
Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""
from typing import Protocol
class _RendererLike(Protocol):
    """Structural type for renderers accepted by buildExtractionPrompt.

    Any object exposing getExtractionPrompt(user_prompt, title) -> str,
    returning only the format-specific guideline text, satisfies it.
    """

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:  # returns only format-specific guidelines
        ...


def buildExtractionPrompt(
    output_format: str,
    renderer: _RendererLike,
    user_prompt: str,
    title: str
) -> str:
    """Assemble the final extraction prompt for one output format.

    Concatenates, in order: the user's prompt verbatim, the generic
    cross-format rules (FILENAME header + real-data policy), the renderer's
    format-specific guideline snippet, and a closing generation instruction.
    The AI must emit exactly one ``FILENAME:`` header line at the very top.
    """
    guidelines = renderer.getExtractionPrompt(user_prompt, title)

    # Generic block appears once for every format.
    intro = f"""
{user_prompt}
You are generating a document in {output_format.upper()} format for the title: "{title}".
Rules:
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
- Use only factual information extracted from the supplied source documents.
- Do not invent, hallucinate, or include placeholders (e.g., "lorem ipsum", "TBD").
- The output must strictly follow the target format and be ready for saving without extra wrapping.
- At the VERY TOP output exactly one line with the filename header:
FILENAME: <safe-file-name-with-extension>
- The base name should be short, descriptive, and kebab-case or snake-case without spaces.
- Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).
- Avoid special characters beyond [a-zA-Z0-9-_].
- After this header, insert a single blank line and then provide ONLY the document content.
Common policy:
- Use the actual data from the source documents to create the content.
- Do not generate placeholder text or templates.
- Extract and use the real data provided in the source documents to create meaningful content.
""".strip()

    # Final assembly: generic intro, then format guidelines, then the call to action.
    pieces = [
        intro,
        "\n\nFORMAT-SPECIFIC GUIDELINES:\n",
        guidelines.strip(),
        "\n\nGenerate the complete document content now based on the source documents below:",
    ]
    return "".join(pieces)

View file

@ -1,86 +0,0 @@
"""
Base renderer class for all format renderers.
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
import logging
logger = logging.getLogger(__name__)
class BaseRenderer(ABC):
"""Base class for all format renderers."""
def __init__(self):
self.logger = logger
@classmethod
def get_supported_formats(cls) -> List[str]:
"""
Return list of supported format names for this renderer.
Override this method in subclasses to specify supported formats.
"""
return []
@classmethod
def get_format_aliases(cls) -> List[str]:
"""
Return list of format aliases for this renderer.
Override this method in subclasses to specify format aliases.
"""
return []
@classmethod
def get_priority(cls) -> int:
"""
Return priority for this renderer (higher number = higher priority).
Used when multiple renderers support the same format.
"""
return 0
@abstractmethod
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
"""
Get the format-specific extraction prompt for AI content extraction.
Args:
user_prompt: User's original prompt for report generation
title: Report title
Returns:
str: Format-specific prompt for AI extraction
"""
pass
@abstractmethod
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
"""
Render extracted content to the target format.
Args:
extracted_content: Raw content extracted by AI using format-specific prompt
title: Report title
Returns:
tuple: (rendered_content, mime_type)
"""
pass
def _extract_sections(self, report_data: Dict[str, Any]) -> list:
"""Extract sections from report data."""
return report_data.get('sections', [])
def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metadata from report data."""
return report_data.get('metadata', {})
def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
"""Get title from report data or use fallback."""
return report_data.get('title', fallback_title)
def _format_timestamp(self, timestamp: str = None) -> str:
"""Format timestamp for display."""
if timestamp:
return timestamp
from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -1,64 +0,0 @@
"""
CSV renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import csv
import io
class CsvRenderer(BaseRenderer):
    """Renders content to CSV format with format-specific extraction.

    The AI is instructed to emit raw CSV directly; this renderer only strips
    whitespace and an optional surrounding code fence.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """CSV is the only native format."""
        return ['csv']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Generic tabular aliases routed to the CSV renderer."""
        return ['spreadsheet', 'table']

    @classmethod
    def get_priority(cls) -> int:
        """Selection priority when several renderers claim a format/alias."""
        return 70

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only CSV-specific guidelines; global prompt is built centrally."""
        return (
            "CSV FORMAT GUIDELINES:\n"
            "- Emit ONLY CSV text without fences or commentary.\n"
            "- Include a single header row with clear column names.\n"
            "- Quote fields containing commas, quotes, or newlines; escape quotes by doubling them.\n"
            "- Use rows to represent items/records derived from sources.\n"
            "- Keep cells concise; include units in headers when useful.\n"
            "OUTPUT: Return ONLY valid CSV content that can be imported."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Pass AI-produced CSV through after cleanup; never propagates errors."""
        try:
            cleaned = self._clean_csv_content(extracted_content, title)
            return cleaned, "text/csv"
        except Exception as e:
            self.logger.error(f"Error rendering CSV: {str(e)}")
            # Minimal two-column fallback so callers still receive CSV.
            return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"

    def _clean_csv_content(self, content: str, title: str) -> str:
        """Trim whitespace and unwrap a single surrounding ``` fence, if any."""
        body = content.strip()
        fenced = body.startswith("```") and body.endswith("```")
        if fenced:
            rows = body.split('\n')
            if len(rows) > 2:
                body = '\n'.join(rows[1:-1]).strip()
        return body

View file

@ -1,249 +0,0 @@
"""
DOCX renderer for report generation using python-docx.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from docx import Document
from docx.shared import Inches, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.shared import OxmlElement, qn
from docx.oxml.ns import nsdecls
from docx.oxml import parse_xml
DOCX_AVAILABLE = True
except ImportError:
DOCX_AVAILABLE = False
class DocxRenderer(BaseRenderer):
    """Renders content to DOCX format using python-docx.

    Note: on the success path render() returns the document as a
    base64-encoded string (see _generate_docx), not raw bytes.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported DOCX formats."""
        return ['docx', 'doc']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['word', 'document']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for DOCX renderer (higher number wins contested formats)."""
        return 115

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only DOCX-specific guidelines; global prompt is built centrally."""
        return (
            "DOCX FORMAT GUIDELINES:\n"
            "- Provide plain text content suitable for Word generation (no markdown/HTML).\n"
            "- Use clear section hierarchy; bullet and numbered lists where needed.\n"
            "- Include tables as simple pipe-delimited lines if tabular data is needed.\n"
            "OUTPUT: Return ONLY the structured plain text to be converted into DOCX."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to DOCX format.

        Returns (base64_docx, docx_mime) on success; falls back to the HTML
        renderer when python-docx is missing, or to a plain-text error
        message when generation fails (never raises to the caller).
        """
        try:
            if not DOCX_AVAILABLE:
                # Fallback to HTML if python-docx not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"
            # Generate DOCX using python-docx
            docx_content = self._generate_docx(extracted_content, title)
            return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
        except Exception as e:
            self.logger.error(f"Error rendering DOCX: {str(e)}")
            # Return minimal fallback
            return f"DOCX Generation Error: {str(e)}", "text/plain"

    def _generate_docx(self, content: str, title: str) -> str:
        """Generate DOCX content using python-docx.

        Maps plain-text lines onto Word elements heuristically:
        ALL-CAPS lines become H1, Title Case lines become H2, and markdown
        #/##/### prefixes are honored as a fallback; everything else is
        buffered into the current section. Returns the document base64-encoded.
        """
        try:
            # Create new document
            doc = Document()
            # Set up document styles
            self._setup_document_styles(doc)
            # Add title
            title_para = doc.add_heading(title, 0)
            title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            # Add generation date
            date_para = doc.add_paragraph(f"Generated: {self._format_timestamp()}")
            date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
            # Add page break
            doc.add_page_break()
            # Process content
            lines = content.split('\n')
            current_section = []  # buffered non-heading lines awaiting flush
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                # Check for ALL CAPS headings (major headings)
                if line.isupper() and len(line) > 3 and not line.startswith('-') and not line.startswith('*'):
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line, level=1)
                # Check for Title Case headings (subheadings)
                elif line.istitle() and len(line) > 5 and not line.startswith('-') and not line.startswith('*') and not line.startswith(('1.', '2.', '3.', '4.', '5.')):
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line, level=2)
                # Check for markdown headings (fallback)
                elif line.startswith('# '):
                    # H1 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[2:], level=1)
                elif line.startswith('## '):
                    # H2 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[3:], level=2)
                elif line.startswith('### '):
                    # H3 heading
                    if current_section:
                        self._process_section(doc, current_section)
                        current_section = []
                    doc.add_heading(line[4:], level=3)
                else:
                    current_section.append(line)
            # Process remaining content
            if current_section:
                self._process_section(doc, current_section)
            # Save to buffer
            buffer = io.BytesIO()
            doc.save(buffer)
            buffer.seek(0)
            # Convert to base64
            docx_bytes = buffer.getvalue()
            docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
            return docx_base64
        except Exception as e:
            self.logger.error(f"Error generating DOCX: {str(e)}")
            raise

    def _setup_document_styles(self, doc):
        """Set up document styles (Calibri body, bold Calibri headings)."""
        try:
            # Set default font
            style = doc.styles['Normal']
            font = style.font
            font.name = 'Calibri'
            font.size = Pt(11)
            # Set heading styles
            for i in range(1, 4):
                heading_style = doc.styles[f'Heading {i}']
                heading_font = heading_style.font
                heading_font.name = 'Calibri'
                # Sizes step down: H1=14pt, H2=12pt, H3=10pt.
                heading_font.size = Pt(16 - i * 2)
                heading_font.bold = True
        except Exception as e:
            self.logger.warning(f"Could not set up document styles: {str(e)}")

    def _process_section(self, doc, lines: list):
        """Process a section of content into DOCX elements.

        NOTE(review): on the first line containing '|' the WHOLE section is
        re-scanned as one table and processing stops (unconditional return),
        so non-table lines after the table are dropped — confirm intended.
        """
        for line in lines:
            if not line.strip():
                continue
            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extract_table_data(lines)
                if table_data:
                    self._add_table(doc, table_data)
                return
            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                doc.add_paragraph(line[2:], style='List Bullet')
            elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
                # This is a numbered list item
                doc.add_paragraph(line[3:], style='List Number')
            else:
                # Regular paragraph
                doc.add_paragraph(line)

    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines.

        Collects '|'-delimited rows; requires at least two rows (header +
        data) to count as a table, otherwise returns [].
        """
        table_data = []
        in_table = False
        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break
        return table_data if len(table_data) > 1 else []

    def _add_table(self, doc, table_data: list):
        """Add a table to the document (column count taken from first row)."""
        try:
            if not table_data:
                return
            # Create table
            table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
            table.alignment = WD_TABLE_ALIGNMENT.CENTER
            # Add data to table
            for row_idx, row_data in enumerate(table_data):
                for col_idx, cell_data in enumerate(row_data):
                    # Guard against ragged rows wider than the first row.
                    if col_idx < len(table.rows[row_idx].cells):
                        table.rows[row_idx].cells[col_idx].text = cell_data
            # Style the table
            self._style_table(table)
        except Exception as e:
            self.logger.warning(f"Could not add table: {str(e)}")

    def _style_table(self, table):
        """Apply styling to the table (bolds every run in the header row)."""
        try:
            # Style header row
            if len(table.rows) > 0:
                header_cells = table.rows[0].cells
                for cell in header_cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run.bold = True
        except Exception as e:
            self.logger.warning(f"Could not style table: {str(e)}")

View file

@ -1,210 +0,0 @@
"""
Excel renderer for report generation using openpyxl.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
from openpyxl.worksheet.table import Table, TableStyleInfo
OPENPYXL_AVAILABLE = True
except ImportError:
OPENPYXL_AVAILABLE = False
class ExcelRenderer(BaseRenderer):
    """Renders content to Excel format using openpyxl.

    Note: on the success path render() returns the workbook as a
    base64-encoded string (see _generate_excel), not raw bytes.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Excel formats."""
        return ['xlsx', 'xls', 'excel']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'workbook']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for Excel renderer (higher number wins contested formats)."""
        return 110

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Excel-specific guidelines; global prompt is built centrally."""
        return (
            "EXCEL FORMAT GUIDELINES:\n"
            "- Output one or more pipe-delimited tables with a single header row.\n"
            "- Let user intent define columns; use clear names and ISO dates.\n"
            "- Separate multiple tables by a single blank line.\n"
            "- No markdown/HTML/code fences; tables only unless user explicitly asks for notes.\n"
            "OUTPUT: Return ONLY pipe-delimited tables suitable for import."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Excel format.

        Returns (base64_xlsx, xlsx_mime) on success; degrades to CSV when
        openpyxl is missing or generation fails (never raises to the caller).
        """
        try:
            if not OPENPYXL_AVAILABLE:
                # Fallback to CSV if openpyxl not available
                from .csv_renderer import CsvRenderer
                csv_renderer = CsvRenderer()
                csv_content, _ = await csv_renderer.render(extracted_content, title)
                return csv_content, "text/csv"
            # Generate Excel using openpyxl
            excel_content = self._generate_excel(extracted_content, title)
            return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        except Exception as e:
            self.logger.error(f"Error rendering Excel: {str(e)}")
            # Return CSV fallback
            return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"

    def _generate_excel(self, content: str, title: str) -> str:
        """Generate Excel content using openpyxl.

        Builds a fixed three-sheet workbook (Summary, Data, Analysis) and
        returns it base64-encoded.
        """
        try:
            # Create workbook
            wb = Workbook()
            # Remove default sheet
            wb.remove(wb.active)
            # Create sheets
            summary_sheet = wb.create_sheet("Summary", 0)
            data_sheet = wb.create_sheet("Data", 1)
            analysis_sheet = wb.create_sheet("Analysis", 2)
            # Add content to sheets
            self._populate_summary_sheet(summary_sheet, title)
            self._populate_data_sheet(data_sheet, content)
            self._populate_analysis_sheet(analysis_sheet, content)
            # Save to buffer
            buffer = io.BytesIO()
            wb.save(buffer)
            buffer.seek(0)
            # Convert to base64
            excel_bytes = buffer.getvalue()
            excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
            return excel_base64
        except Exception as e:
            self.logger.error(f"Error generating Excel: {str(e)}")
            raise

    def _populate_summary_sheet(self, sheet, title: str):
        """Populate the summary sheet (title, timestamp, live item-count formula)."""
        try:
            # Title
            sheet['A1'] = title
            sheet['A1'].font = Font(size=16, bold=True)
            sheet['A1'].alignment = Alignment(horizontal='center')
            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"
            # Key metrics placeholder
            sheet['A6'] = "Key Metrics:"
            sheet['A6'].font = Font(bold=True)
            sheet['A7'] = "Total Items:"
            sheet['B7'] = "=COUNTA(Data!A:A)-1"  # Count non-empty cells in Data sheet
            # Auto-adjust column widths
            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30
        except Exception as e:
            self.logger.warning(f"Could not populate summary sheet: {str(e)}")

    def _populate_data_sheet(self, sheet, content: str):
        """Populate the data sheet from pipe-delimited AI output.

        Fixed 5-column layout; pipe rows are split across columns (extra
        cells beyond 5 are discarded), plain lines go into column A.
        """
        try:
            # Headers
            headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=1, column=col, value=header)
                cell.font = Font(bold=True)
                cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
            # Process content
            lines = content.split('\n')
            row = 2
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                # Check for table data (lines with |)
                if '|' in line:
                    cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                    for col, cell_data in enumerate(cells[:5], 1):  # Limit to 5 columns
                        sheet.cell(row=row, column=col, value=cell_data)
                    row += 1
                else:
                    # Regular content
                    sheet.cell(row=row, column=1, value=line)
                    row += 1
            # Auto-adjust column widths
            for col in range(1, 6):
                sheet.column_dimensions[get_column_letter(col)].width = 20
        except Exception as e:
            self.logger.warning(f"Could not populate data sheet: {str(e)}")

    def _populate_analysis_sheet(self, sheet, content: str):
        """Populate the analysis sheet with simple line-type statistics."""
        try:
            # Title
            sheet['A1'] = "Analysis & Insights"
            sheet['A1'].font = Font(size=14, bold=True)
            # Content analysis
            lines = content.split('\n')
            row = 3
            sheet['A3'] = "Content Analysis:"
            sheet['A3'].font = Font(bold=True)
            row += 1
            # Count different types of content
            table_lines = sum(1 for line in lines if '|' in line)
            list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
            text_lines = len(lines) - table_lines - list_lines
            sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
            row += 1
            sheet[f'A{row}'] = f"Table Rows: {table_lines}"
            row += 1
            sheet[f'A{row}'] = f"List Items: {list_lines}"
            row += 1
            sheet[f'A{row}'] = f"Text Lines: {text_lines}"
            row += 2
            # Recommendations
            sheet[f'A{row}'] = "Recommendations:"
            sheet[f'A{row}'].font = Font(bold=True)
            row += 1
            sheet[f'A{row}'] = "1. Review data accuracy"
            row += 1
            sheet[f'A{row}'] = "2. Consider additional analysis"
            row += 1
            sheet[f'A{row}'] = "3. Update regularly"
            # Auto-adjust column width
            sheet.column_dimensions['A'].width = 30
        except Exception as e:
            self.logger.warning(f"Could not populate analysis sheet: {str(e)}")

View file

@ -1,69 +0,0 @@
"""
HTML renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class HtmlRenderer(BaseRenderer):
    """Renders content to HTML format with format-specific extraction.

    The AI emits a full HTML document; this renderer only strips fences and
    guarantees a DOCTYPE-prefixed result.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Formats handled natively by this renderer."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Alias names that resolve to the HTML renderer."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Selection priority when several renderers claim a format/alias."""
        return 100

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only HTML-specific guidelines; global prompt is built centrally."""
        return (
            "HTML FORMAT GUIDELINES:\n"
            "- Output a complete HTML5 document starting with <!DOCTYPE html>.\n"
            "- Include <html>, <head> with <meta charset=\"UTF-8\"> and <title>, and <body>.\n"
            "- Use semantic elements: <header>, <main>, <section>, <article>, <footer>.\n"
            "- Provide professional CSS in a <style> block; responsive, clean typography.\n"
            "- Use h1/h2/h3 for headings; tables and lists for structure.\n"
            "OUTPUT: Return ONLY valid HTML (no markdown, no code fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Sanitize the AI-produced HTML and return it with its mime type."""
        try:
            cleaned = self._clean_html_content(extracted_content, title)
            return cleaned, "text/html"
        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Minimal self-contained error page so callers still receive HTML.
            return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"

    def _clean_html_content(self, content: str, title: str) -> str:
        """Strip code fences and guarantee a DOCTYPE-prefixed document."""
        body = content.strip()
        # Unwrap a single surrounding markdown code fence, if present.
        if body.startswith("```") and body.endswith("```"):
            fence_lines = body.split('\n')
            if len(fence_lines) > 2:
                body = '\n'.join(fence_lines[1:-1]).strip()
        # Guarantee the document opens with a DOCTYPE declaration.
        if not body.startswith('<!DOCTYPE'):
            if body.startswith('<html'):
                body = '<!DOCTYPE html>\n' + body
            else:
                body = f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>{title}</title></head>\n<body>\n{body}\n</body>\n</html>'
        return body

View file

@ -1,74 +0,0 @@
"""
JSON renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import json
class JsonRenderer(BaseRenderer):
    """Renders content to JSON format with format-specific extraction.

    The AI emits JSON directly; this renderer strips fences, validates, and
    pretty-prints the payload.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """JSON is the only native format."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Alias names that resolve to the JSON renderer."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Selection priority when several renderers claim a format/alias."""
        return 80

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only JSON-specific guidelines; global prompt is built centrally."""
        return (
            "JSON FORMAT GUIDELINES:\n"
            "- Output ONLY a single valid JSON object (no fences, no pre/post text).\n"
            "- Choose a structure that best fits the user's intent; include a top-level title and data.\n"
            "- Prefer arrays/objects that map cleanly to the extracted facts.\n"
            "- Include minimal metadata only if useful (e.g., generatedAt, sources).\n"
            "OUTPUT: Return ONLY valid, parseable JSON."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Validate and format the AI-produced JSON; never propagates errors."""
        try:
            cleaned = self._clean_json_content(extracted_content, title)
            return cleaned, "application/json"
        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            # Minimal structured fallback so callers still receive JSON.
            fallback_data = {
                "title": title,
                "sections": [{"type": "text", "content": f"Error rendering report: {str(e)}"}],
                "metadata": {"error": str(e)}
            }
            return json.dumps(fallback_data, indent=2), "application/json"

    def _clean_json_content(self, content: str, title: str) -> str:
        """Unwrap fences, then validate and pretty-print the JSON payload."""
        body = content.strip()
        # Unwrap a single surrounding markdown code fence, if present.
        if body.startswith("```") and body.endswith("```"):
            fence_lines = body.split('\n')
            if len(fence_lines) > 2:
                body = '\n'.join(fence_lines[1:-1]).strip()
        # Validate; on failure hand back the raw text unchanged.
        try:
            parsed = json.loads(body)
        except json.JSONDecodeError:
            return body
        return json.dumps(parsed, indent=2, ensure_ascii=False)

View file

@ -1,65 +0,0 @@
"""
Markdown renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class MarkdownRenderer(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction.

    The AI emits Markdown directly; this renderer only strips whitespace and
    an optional surrounding code fence.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Markdown format names handled natively."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Alias names that resolve to the Markdown renderer."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Selection priority when several renderers claim a format/alias."""
        return 95

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Markdown-specific guidelines; global prompt is built centrally."""
        return (
            "MARKDOWN FORMAT GUIDELINES:\n"
            "- Use proper Markdown syntax only (no HTML wrappers).\n"
            "- # for main title, ## for sections, ### for subsections.\n"
            "- Tables with | separators and a header row.\n"
            "- Bullet lists with - or *.\n"
            "- Emphasis with **bold** and *italic*.\n"
            "- Code blocks with ```language.\n"
            "- Horizontal rules (---) to separate major sections when helpful.\n"
            "- Include links [text](url) and images ![alt](url) when referenced by sources.\n"
            "OUTPUT: Return ONLY raw Markdown content without code fences."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Pass AI-produced Markdown through after cleanup; never propagates errors."""
        try:
            cleaned = self._clean_markdown_content(extracted_content, title)
            return cleaned, "text/markdown"
        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Minimal fallback so callers still receive Markdown.
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _clean_markdown_content(self, content: str, title: str) -> str:
        """Trim whitespace and unwrap a single surrounding ``` fence, if any."""
        body = content.strip()
        fenced = body.startswith("```") and body.endswith("```")
        if fenced:
            fence_lines = body.split('\n')
            if len(fence_lines) > 2:
                body = '\n'.join(fence_lines[1:-1]).strip()
        return body

View file

@ -1,225 +0,0 @@
"""
PDF renderer for report generation using reportlab.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
class PdfRenderer(BaseRenderer):
    """Renders content to PDF format using reportlab.

    The finished PDF is returned as a base64-encoded string with the
    "application/pdf" MIME type. When reportlab is not installed the
    renderer falls back to HTML output.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only PDF-specific guidelines; global prompt is built centrally."""
        return (
            "PDF FORMAT GUIDELINES:\n"
            "- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n"
            "- Use bullet lists and tables where useful; separate major sections clearly.\n"
            "- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n"
            "OUTPUT: Return ONLY the PDF-ready textual content (no fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to PDF format.

        Returns:
            tuple: (base64-encoded PDF, "application/pdf") on success; an
            HTML fallback when reportlab is missing, or a plain-text error
            message if generation fails.
        """
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab not available
                from .html_renderer import HtmlRenderer
                html_renderer = HtmlRenderer()
                html_content, _ = await html_renderer.render(extracted_content, title)
                return html_content, "text/html"
            # Generate PDF using reportlab
            pdf_content = self._generate_pdf(extracted_content, title)
            return pdf_content, "application/pdf"
        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"

    def _generate_pdf(self, content: str, title: str) -> str:
        """Generate PDF content using reportlab.

        Walks the content line by line; markdown-style headings flush the
        buffered section so paragraphs, lists and tables stay grouped.
        Returns the PDF bytes base64-encoded.
        """
        try:
            # Create a buffer to hold the PDF
            buffer = io.BytesIO()
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )
            styles = getSampleStyleSheet()
            # Custom styles for document title and section headings
            title_style = ParagraphStyle(
                'CustomTitle',
                parent=styles['Heading1'],
                fontSize=24,
                spaceAfter=30,
                alignment=TA_CENTER,
                textColor=colors.darkblue
            )
            heading_style = ParagraphStyle(
                'CustomHeading',
                parent=styles['Heading2'],
                fontSize=16,
                spaceAfter=12,
                spaceBefore=12,
                textColor=colors.darkblue
            )
            story = []
            # Title page
            story.append(Paragraph(title, title_style))
            story.append(Spacer(1, 20))
            story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']))
            story.append(PageBreak())
            lines = content.split('\n')
            current_section = []
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                if line.startswith('# '):
                    # H1 heading: flush buffered section first
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[2:], title_style))
                    story.append(Spacer(1, 12))
                elif line.startswith('## '):
                    # H2 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[3:], heading_style))
                    story.append(Spacer(1, 8))
                elif line.startswith('### '):
                    # H3 heading
                    if current_section:
                        story.extend(self._process_section(current_section, styles))
                        current_section = []
                    story.append(Paragraph(line[4:], styles['Heading3']))
                    story.append(Spacer(1, 6))
                else:
                    current_section.append(line)
            # Process remaining content
            if current_section:
                story.extend(self._process_section(current_section, styles))
            doc.build(story)
            # Get PDF content as base64
            buffer.seek(0)
            pdf_bytes = buffer.getvalue()
            return base64.b64encode(pdf_bytes).decode('utf-8')
        except Exception as e:
            self.logger.error(f"Error generating PDF: {str(e)}")
            raise

    def _process_section(self, lines: list, styles) -> list:
        """Process a section of content into PDF elements.

        Bug fix: the previous implementation executed ``return elements``
        from inside the loop immediately after rendering the first table,
        silently dropping every remaining line of the section. Table lines
        are now consumed in place and the loop keeps processing the rest.
        """
        elements = []
        table_rendered = False
        for line in lines:
            if not line.strip():
                continue
            if '|' in line:
                # Render the section's table exactly once from all
                # pipe-delimited lines; later table lines are skipped.
                if not table_rendered:
                    table_rendered = True
                    table_data = self._extract_table_data(lines)
                    if table_data:
                        table = Table(table_data)
                        table.setStyle(TableStyle([
                            ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
                            ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
                            ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
                            ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
                            ('FONTSIZE', (0, 0), (-1, 0), 14),
                            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                            ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
                            ('GRID', (0, 0), (-1, -1), 1, colors.black)
                        ]))
                        elements.append(table)
                        elements.append(Spacer(1, 12))
                continue
            if line.startswith('- ') or line.startswith('* '):
                # List item (rendered as a plain paragraph)
                elements.append(Paragraph(f"{line[2:]}", styles['Normal']))
            else:
                # Regular paragraph
                elements.append(Paragraph(line, styles['Normal']))
            elements.append(Spacer(1, 6))
        return elements

    def _extract_table_data(self, lines: list) -> list:
        """Extract table rows from pipe-delimited lines.

        Stops at the first blank line after the table starts. Returns the
        rows only when there is more than one (header + data), else [].
        """
        table_data = []
        in_table = False
        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break
        return table_data if len(table_data) > 1 else []

View file

@ -6,7 +6,7 @@ import logging
import importlib
import pkgutil
from typing import Dict, Type, List, Optional
from .base_renderer import BaseRenderer
from .rendererBaseTemplate import BaseRenderer
logger = logging.getLogger(__name__)
@ -37,7 +37,7 @@ class RendererRegistry:
# Scan all Python files in the renderers directory
for file_path in renderers_dir.glob("*.py"):
if file_path.name in ['registry.py', 'base_renderer.py', '__init__.py']:
if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
continue
# Extract module name from filename
@ -92,7 +92,7 @@ class RendererRegistry:
except Exception as e:
logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")
def get_renderer(self, output_format: str) -> Optional[BaseRenderer]:
def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]:
"""Get a renderer instance for the specified format."""
if not self._discovered:
self.discover_renderers()
@ -109,7 +109,7 @@ class RendererRegistry:
if renderer_class:
try:
return renderer_class()
return renderer_class(services=services)
except Exception as e:
logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
return None
@ -144,9 +144,9 @@ class RendererRegistry:
# Global registry instance
_registry = RendererRegistry()
def get_renderer(output_format: str) -> Optional[BaseRenderer]:
def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]:
"""Get a renderer instance for the specified format."""
return _registry.get_renderer(output_format)
return _registry.get_renderer(output_format, services)
def get_supported_formats() -> List[str]:
"""Get list of all supported formats."""

View file

@ -0,0 +1,459 @@
"""
Base renderer class for all format renderers.
"""
from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
import logging
import json
logger = logging.getLogger(__name__)
class BaseRenderer(ABC):
"""Base class for all format renderers."""
def __init__(self, services=None):
self.logger = logger
self.services = services # Add services attribute
@classmethod
def get_supported_formats(cls) -> List[str]:
"""
Return list of supported format names for this renderer.
Override this method in subclasses to specify supported formats.
"""
return []
@classmethod
def get_format_aliases(cls) -> List[str]:
"""
Return list of format aliases for this renderer.
Override this method in subclasses to specify format aliases.
"""
return []
@classmethod
def get_priority(cls) -> int:
"""
Return priority for this renderer (higher number = higher priority).
Used when multiple renderers support the same format.
"""
return 0
@abstractmethod
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""
Render extracted JSON content to the target format.
Args:
extracted_content: Structured JSON content with sections and metadata
title: Report title
user_prompt: Original user prompt for context
ai_service: AI service instance for additional processing
Returns:
tuple: (rendered_content, mime_type)
"""
pass
def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Extract sections from report data."""
return report_data.get('sections', [])
def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
"""Extract metadata from report data."""
return report_data.get('metadata', {})
def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
"""Get title from report data or use fallback."""
metadata = report_data.get('metadata', {})
return metadata.get('title', fallback_title)
def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
"""Validate that JSON content has the expected structure."""
if not isinstance(json_content, dict):
return False
if "sections" not in json_content:
return False
sections = json_content.get("sections", [])
if not isinstance(sections, list):
return False
# Validate each section has content_type and elements
for section in sections:
if not isinstance(section, dict):
return False
if "content_type" not in section or "elements" not in section:
return False
return True
def _get_section_type(self, section: Dict[str, Any]) -> str:
"""Get the type of a section."""
return section.get("content_type", "paragraph")
def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Get the elements of a section."""
return section.get("elements", [])
def _get_section_id(self, section: Dict[str, Any]) -> str:
"""Get the ID of a section (if available)."""
return section.get("id", "unknown")
def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
"""Extract table headers and rows from section data."""
headers = section_data.get("headers", [])
rows = section_data.get("rows", [])
return headers, rows
def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
"""Extract bullet list items from section data."""
items = section_data.get("items", [])
result = []
for item in items:
if isinstance(item, str):
result.append(item)
elif isinstance(item, dict) and "text" in item:
result.append(item["text"])
return result
def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
"""Extract heading level and text from section data."""
level = section_data.get("level", 1)
text = section_data.get("text", "")
return level, text
def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
"""Extract paragraph text from section data."""
return section_data.get("text", "")
def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
"""Extract code and language from section data."""
code = section_data.get("code", "")
language = section_data.get("language", "")
return code, language
def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
"""Extract base64 data and alt text from section data."""
base64_data = section_data.get("base64Data", "")
alt_text = section_data.get("altText", "Image")
return base64_data, alt_text
def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
"""
Render an image section. This is a base implementation that should be overridden
by format-specific renderers.
Args:
section: Image section data
styles: Optional styling information
Returns:
Format-specific image representation
"""
section_data = self._get_section_data(section)
base64_data, alt_text = self._extract_image_data(section_data)
# Base implementation returns a simple dict
# Format-specific renderers should override this method
return {
"content_type": "image",
"base64Data": base64_data,
"altText": alt_text,
"width": section_data.get("width", None),
"height": section_data.get("height", None),
"caption": section_data.get("caption", "")
}
def _validate_image_data(self, base64_data: str, alt_text: str) -> bool:
"""Validate image data."""
if not base64_data:
self.logger.warning("Image section has no base64 data")
return False
if not alt_text:
self.logger.warning("Image section has no alt text")
return False
# Basic base64 validation
try:
import base64
base64.b64decode(base64_data, validate=True)
return True
except Exception as e:
self.logger.warning(f"Invalid base64 image data: {str(e)}")
return False
def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]:
"""
Get image dimensions from base64 data.
This is a helper method that format-specific renderers can use.
"""
try:
import base64
from PIL import Image
import io
# Decode base64 data
image_data = base64.b64decode(base64_data)
image = Image.open(io.BytesIO(image_data))
return image.size # Returns (width, height)
except Exception as e:
self.logger.warning(f"Could not determine image dimensions: {str(e)}")
return (0, 0)
def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str:
"""
Resize image if it exceeds maximum dimensions.
Returns the resized image as base64 string.
"""
try:
import base64
from PIL import Image
import io
# Decode base64 data
image_data = base64.b64decode(base64_data)
image = Image.open(io.BytesIO(image_data))
# Check if resizing is needed
width, height = image.size
if width <= max_width and height <= max_height:
return base64_data # No resizing needed
# Calculate new dimensions maintaining aspect ratio
ratio = min(max_width / width, max_height / height)
new_width = int(width * ratio)
new_height = int(height * ratio)
# Resize image
resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)
# Convert back to base64
buffer = io.BytesIO()
resized_image.save(buffer, format=image.format or 'PNG')
resized_data = buffer.getvalue()
return base64.b64encode(resized_data).decode('utf-8')
except Exception as e:
self.logger.warning(f"Could not resize image: {str(e)}")
return base64_data # Return original if resize fails
def _get_supported_section_types(self) -> List[str]:
"""Return list of supported section types."""
return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]
def _is_valid_section_type(self, section_type: str) -> bool:
"""Check if a section type is valid."""
return section_type in self._get_supported_section_types()
def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
"""Process a section and return structured data based on its type."""
section_type = self._get_section_type(section)
section_data = self._get_section_data(section)
if section_type == "table":
headers, rows = self._extract_table_data(section_data)
return {"content_type": "table", "headers": headers, "rows": rows}
elif section_type == "bullet_list":
items = self._extract_bullet_list_items(section_data)
return {"content_type": "bullet_list", "items": items}
elif section_type == "heading":
level, text = self._extract_heading_data(section_data)
return {"content_type": "heading", "level": level, "text": text}
elif section_type == "paragraph":
text = self._extract_paragraph_text(section_data)
return {"content_type": "paragraph", "text": text}
elif section_type == "code_block":
code, language = self._extract_code_block_data(section_data)
return {"content_type": "code_block", "code": code, "language": language}
elif section_type == "image":
base64_data, alt_text = self._extract_image_data(section_data)
# Validate image data
if self._validate_image_data(base64_data, alt_text):
return {
"content_type": "image",
"base64Data": base64_data,
"altText": alt_text,
"width": section_data.get("width"),
"height": section_data.get("height"),
"caption": section_data.get("caption", "")
}
else:
# Return placeholder if image data is invalid
return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"}
else:
# Fallback to paragraph
text = self._extract_paragraph_text(section_data)
return {"content_type": "paragraph", "text": text}
def _format_timestamp(self, timestamp: str = None) -> str:
"""Format timestamp for display."""
if timestamp:
return timestamp
from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
# ===== GENERIC AI STYLING HELPERS =====
async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
"""
Generic AI styling method that can be used by all renderers.
Args:
ai_service: AI service instance
style_template: Format-specific style template
default_styles: Default styles to fall back to
Returns:
Dict with styling definitions
"""
# DEBUG: Show which renderer is calling this method
if not ai_service:
return default_styles
try:
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=style_template, context="", options=request_options)
# DEBUG: Show the actual prompt being sent to AI
self.logger.debug(f"AI Style Template Prompt:")
self.logger.debug(f"{style_template}")
response = await ai_service.aiObjects.call(request)
import json
import re
# Clean and parse JSON
result = response.content.strip() if response and response.content else ""
# Check if result is empty
if not result:
self.logger.warning("AI styling returned empty response, using defaults")
return default_styles
# Extract JSON from markdown if present
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
if json_match:
result = json_match.group(1).strip()
elif result.startswith('```json'):
result = re.sub(r'^```json\s*', '', result)
result = re.sub(r'\s*```$', '', result)
elif result.startswith('```'):
result = re.sub(r'^```\s*', '', result)
result = re.sub(r'\s*```$', '', result)
# Try to parse JSON
try:
styles = json.loads(result)
except json.JSONDecodeError as json_error:
self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
# Use print instead of logger to avoid truncation
self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "RENDERER")
self.logger.warning(f"Raw content that failed to parse: {result}")
# Try to fix incomplete JSON by adding missing closing braces
open_braces = result.count('{')
close_braces = result.count('}')
if open_braces > close_braces:
# JSON is incomplete, add missing closing braces
missing_braces = open_braces - close_braces
result = result + '}' * missing_braces
self.logger.info(f"Added {missing_braces} missing closing brace(s)")
self.logger.debug(f"Fixed JSON: {result}")
# Try parsing the fixed JSON
try:
styles = json.loads(result)
self.logger.info("Successfully fixed incomplete JSON")
except json.JSONDecodeError as fix_error:
self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
self.logger.warning(f"Fixed JSON content: {result}")
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
try:
styles = json.loads(json_part)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
else:
return default_styles
else:
# Try to extract just the JSON part if it's embedded in text
json_start = result.find('{')
json_end = result.rfind('}')
if json_start != -1 and json_end != -1 and json_end > json_start:
json_part = result[json_start:json_end+1]
try:
styles = json.loads(json_part)
self.logger.info("Successfully extracted JSON from explanatory text")
except json.JSONDecodeError:
self.logger.warning("Could not extract valid JSON from response, using defaults")
return default_styles
else:
return default_styles
# Convert colors to appropriate format
styles = self._convert_colors_format(styles)
return styles
except Exception as e:
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
return default_styles
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""
Convert colors to appropriate format based on renderer type.
Override this method in subclasses for format-specific color handling.
"""
return styles
def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
"""
Create a standardized AI style template for any format.
Args:
format_name: Name of the format (e.g., "docx", "xlsx", "pptx")
user_prompt: User's original prompt
style_schema: Format-specific style schema
Returns:
Formatted prompt string
"""
schema_json = json.dumps(style_schema, indent=4)
# DEBUG: Show the schema being sent
return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents.
Use this schema as a template and customize the values for professional document styling:
{schema_json}
Requirements:
- Return ONLY the complete JSON object (no markdown, no explanations)
- Customize colors, fonts, and spacing for professional appearance
- Ensure all objects are properly closed with closing braces
- Make the styling modern and professional
Return the complete JSON:"""

View file

@ -0,0 +1,260 @@
"""
CSV renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import csv
import io
class RendererCsv(BaseRenderer):
    """Renders content to CSV format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported CSV formats."""
        return ['csv']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'table']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for CSV renderer."""
        return 70

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to CSV format.

        Returns:
            tuple: (csv_text, "text/csv"); a minimal error CSV on failure.
        """
        try:
            # Generate CSV directly from JSON (no styling needed for CSV)
            csv_content = await self._generate_csv_from_json(extracted_content, title)
            return csv_content, "text/csv"
        except Exception as e:
            self.logger.error(f"Error rendering CSV: {str(e)}")
            # Return minimal CSV fallback
            return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"

    async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str:
        """Generate CSV content from structured JSON document.

        Sections are separated by blank rows; a title row (from metadata
        or the fallback title) is emitted first.
        """
        try:
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")
            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")
            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)
            csv_rows = []
            if document_title:
                csv_rows.append([document_title])
                csv_rows.append([])  # Empty row
            sections = json_content.get("sections", [])
            for section in sections:
                section_csv = self._render_json_section_to_csv(section)
                if section_csv:
                    csv_rows.extend(section_csv)
                    csv_rows.append([])  # Empty row between sections
            return self._convert_rows_to_csv(csv_rows)
        except Exception as e:
            self.logger.error(f"Error generating CSV from JSON: {str(e)}")
            raise Exception(f"CSV generation failed: {str(e)}")

    def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]:
        """Render a single JSON section to CSV rows, dispatching on the
        section's content_type (unknown types fall back to paragraph)."""
        try:
            section_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])
            csv_rows = []
            section_title = section.get("title")
            if section_title:
                csv_rows.append([f"# {section_title}"])
            for element in elements:
                if section_type == "table":
                    csv_rows.extend(self._render_json_table_to_csv(element))
                elif section_type == "list":
                    csv_rows.extend(self._render_json_list_to_csv(element))
                elif section_type == "heading":
                    csv_rows.extend(self._render_json_heading_to_csv(element))
                elif section_type == "paragraph":
                    csv_rows.extend(self._render_json_paragraph_to_csv(element))
                elif section_type == "code":
                    csv_rows.extend(self._render_json_code_to_csv(element))
                else:
                    # Fallback to paragraph for unknown types
                    csv_rows.extend(self._render_json_paragraph_to_csv(element))
            return csv_rows
        except Exception as e:
            self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
            return [["[Error rendering section]"]]

    def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]:
        """Render a JSON table (headers + rows) to CSV rows."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])
            csv_rows = []
            if headers:
                csv_rows.append(headers)
            if rows:
                csv_rows.extend(rows)
            return csv_rows
        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return [["[Error rendering table]"]]

    def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]:
        """Render a JSON list to CSV rows; subitems become indented rows."""
        try:
            items = list_data.get("items", [])
            csv_rows = []
            for item in items:
                if isinstance(item, dict):
                    text = item.get("text", "")
                    subitems = item.get("subitems", [])
                    csv_rows.append([text])
                    for subitem in subitems:
                        if isinstance(subitem, dict):
                            csv_rows.append([f"  - {subitem.get('text', '')}"])
                        else:
                            csv_rows.append([f"  - {subitem}"])
                else:
                    csv_rows.append([str(item)])
            return csv_rows
        except Exception as e:
            self.logger.warning(f"Error rendering list: {str(e)}")
            return [["[Error rendering list]"]]

    def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]:
        """Render a JSON heading to CSV rows ('#' repeated per level)."""
        try:
            text = heading_data.get("text", "")
            level = heading_data.get("level", 1)
            if text:
                heading_text = f"{'#' * level} {text}"
                return [[heading_text]]
            return []
        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return [["[Error rendering heading]"]]

    def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]:
        """Render a JSON paragraph to CSV rows, word-wrapping text longer
        than 100 characters across multiple rows."""
        try:
            text = paragraph_data.get("text", "")
            if text:
                if len(text) > 100:
                    words = text.split()
                    rows = []
                    current_row = []
                    current_length = 0
                    for word in words:
                        if current_length + len(word) > 100 and current_row:
                            rows.append([" ".join(current_row)])
                            current_row = [word]
                            current_length = len(word)
                        else:
                            current_row.append(word)
                            current_length += len(word) + 1
                    if current_row:
                        rows.append([" ".join(current_row)])
                    return rows
                else:
                    return [[text]]
            return []
        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return [["[Error rendering paragraph]"]]

    def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]:
        """Render a JSON code block to CSV rows, one row per code line."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")
            csv_rows = []
            if language:
                csv_rows.append([f"Code ({language}):"])
            if code:
                code_lines = code.split('\n')
                for line in code_lines:
                    csv_rows.append([f"  {line}"])
            return csv_rows
        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return [["[Error rendering code block]"]]

    def _convert_rows_to_csv(self, rows: List[List[str]]) -> str:
        """Convert rows to CSV string.

        Bug fix: empty rows are now written as blank lines instead of being
        skipped — _generate_csv_from_json inserts them deliberately as
        title/section separators, and skipping them defeated that spacing.
        """
        import csv
        import io
        output = io.StringIO()
        writer = csv.writer(output)
        for row in rows:
            writer.writerow(row)  # empty row -> blank separator line
        return output.getvalue()

    def _clean_csv_content(self, content: str, title: str) -> str:
        """Clean and validate CSV content from AI (strips a ``` fence)."""
        content = content.strip()
        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()
        return content

View file

@ -0,0 +1,958 @@
"""
DOCX renderer for report generation using python-docx.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
import re
import os
from datetime import datetime, UTC
try:
from docx import Document
from docx.shared import Inches, Pt, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.shared import OxmlElement, qn
from docx.oxml.ns import nsdecls
from docx.oxml import parse_xml
DOCX_AVAILABLE = True
except ImportError:
DOCX_AVAILABLE = False
class RendererDocx(BaseRenderer):
"""Renders content to DOCX format using python-docx."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported DOCX formats."""
return ['docx', 'doc']
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return ['word', 'document']
@classmethod
def get_priority(cls) -> int:
"""Return priority for DOCX renderer."""
return 115
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER")
try:
if not DOCX_AVAILABLE:
# Fallback to HTML if python-docx not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
html_content, _ = await html_renderer.render(extracted_content, title)
return html_content, "text/html"
# Generate DOCX using AI-analyzed styling
docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service)
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
except Exception as e:
self.logger.error(f"Error rendering DOCX: {str(e)}")
# Return minimal fallback
return f"DOCX Generation Error: {str(e)}", "text/plain"
async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Generate DOCX content from structured JSON document using AI-generated styling."""
try:
# Create new document
doc = Document()
# Get AI-generated styling definitions
self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...")
styles = await self._get_docx_styles(user_prompt, ai_service)
# Apply basic document setup
self._setup_basic_document_styles(doc)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Add document title using analyzed styles
if document_title:
title_heading = doc.add_heading(document_title, level=1)
title_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
# Process each section in order
sections = json_content.get("sections", [])
for section in sections:
self._render_json_section(doc, section, styles)
# Save to buffer
buffer = io.BytesIO()
doc.save(buffer)
buffer.seek(0)
# Convert to base64
docx_bytes = buffer.getvalue()
docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
return docx_base64
except Exception as e:
self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
raise Exception(f"DOCX generation failed: {str(e)}")
async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Get DOCX styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
"table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
}
style_template = self._create_ai_style_template("docx", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles())
# Validate and fix contrast issues
return self._validate_styles_contrast(styles)
def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_styles()
def _get_default_styles(self) -> Dict[str, Any]:
"""Default DOCX styles."""
return {
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
"table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
}
def _setup_basic_document_styles(self, doc: Document) -> None:
"""Set up basic document styles."""
try:
# Set default font
style = doc.styles['Normal']
font = style.font
font.name = 'Calibri'
font.size = Pt(11)
except Exception as e:
self.logger.warning(f"Could not set up basic document styles: {str(e)}")
def _clear_template_content(self, doc: Document) -> None:
"""Clear template content while preserving styles."""
try:
# Remove all paragraphs except keep the styles
for paragraph in list(doc.paragraphs):
# Keep the paragraph but clear its content
paragraph.clear()
# Remove all tables
for table in list(doc.tables):
table._element.getparent().remove(table._element)
except Exception as e:
self.logger.warning(f"Could not clear template content: {str(e)}")
def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a single JSON section to DOCX using AI-generated styles."""
try:
section_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
# Process each element in the section
for element in elements:
if section_type == "table":
self._render_json_table(doc, element, styles)
elif section_type == "bullet_list":
self._render_json_bullet_list(doc, element, styles)
elif section_type == "heading":
self._render_json_heading(doc, element, styles)
elif section_type == "paragraph":
self._render_json_paragraph(doc, element, styles)
elif section_type == "code_block":
self._render_json_code_block(doc, element, styles)
elif section_type == "image":
self._render_json_image(doc, element, styles)
else:
# Fallback to paragraph for unknown types
self._render_json_paragraph(doc, element, styles)
except Exception as e:
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
# Add error paragraph as fallback
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
    def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON table element into the document using AI-generated styles.

        Expects ``table_data`` with 'headers' (list) and 'rows' (list of row
        lists); renders nothing when either is missing. Header cells get the
        'table_header' style (background fill, bold, alignment, 11pt); data
        cells get the 'table_cell' text color, left-aligned at 10pt.
        """
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])
            if not headers or not rows:
                return
            # One extra row on top for the header line.
            table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
            table.alignment = WD_TABLE_ALIGNMENT.CENTER
            # Border scheme chosen by the AI style: horizontal-only, full grid,
            # or (any other value) no explicit borders.
            border_style = styles["table_border"]["style"]
            if border_style == "horizontal_only":
                self._apply_horizontal_borders_only(table)
            elif border_style == "grid":
                table.style = 'Table Grid'
            # Header row: background shading plus per-run text styling.
            header_row = table.rows[0]
            header_style = styles["table_header"]
            for i, header in enumerate(headers):
                if i < len(header_row.cells):
                    cell = header_row.cells[i]
                    cell.text = str(header)
                    # '#RRGGBB' -> RGBColor for the cell shading.
                    bg_color = header_style["background"].lstrip('#')
                    self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
                    for paragraph in cell.paragraphs:
                        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT
                        for run in paragraph.runs:
                            run.bold = header_style["bold"]
                            run.font.size = Pt(11)
                            text_color = header_style["text_color"].lstrip('#')
                            run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
            # Data rows (offset by one because row 0 is the header).
            cell_style = styles["table_cell"]
            for row_idx, row_data in enumerate(rows):
                if row_idx + 1 < len(table.rows):
                    table_row = table.rows[row_idx + 1]
                    for col_idx, cell_data in enumerate(row_data):
                        if col_idx < len(table_row.cells):
                            cell = table_row.cells[col_idx]
                            cell.text = str(cell_data)
                            for paragraph in cell.paragraphs:
                                paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
                                for run in paragraph.runs:
                                    run.font.size = Pt(10)
                                    text_color = cell_style["text_color"].lstrip('#')
                                    run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
def _apply_horizontal_borders_only(self, table) -> None:
"""Apply only horizontal borders to the table (no vertical borders)."""
try:
from docx.oxml.shared import OxmlElement, qn
# Get table properties
tbl_pr = table._element.find(qn('w:tblPr'))
if tbl_pr is None:
tbl_pr = OxmlElement('w:tblPr')
table._element.insert(0, tbl_pr)
# Remove existing borders
existing_borders = tbl_pr.find(qn('w:tblBorders'))
if existing_borders is not None:
tbl_pr.remove(existing_borders)
# Create new borders element
tbl_borders = OxmlElement('w:tblBorders')
# Top border
top_border = OxmlElement('w:top')
top_border.set(qn('w:val'), 'single')
top_border.set(qn('w:sz'), '4')
top_border.set(qn('w:space'), '0')
top_border.set(qn('w:color'), '000000')
tbl_borders.append(top_border)
# Bottom border
bottom_border = OxmlElement('w:bottom')
bottom_border.set(qn('w:val'), 'single')
bottom_border.set(qn('w:sz'), '4')
bottom_border.set(qn('w:space'), '0')
bottom_border.set(qn('w:color'), '000000')
tbl_borders.append(bottom_border)
# Left border - none
left_border = OxmlElement('w:left')
left_border.set(qn('w:val'), 'none')
tbl_borders.append(left_border)
# Right border - none
right_border = OxmlElement('w:right')
right_border.set(qn('w:val'), 'none')
tbl_borders.append(right_border)
# Inside horizontal border
inside_h_border = OxmlElement('w:insideH')
inside_h_border.set(qn('w:val'), 'single')
inside_h_border.set(qn('w:sz'), '4')
inside_h_border.set(qn('w:space'), '0')
inside_h_border.set(qn('w:color'), '000000')
tbl_borders.append(inside_h_border)
# Inside vertical border - none
inside_v_border = OxmlElement('w:insideV')
inside_v_border.set(qn('w:val'), 'none')
tbl_borders.append(inside_v_border)
tbl_pr.append(tbl_borders)
except Exception as e:
self.logger.warning(f"Could not apply horizontal borders: {str(e)}")
def _set_cell_background(self, cell, color: RGBColor) -> None:
"""Set the background color of a table cell."""
try:
from docx.oxml.shared import OxmlElement, qn
# Get cell properties
tc_pr = cell._element.find(qn('w:tcPr'))
if tc_pr is None:
tc_pr = OxmlElement('w:tcPr')
cell._element.insert(0, tc_pr)
# Remove existing shading
existing_shading = tc_pr.find(qn('w:shd'))
if existing_shading is not None:
tc_pr.remove(existing_shading)
# Create new shading element
shading = OxmlElement('w:shd')
shading.set(qn('w:val'), 'clear')
shading.set(qn('w:color'), 'auto')
# Convert RGBColor to hex string by unpacking RGB components
red, green, blue = color
hex_color = f"{red:02x}{green:02x}{blue:02x}"
shading.set(qn('w:fill'), hex_color)
tc_pr.append(shading)
except Exception as e:
self.logger.warning(f"Could not set cell background: {str(e)}")
def _render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON bullet list to DOCX using AI-generated styles."""
try:
items = list_data.get("items", [])
bullet_style = styles["bullet_list"]
for item in items:
if isinstance(item, str):
para = doc.add_paragraph(item, style='List Bullet')
elif isinstance(item, dict) and "text" in item:
para = doc.add_paragraph(item["text"], style='List Bullet')
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON heading to DOCX using AI-generated styles."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
if text:
level = max(1, min(6, level))
doc.add_heading(text, level=level)
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON paragraph to DOCX using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
if text:
para = doc.add_paragraph(text)
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON code block to DOCX using AI-generated styles."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
if code:
if language:
lang_para = doc.add_paragraph(f"Code ({language}):")
lang_para.runs[0].bold = True
code_para = doc.add_paragraph(code)
for run in code_para.runs:
run.font.name = 'Courier New'
run.font.size = Pt(10)
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
"""Render a JSON image to DOCX."""
try:
base64_data = image_data.get("base64Data", "")
alt_text = image_data.get("altText", "Image")
if base64_data:
image_bytes = base64.b64decode(base64_data)
doc.add_picture(io.BytesIO(image_bytes), width=Inches(4))
if alt_text:
caption_para = doc.add_paragraph(f"Figure: {alt_text}")
caption_para.runs[0].italic = True
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]")
def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]:
"""Extract document structure from user prompt."""
structure = {
'title': title,
'sections': [],
'format': 'standard'
}
if not user_prompt:
return structure
# Extract title from prompt if not provided
if not title or title == "Generated Document":
# Look for "create a ... document" or "generate a ... report"
import re
title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower())
if title_match:
structure['title'] = title_match.group(1).strip().title()
# Extract sections from numbered lists in prompt
import re
section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
sections = re.findall(section_pattern, user_prompt)
for num, section_text in sections:
structure['sections'].append({
'number': int(num),
'title': section_text.strip(),
'level': 2 # H2 level
})
# If no numbered sections found, try to extract from "including:" patterns
if not structure['sections']:
including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL)
if including_match:
including_text = including_match.group(1)
# Split by common separators
parts = re.split(r'[,;]\s*', including_text)
for i, part in enumerate(parts, 1):
part = part.strip()
if part:
structure['sections'].append({
'number': i,
'title': part,
'level': 2
})
# If still no sections, extract from any list-like patterns
if not structure['sections']:
# Look for bullet points or dashes
bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
bullets = re.findall(bullet_pattern, user_prompt)
for i, bullet in enumerate(bullets, 1):
bullet = bullet.strip()
if bullet and len(bullet) > 3:
structure['sections'].append({
'number': i,
'title': bullet,
'level': 2
})
# If still no sections, extract from sentence structure
if not structure['sections']:
# Split prompt into sentences and use as sections
sentences = re.split(r'[.!?]\s+', user_prompt)
for i, sentence in enumerate(sentences[:5], 1): # Max 5 sections
sentence = sentence.strip()
if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
structure['sections'].append({
'number': i,
'title': sentence[:50] + "..." if len(sentence) > 50 else sentence,
'level': 2
})
# Final fallback: create sections from prompt keywords
if not structure['sections']:
# Extract key action words from prompt
action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
found_actions = []
for action in action_words:
if action in user_prompt.lower():
found_actions.append(action.title())
if found_actions:
for i, action in enumerate(found_actions[:3], 1):
structure['sections'].append({
'number': i,
'title': f"{action} Document Content",
'level': 2
})
else:
# Last resort: generic but meaningful sections
structure['sections'] = [
{'number': 1, 'title': 'Document Analysis', 'level': 2},
{'number': 2, 'title': 'Key Information', 'level': 2},
{'number': 3, 'title': 'Summary and Conclusions', 'level': 2}
]
return structure
def _generate_content_from_structure(self, doc, content: str, structure: Dict[str, Any]):
"""Generate DOCX content based on extracted structure."""
# Add sections based on prompt structure
for section in structure['sections']:
# Add section heading
doc.add_heading(f"{section['number']}) {section['title']}", level=section['level'])
# Add AI-generated content for this section
# Try to extract relevant content for this section from the AI response
section_content = self._extract_section_content(content, section['title'])
if section_content:
doc.add_paragraph(section_content)
else:
# If no specific content found, add a note
doc.add_paragraph(f"Content for {section['title']} based on document analysis.")
# Add some spacing
doc.add_paragraph()
# Add the complete AI-generated content as additional analysis
if content and content.strip():
doc.add_heading("Complete Analysis", level=1)
doc.add_paragraph(content)
def _extract_section_content(self, content: str, section_title: str) -> str:
"""Extract relevant content for a specific section from AI response."""
if not content or not section_title:
return ""
# Look for content that matches the section title
section_keywords = section_title.lower().split()
# Split content into paragraphs
paragraphs = content.split('\n\n')
relevant_paragraphs = []
for paragraph in paragraphs:
paragraph_lower = paragraph.lower()
# Check if paragraph contains keywords from section title
if any(keyword in paragraph_lower for keyword in section_keywords if len(keyword) > 3):
relevant_paragraphs.append(paragraph.strip())
if relevant_paragraphs:
return '\n\n'.join(relevant_paragraphs[:2]) # Max 2 paragraphs per section
return ""
def _setup_document_styles(self, doc):
"""Set up document styles."""
try:
# Set default font
style = doc.styles['Normal']
font = style.font
font.name = 'Calibri'
font.size = Pt(11)
# Set heading styles
for i in range(1, 4):
heading_style = doc.styles[f'Heading {i}']
heading_font = heading_style.font
heading_font.name = 'Calibri'
heading_font.size = Pt(16 - i * 2)
heading_font.bold = True
except Exception as e:
self.logger.warning(f"Could not set up document styles: {str(e)}")
def _process_section(self, doc, lines: list):
"""Process a section of content into DOCX elements."""
for line in lines:
if not line.strip():
continue
# Check for tables (lines with |)
if '|' in line and not line.startswith('|'):
# This might be part of a table, process as table
table_data = self._extract_table_data(lines)
if table_data:
self._add_table(doc, table_data)
return
# Check for lists
if line.startswith('- ') or line.startswith('* '):
# This is a list item
doc.add_paragraph(line[2:], style='List Bullet')
elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
# This is a numbered list item
doc.add_paragraph(line[3:], style='List Number')
else:
# Regular paragraph
doc.add_paragraph(line)
def _extract_table_data(self, lines: list) -> list:
"""Extract table data from lines."""
table_data = []
in_table = False
for line in lines:
if '|' in line:
if not in_table:
in_table = True
# Split by | and clean up
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
if cells:
table_data.append(cells)
elif in_table and not line.strip():
# Empty line, might be end of table
break
return table_data if len(table_data) > 1 else []
def _add_table(self, doc, table_data: list):
"""Add a table to the document."""
try:
if not table_data:
return
# Create table
table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
table.alignment = WD_TABLE_ALIGNMENT.CENTER
# Add data to table
for row_idx, row_data in enumerate(table_data):
for col_idx, cell_data in enumerate(row_data):
if col_idx < len(table.rows[row_idx].cells):
table.rows[row_idx].cells[col_idx].text = cell_data
# Style the table
self._style_table(table)
except Exception as e:
self.logger.warning(f"Could not add table: {str(e)}")
def _style_table(self, table):
"""Apply styling to the table."""
try:
# Style header row
if len(table.rows) > 0:
header_cells = table.rows[0].cells
for cell in header_cells:
for paragraph in cell.paragraphs:
for run in paragraph.runs:
run.bold = True
except Exception as e:
self.logger.warning(f"Could not style table: {str(e)}")
    def _process_table_row(self, doc, line: str):
        """Append one pipe-separated line to a running table, creating it on first use.

        State: the table under construction is cached on ``self._current_table``;
        the first row processed becomes a bold header, later rows are data.
        NOTE(review): nothing visible here ever resets _current_table back to
        None, so consecutive tables in one document would be merged — confirm
        the caller clears it between tables.
        """
        if not line.strip():
            return
        parts = [part.strip() for part in line.split('|')]
        if len(parts) >= 2:
            # At least two cells: treat as a table row.
            if not hasattr(self, '_current_table') or self._current_table is None:
                # First row: create the table and use this line as its header.
                self._current_table = doc.add_table(rows=1, cols=len(parts))
                self._current_table.style = 'Table Grid'
                for i, part in enumerate(parts):
                    if i < len(self._current_table.rows[0].cells):
                        cell = self._current_table.rows[0].cells[i]
                        cell.text = part
                        # Header cells are rendered bold.
                        for paragraph in cell.paragraphs:
                            for run in paragraph.runs:
                                run.bold = True
            else:
                # Subsequent rows: append as data to the existing table.
                row = self._current_table.add_row()
                for i, part in enumerate(parts):
                    if i < len(row.cells):
                        row.cells[i].text = part
        else:
            # Fewer than two cells: not a table row after all — plain text.
            doc.add_paragraph(line)
def _clean_ai_content(self, content: str) -> str:
"""Clean AI-generated content by removing debug information and duplicates."""
if not content:
return ""
# Remove debug information
lines = content.split('\n')
clean_lines = []
for line in lines:
# Skip debug lines and separators
if (line.startswith('[Skipped ') or
line.startswith('=== DOCUMENT:') or
line.startswith('---') or
line.startswith('FILENAME:') or
line.strip() == '' or
line.strip() == '---'):
continue
clean_lines.append(line)
# Join lines and remove duplicate content
clean_content = '\n'.join(clean_lines)
# Remove duplicate sections by keeping only the first occurrence
sections = clean_content.split('\n\n')
seen_sections = set()
unique_sections = []
for section in sections:
section_key = section.strip()[:50] # Use first 50 chars as key
if section_key not in seen_sections and section.strip():
seen_sections.add(section_key)
unique_sections.append(section)
return '\n\n'.join(unique_sections)
    def _process_tables(self, doc, content: str) -> str:
        """
        Detect pipe- or comma-separated table runs in *content*, insert them
        into *doc* as Word tables, and return the text with each converted run
        replaced by a ``[TABLE_INSERTED_n]`` placeholder line.

        A run must be at least two consecutive lines (header + one data row)
        using the same separator; on any parsing failure the lines are kept
        as plain text.
        """
        import csv
        import io
        lines = content.split('\n')
        processed_lines = []
        i = 0
        while i < len(lines):
            line = lines[i].strip()
            # Heuristic: a table line splits into >= 2 fields on '|' or ','.
            is_pipe_table = '|' in line and len(line.split('|')) >= 2
            is_csv_table = ',' in line and len(line.split(',')) >= 2
            if is_pipe_table or is_csv_table:
                # Collect the consecutive run of lines with the same separator.
                table_lines = []
                j = i
                separator = '|' if is_pipe_table else ','
                while j < len(lines):
                    current_line = lines[j].strip()
                    if separator in current_line and len(current_line.split(separator)) >= 2:
                        table_lines.append(current_line)
                        j += 1
                    else:
                        break
                if len(table_lines) >= 2:  # at least header + 1 data row
                    try:
                        if separator == '|':
                            # Pipe table: simple split per line.
                            rows = []
                            for table_line in table_lines:
                                cells = [cell.strip() for cell in table_line.split('|')]
                                rows.append(cells)
                        else:
                            # Comma table: let csv handle quoting/escapes.
                            csv_content = '\n'.join(table_lines)
                            csv_reader = csv.reader(io.StringIO(csv_content))
                            rows = list(csv_reader)
                        if rows and len(rows[0]) > 0:
                            table = doc.add_table(rows=len(rows), cols=len(rows[0]))
                            table.style = 'Table Grid'
                            for row_idx, row_data in enumerate(rows):
                                for col_idx, cell_data in enumerate(row_data):
                                    if col_idx < len(table.rows[row_idx].cells):
                                        table.rows[row_idx].cells[col_idx].text = cell_data.strip()
                                # First row acts as the (bold) header.
                                if row_idx == 0:
                                    for cell in table.rows[row_idx].cells:
                                        for paragraph in cell.paragraphs:
                                            for run in paragraph.runs:
                                                run.bold = True
                            # Leave a marker where the table text used to be.
                            processed_lines.append(f"[TABLE_INSERTED_{len(processed_lines)}]")
                            i = j  # skip past the consumed table lines
                            continue
                    except Exception as e:
                        # Parsing failed: fall through and keep the line as text.
                        pass
            processed_lines.append(line)
            i += 1
        return '\n'.join(processed_lines)
def _parse_and_format_content(self, doc, content: str, title: str):
"""Parse AI-generated content in standardized format and apply proper DOCX formatting."""
if not content:
return
# Process tables and replace them with placeholders
content = self._process_tables(doc, content)
# Parse content line by line in exact sequence
lines = content.split('\n')
for line in lines:
line = line.strip()
if not line:
# Empty line - add paragraph break
doc.add_paragraph()
continue
# Skip table placeholders (already processed)
if line.startswith('[TABLE_INSERTED_'):
continue
# Check if this is a Markdown heading (# ## ###)
if line.startswith('#'):
level = len(line) - len(line.lstrip('#'))
heading_text = line.lstrip('# ').strip()
doc.add_heading(heading_text, level=min(level, 3))
# Check if this is a numbered heading (1) Title, 2) Title, etc.)
elif re.match(r'^\d+\)\s+.+', line):
heading_text = re.sub(r'^\d+\)\s+', '', line)
doc.add_heading(heading_text, level=1)
# Check if this is a Markdown list item
elif line.startswith('- ') or re.match(r'^\d+\.\s+', line):
bullet_text = re.sub(r'^[-•]\s+|\d+\.\s+', '', line)
self._add_bullet_point(doc, bullet_text)
# Check if this is a code block
elif line.startswith('```'):
if not line.endswith('```'):
# Start of code block - collect until end
code_lines = [line]
continue
else:
# End of code block
if 'code_lines' in locals():
code_lines.append(line)
code_text = '\n'.join(code_lines)
para = doc.add_paragraph()
run = para.add_run(code_text)
run.font.name = 'Courier New'
del code_lines
# Regular paragraph
else:
self._add_paragraph_to_doc(doc, line)
def _add_paragraph_to_doc(self, doc, text: str):
"""Add a paragraph to the document with proper formatting."""
if not text.strip():
return
# Check for Markdown formatting (**bold**, *italic*)
para = doc.add_paragraph()
# Split by bold markers
parts = text.split('**')
for i, part in enumerate(parts):
if i % 2 == 0:
# Regular text - check for italic
italic_parts = part.split('*')
for j, italic_part in enumerate(italic_parts):
if j % 2 == 0:
# Regular text
if italic_part:
para.add_run(italic_part)
else:
# Italic text
if italic_part:
run = para.add_run(italic_part)
run.italic = True
else:
# Bold text
if part:
run = para.add_run(part)
run.bold = True

View file

@ -0,0 +1,424 @@
"""
HTML renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported HTML formats."""
return ['html', 'htm']
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return ['web', 'webpage']
@classmethod
def get_priority(cls) -> int:
"""Return priority for HTML renderer."""
return 100
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to HTML format using AI-analyzed styling."""
try:
# Generate HTML using AI-analyzed styling
html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service)
return html_content, "text/html"
except Exception as e:
self.logger.error(f"Error rendering HTML: {str(e)}")
# Return minimal HTML fallback
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling."""
try:
# Get AI-generated styling definitions
styles = await self._get_html_styles(user_prompt, ai_service)
# Validate JSON structure
if not isinstance(json_content, dict):
raise ValueError("JSON content must be a dictionary")
if "sections" not in json_content:
raise ValueError("JSON content must contain 'sections' field")
# Use title from JSON metadata if available, otherwise use provided title
document_title = json_content.get("metadata", {}).get("title", title)
# Build HTML document
html_parts = []
# HTML document structure
html_parts.append('<!DOCTYPE html>')
html_parts.append('<html lang="en">')
html_parts.append('<head>')
html_parts.append('<meta charset="UTF-8">')
html_parts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
html_parts.append(f'<title>{document_title}</title>')
html_parts.append('<style>')
html_parts.append(self._generate_css_styles(styles))
html_parts.append('</style>')
html_parts.append('</head>')
html_parts.append('<body>')
# Document header
html_parts.append(f'<header><h1 class="document-title">{document_title}</h1></header>')
# Main content
html_parts.append('<main>')
# Process each section
sections = json_content.get("sections", [])
for section in sections:
section_html = self._render_json_section(section, styles)
if section_html:
html_parts.append(section_html)
html_parts.append('</main>')
# Footer
html_parts.append('<footer>')
html_parts.append(f'<p class="generated-info">Generated: {self._format_timestamp()}</p>')
html_parts.append('</footer>')
html_parts.append('</body>')
html_parts.append('</html>')
return '\n'.join(html_parts)
except Exception as e:
self.logger.error(f"Error generating HTML from JSON: {str(e)}")
raise Exception(f"HTML generation failed: {str(e)}")
async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
"""Get HTML styling definitions using base template AI styling."""
style_schema = {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
"heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
"heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
"paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"},
"table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"},
"table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"},
"table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"},
"bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"},
"code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"},
"image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"},
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
}
style_template = self._create_ai_style_template("html", user_prompt, style_schema)
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_html_styles())
# Validate and fix contrast issues
return self._validate_html_styles_contrast(styles)
def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_html_styles()
def _get_default_html_styles(self) -> Dict[str, Any]:
"""Default HTML styles."""
return {
"title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
"heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
"heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
"paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"},
"table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"},
"table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"},
"table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"},
"bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"},
"code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"},
"image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"},
"body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
}
def _generate_css_styles(self, styles: Dict[str, Any]) -> str:
"""Generate CSS from style definitions."""
css_parts = []
# Body styles
body_style = styles.get("body", {})
css_parts.append("body {")
for property_name, value in body_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Document title
title_style = styles.get("title", {})
css_parts.append(".document-title {")
for property_name, value in title_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Headings
for heading_level in ["heading1", "heading2"]:
heading_style = styles.get(heading_level, {})
css_class = f"h{heading_level[-1]}"
css_parts.append(f"{css_class} {{")
for property_name, value in heading_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Paragraphs
paragraph_style = styles.get("paragraph", {})
css_parts.append("p {")
for property_name, value in paragraph_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Tables
table_style = styles.get("table", {})
css_parts.append("table {")
for property_name, value in table_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Table headers
table_header_style = styles.get("table_header", {})
css_parts.append("th {")
for property_name, value in table_header_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Table cells
table_cell_style = styles.get("table_cell", {})
css_parts.append("td {")
for property_name, value in table_cell_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Lists
bullet_list_style = styles.get("bullet_list", {})
css_parts.append("ul {")
for property_name, value in bullet_list_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Code blocks
code_block_style = styles.get("code_block", {})
css_parts.append("pre {")
for property_name, value in code_block_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Images
image_style = styles.get("image", {})
css_parts.append("img {")
for property_name, value in image_style.items():
css_property = property_name.replace("_", "-")
css_parts.append(f" {css_property}: {value};")
css_parts.append("}")
# Generated info
css_parts.append(".generated-info {")
css_parts.append(" font-size: 0.9em;")
css_parts.append(" color: #666;")
css_parts.append(" text-align: center;")
css_parts.append(" margin-top: 2em;")
css_parts.append(" padding-top: 1em;")
css_parts.append(" border-top: 1px solid #ddd;")
css_parts.append("}")
return '\n'.join(css_parts)
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
    """Render one JSON section to an HTML fragment.

    Structured payloads (tables, bullet lists, code blocks, images) are
    first normalised via ``_process_section_by_type``; headings and
    paragraphs render directly from the raw section data. Unknown types
    fall back to paragraph rendering; failures degrade to an error div.
    """
    try:
        section_type = self._get_section_type(section)
        section_data = self._get_section_data(section)
        # Section types whose payload needs normalisation before rendering.
        processed_renderers = {
            "table": self._render_json_table,
            "bullet_list": self._render_json_bullet_list,
            "code_block": self._render_json_code_block,
            "image": self._render_json_image,
        }
        renderer = processed_renderers.get(section_type)
        if renderer is not None:
            return renderer(self._process_section_by_type(section), styles)
        if section_type == "heading":
            return self._render_json_heading(section_data, styles)
        # "paragraph" and any unknown type both render as a paragraph.
        return self._render_json_paragraph(section_data, styles)
    except Exception as e:
        self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
        return f'<div class="error">[Error rendering section: {str(e)}]</div>'
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON table to HTML using AI-generated styles."""
try:
headers = table_data.get("headers", [])
rows = table_data.get("rows", [])
if not headers or not rows:
return ""
html_parts = ['<table>']
# Table header
html_parts.append('<thead><tr>')
for header in headers:
html_parts.append(f'<th>{header}</th>')
html_parts.append('</tr></thead>')
# Table body
html_parts.append('<tbody>')
for row in rows:
html_parts.append('<tr>')
for cell_data in row:
html_parts.append(f'<td>{cell_data}</td>')
html_parts.append('</tr>')
html_parts.append('</tbody>')
html_parts.append('</table>')
return '\n'.join(html_parts)
except Exception as e:
self.logger.warning(f"Error rendering table: {str(e)}")
return ""
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON bullet list to HTML using AI-generated styles."""
try:
items = list_data.get("items", [])
if not items:
return ""
html_parts = ['<ul>']
for item in items:
if isinstance(item, str):
html_parts.append(f'<li>{item}</li>')
elif isinstance(item, dict) and "text" in item:
html_parts.append(f'<li>{item["text"]}</li>')
html_parts.append('</ul>')
return '\n'.join(html_parts)
except Exception as e:
self.logger.warning(f"Error rendering bullet list: {str(e)}")
return ""
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON heading to HTML using AI-generated styles."""
try:
level = heading_data.get("level", 1)
text = heading_data.get("text", "")
if text:
level = max(1, min(6, level))
return f'<h{level}>{text}</h{level}>'
return ""
except Exception as e:
self.logger.warning(f"Error rendering heading: {str(e)}")
return ""
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON paragraph to HTML using AI-generated styles."""
try:
text = paragraph_data.get("text", "")
if text:
return f'<p>{text}</p>'
return ""
except Exception as e:
self.logger.warning(f"Error rendering paragraph: {str(e)}")
return ""
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON code block to HTML using AI-generated styles."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
if code:
if language:
return f'<pre><code class="language-{language}">{code}</code></pre>'
else:
return f'<pre><code>{code}</code></pre>'
return ""
except Exception as e:
self.logger.warning(f"Error rendering code block: {str(e)}")
return ""
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a JSON image to HTML."""
try:
base64_data = image_data.get("base64Data", "")
alt_text = image_data.get("altText", "Image")
if base64_data:
return f'<img src="data:image/png;base64,{base64_data}" alt="{alt_text}">'
return ""
except Exception as e:
self.logger.warning(f"Error rendering image: {str(e)}")
return f'<div class="error">[Image: {image_data.get("altText", "Image")}]</div>'

View file

@ -0,0 +1,281 @@
"""
Image renderer for report generation using AI image generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import base64
import logging
logger = logging.getLogger(__name__)
class RendererImage(BaseRenderer):
    """Renders content to image format using AI image generation.

    Unlike the text renderers, this one has no offline fallback: any
    failure in the AI pipeline is re-raised to the caller.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported image formats."""
        return ['png', 'jpg', 'jpeg', 'image']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['img', 'picture', 'photo', 'graphic']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for image renderer."""
        return 90

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to image format using AI image generation.

        Args:
            extracted_content: Structured JSON document; must contain "sections".
            title: Fallback document title when metadata carries none.
            user_prompt: The user's original request, folded into the prompt.
            ai_service: Service exposing ``aiObjects.generateImage``; required.

        Returns:
            Tuple of (image payload, "image/png"). The payload is whatever
            ``generateImage`` returns in "image_data" — presumably base64;
            verify against the AI connector.

        Raises:
            Exception: On any generation failure (deliberately no fallback).
        """
        try:
            # Generate AI image from content
            image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
            return image_content, "image/png"
        except Exception as e:
            self.logger.error(f"Error rendering image: {str(e)}")
            # Re-raise the exception instead of using fallback
            raise Exception(f"Image rendering failed: {str(e)}")

    async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Generate AI image from extracted content.

        Validates the document, builds a size-limited prompt and calls the
        AI service's image generator.

        Raises:
            Exception: If validation fails or the service reports an error.
        """
        try:
            if not ai_service:
                raise ValueError("AI service is required for image generation")
            # Validate JSON structure
            if not isinstance(extracted_content, dict):
                raise ValueError("Extracted content must be a dictionary")
            if "sections" not in extracted_content:
                raise ValueError("Extracted content must contain 'sections' field")
            # Use title from JSON metadata if available, otherwise use provided title
            document_title = extracted_content.get("metadata", {}).get("title", title)
            # Create AI prompt for image generation
            image_prompt = await self._create_image_generation_prompt(extracted_content, document_title, user_prompt, ai_service)
            # Generate image using AI
            image_result = await ai_service.aiObjects.generateImage(
                prompt=image_prompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )
            # Extract base64 image data from result.
            # NOTE(review): the result schema ({"success", "image_data",
            # "error"}) is assumed from this usage — confirm against the
            # AI connector's contract.
            if image_result and image_result.get("success", False):
                image_data = image_result.get("image_data", "")
                if image_data:
                    return image_data
                else:
                    raise ValueError("No image data returned from AI")
            else:
                error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
                raise ValueError(f"AI image generation failed: {error_msg}")
        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            raise Exception(f"AI image generation failed: {str(e)}")

    async def _create_image_generation_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Create a detailed prompt for AI image generation based on the content.

        Assembles the user's intent, the document title, a textual summary
        of the sections and style guidance, then enforces DALL-E's
        4000-character prompt limit: AI-assisted compression first, a
        minimal title-based prompt as the last resort. Never raises — the
        worst case is a generic prompt derived from the title.
        """
        try:
            # Start with base prompt
            prompt_parts = []
            # Add user's original intent if available
            if user_prompt:
                prompt_parts.append(f"User Request: {user_prompt}")
            # Add document title
            prompt_parts.append(f"Document Title: {title}")
            # Analyze content and create visual description
            sections = extracted_content.get("sections", [])
            content_description = self._analyze_content_for_visual_description(sections)
            if content_description:
                prompt_parts.append(f"Content to Visualize: {content_description}")
            # Add style guidance
            style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
            if style_guidance:
                prompt_parts.append(f"Visual Style: {style_guidance}")
            # Combine all parts
            full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)
            # Add technical requirements
            full_prompt += "\n\nTechnical Requirements:"
            full_prompt += "\n- High quality, professional appearance"
            full_prompt += "\n- Clear, readable text if any text is included"
            full_prompt += "\n- Appropriate colors and layout"
            full_prompt += "\n- Suitable for business/professional use"
            # Truncate prompt if it exceeds DALL-E's 4000 character limit
            if len(full_prompt) > 4000:
                # Use AI to compress the prompt intelligently
                compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
                if compressed_prompt and len(compressed_prompt) <= 4000:
                    return compressed_prompt
                # Fallback to minimal prompt if AI compression fails or is still too long
                minimal_prompt = f"Create a professional image representing: {title}"
                if user_prompt:
                    minimal_prompt += f" - {user_prompt}"
                # If even the minimal prompt is too long, truncate it
                if len(minimal_prompt) > 4000:
                    # 3997 + "..." keeps the result at exactly 4000 chars.
                    minimal_prompt = minimal_prompt[:3997] + "..."
                return minimal_prompt
            return full_prompt
        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt
            return f"Create a professional image representing: {title}"

    async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
        """Use AI to intelligently compress a long prompt while preserving key information.

        Returns:
            The compressed prompt, or ``None`` when no service is
            available, the call fails, or the result is over 4000 /
            not over 50 characters (too-short results are treated as
            a failed compression).
        """
        try:
            if not ai_service:
                return None
            compression_prompt = f"""
            You are an expert at creating concise, effective prompts for AI image generation.
            The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.
            Original prompt ({len(long_prompt)} characters):
            {long_prompt}
            Please create a compressed version that:
            1. Keeps the most important visual elements and requirements
            2. Maintains the core intent and style guidance
            3. Preserves technical requirements
            4. Stays under 4000 characters
            5. Is optimized for DALL-E image generation
            Return only the compressed prompt, no explanations.
            """
            # Use AI to compress the prompt - call the AI service correctly
            # The ai_service has an aiObjects attribute that contains the actual AI interface
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
            request = AiCallRequest(
                prompt=compression_prompt,
                options=AiCallOptions(
                    operationType=OperationType.GENERAL,
                    maxTokens=2000,
                    temperature=0.3  # Lower temperature for more consistent compression
                )
            )
            response = await ai_service.aiObjects.call(request)
            compressed = response.content.strip()
            # Validate the compressed prompt
            if compressed and len(compressed) <= 4000 and len(compressed) > 50:
                self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
                return compressed
            else:
                self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
                return None
        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None

    def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI.

        Produces one short, semicolon-joined sentence per meaningful
        section (table dimensions, list length, headings, truncated text
        and code excerpts). Falls back to a generic description when
        nothing substantial is found or on error.
        """
        try:
            descriptions = []
            for section in sections:
                section_type = self._get_section_type(section)
                section_data = self._get_section_data(section)
                if section_type == "table":
                    headers = section_data.get("headers", [])
                    rows = section_data.get("rows", [])
                    if headers and rows:
                        descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")
                elif section_type == "bullet_list":
                    items = section_data.get("items", [])
                    if items:
                        descriptions.append(f"List with {len(items)} items")
                elif section_type == "heading":
                    text = section_data.get("text", "")
                    level = section_data.get("level", 1)
                    if text:
                        descriptions.append(f"Heading {level}: {text}")
                elif section_type == "paragraph":
                    text = section_data.get("text", "")
                    if text and len(text) > 10:  # Only include substantial paragraphs
                        # Truncate long text
                        truncated = text[:100] + "..." if len(text) > 100 else text
                        descriptions.append(f"Text content: {truncated}")
                elif section_type == "code_block":
                    code = section_data.get("code", "")
                    language = section_data.get("language", "")
                    if code:
                        descriptions.append(f"Code block ({language}): {code[:50]}...")
            return "; ".join(descriptions) if descriptions else "General document content"
        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"

    def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
        """Determine visual style guidance based on content and user prompt.

        Matches keyword families in the user prompt (first match wins)
        and inspects the section types present (tables, lists, code) to
        build a comma-joined style hint; defaults to a professional look.
        """
        try:
            style_elements = []
            # Analyze user prompt for style hints
            if user_prompt:
                prompt_lower = user_prompt.lower()
                if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]):
                    style_elements.append("modern, clean design")
                elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]):
                    style_elements.append("classic, formal design")
                elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]):
                    style_elements.append("creative, artistic design")
                elif any(word in prompt_lower for word in ["corporate", "business", "professional"]):
                    style_elements.append("corporate, professional design")
            # Analyze content type for additional style hints
            sections = extracted_content.get("sections", [])
            has_tables = any(self._get_section_type(s) == "table" for s in sections)
            has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections)
            has_code = any(self._get_section_type(s) == "code_block" for s in sections)
            if has_tables:
                style_elements.append("data-focused layout")
            if has_lists:
                style_elements.append("organized, structured presentation")
            if has_code:
                style_elements.append("technical, developer-friendly")
            # Default style if no specific guidance
            if not style_elements:
                style_elements.append("professional, clean design")
            return ", ".join(style_elements)
        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"

View file

@ -0,0 +1,79 @@
"""
JSON renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import json
class RendererJson(BaseRenderer):
    """Renders content to JSON format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for JSON renderer."""
        return 80

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to JSON format.

        The extracted content is already JSON-shaped from the AI; it is
        only validated, normalised and pretty-printed here.

        Returns:
            Tuple of (JSON text, "application/json"); on failure a minimal
            error document is returned instead of raising.
        """
        try:
            json_content = self._clean_json_content(extracted_content, title)
            return json_content, "application/json"
        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            # Return minimal JSON fallback
            fallback_data = {
                "title": title,
                "sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
                "metadata": {"error": str(e)}
            }
            return json.dumps(fallback_data, indent=2), "application/json"

    def _clean_json_content(self, content: Dict[str, Any], title: str) -> str:
        """Clean and validate JSON content from AI.

        Fix: normalisation now works on a shallow copy (with ``metadata``
        also copied), so default metadata/title entries are no longer
        written back into the caller's dictionary. A non-dict ``metadata``
        value is replaced with a fresh dict instead of crashing into the
        fallback path.

        Returns:
            Pretty-printed JSON text; a minimal valid document on error.
        """
        try:
            # Validate JSON structure
            if not isinstance(content, dict):
                raise ValueError("Content must be a dictionary")
            if "sections" not in content:
                # Convert old format to new format
                content = {
                    "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
                    "metadata": {"title": title}
                }
            else:
                # Copy before filling defaults so the input stays untouched.
                content = dict(content)
                metadata = content.get("metadata")
                content["metadata"] = dict(metadata) if isinstance(metadata, dict) else {}
            # Set title in metadata if not present
            if "title" not in content["metadata"]:
                content["metadata"]["title"] = title
            # Re-format with proper indentation
            return json.dumps(content, indent=2, ensure_ascii=False)
        except Exception as e:
            self.logger.warning(f"Error cleaning JSON content: {str(e)}")
            # Return minimal valid JSON
            fallback_data = {
                "sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
                "metadata": {"title": title, "error": str(e)}
            }
            return json.dumps(fallback_data, indent=2, ensure_ascii=False)

View file

@ -0,0 +1,221 @@
"""
Markdown renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
class RendererMarkdown(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to Markdown format.

        Returns:
            Tuple of (markdown text, "text/markdown"); on failure a
            minimal error document is returned instead of raising.
        """
        try:
            # Generate markdown from JSON structure
            markdown_content = self._generate_markdown_from_json(extracted_content, title)
            return markdown_content, "text/markdown"
        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Return minimal markdown fallback
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str:
        """Generate markdown content from structured JSON document.

        Raises:
            Exception: If the document is not a dict or lacks "sections".
        """
        try:
            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")
            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")
            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)
            # Build markdown content
            markdown_parts = [f"# {document_title}", ""]
            # Process each section; blank line between sections
            for section in json_content.get("sections", []):
                section_markdown = self._render_json_section(section)
                if section_markdown:
                    markdown_parts.append(section_markdown)
                    markdown_parts.append("")
            # Add generation info
            markdown_parts.append("---")
            markdown_parts.append(f"*Generated: {self._format_timestamp()}*")
            return '\n'.join(markdown_parts)
        except Exception as e:
            self.logger.error(f"Error generating markdown from JSON: {str(e)}")
            raise Exception(f"Markdown generation failed: {str(e)}")

    def _render_json_section(self, section: Dict[str, Any]) -> str:
        """Render a single JSON section to markdown.

        Structured payloads (tables, lists, code, images) are normalised
        via ``_process_section_by_type`` first; unknown types fall back to
        paragraph rendering; failures degrade to an italic error marker.
        """
        try:
            section_type = self._get_section_type(section)
            section_data = self._get_section_data(section)
            if section_type == "table":
                return self._render_json_table(self._process_section_by_type(section))
            elif section_type == "bullet_list":
                return self._render_json_bullet_list(self._process_section_by_type(section))
            elif section_type == "heading":
                return self._render_json_heading(section_data)
            elif section_type == "paragraph":
                return self._render_json_paragraph(section_data)
            elif section_type == "code_block":
                return self._render_json_code_block(self._process_section_by_type(section))
            elif section_type == "image":
                return self._render_json_image(self._process_section_by_type(section))
            else:
                # Fallback to paragraph for unknown types
                return self._render_json_paragraph(section_data)
        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            return f"*[Error rendering section: {str(e)}]*"

    @staticmethod
    def _escape_md_table_cell(value: Any) -> str:
        """Escape characters that would break a Markdown table cell.

        Pipes delimit columns and raw newlines terminate the row, so both
        must be neutralised; previously a cell containing ``|`` corrupted
        the whole table layout.
        """
        return str(value).replace("|", "\\|").replace("\n", "<br>")

    def _render_json_table(self, table_data: Dict[str, Any]) -> str:
        """Render a JSON table to a pipe-delimited markdown table."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])
            if not headers or not rows:
                return ""
            markdown_parts = []
            # Create table header (cells escaped so "|" cannot break columns)
            markdown_parts.append(" | ".join(self._escape_md_table_cell(header) for header in headers))
            # Add separator line
            markdown_parts.append(" | ".join("---" for _ in headers))
            # Add data rows
            for row in rows:
                markdown_parts.append(" | ".join(self._escape_md_table_cell(cell_data) for cell_data in row))
            return '\n'.join(markdown_parts)
        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""

    def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
        """Render a JSON bullet list to markdown ("- item" lines)."""
        try:
            items = list_data.get("items", [])
            if not items:
                return ""
            markdown_parts = []
            for item in items:
                if isinstance(item, str):
                    markdown_parts.append(f"- {item}")
                elif isinstance(item, dict) and "text" in item:
                    markdown_parts.append(f"- {item['text']}")
            return '\n'.join(markdown_parts)
        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""

    def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
        """Render a JSON heading to markdown; level clamped to 1..6."""
        try:
            level = heading_data.get("level", 1)
            text = heading_data.get("text", "")
            if text:
                level = max(1, min(6, level))
                return f"{'#' * level} {text}"
            return ""
        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""

    def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
        """Render a JSON paragraph to markdown (plain text)."""
        try:
            text = paragraph_data.get("text", "")
            return text if text else ""
        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""

    def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
        """Render a JSON code block to a fenced markdown block."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")
            if code:
                if language:
                    return f"```{language}\n{code}\n```"
                else:
                    return f"```\n{code}\n```"
            return ""
        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""

    def _render_json_image(self, image_data: Dict[str, Any]) -> str:
        """Render a JSON image to markdown.

        Base64 payloads cannot be embedded verbatim, so a truncated
        data-URI placeholder is emitted with the alt text.
        """
        try:
            alt_text = image_data.get("altText", "Image")
            base64_data = image_data.get("base64Data", "")
            if base64_data:
                # For base64 images, we can't embed them directly in markdown
                # So we'll use a placeholder with the alt text
                return f"![{alt_text}](data:image/png;base64,{base64_data[:50]}...)"
            else:
                return f"![{alt_text}](image-placeholder)"
        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            return f"![{image_data.get('altText', 'Image')}](image-error)"

View file

@ -0,0 +1,642 @@
"""
PDF renderer for report generation using reportlab.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
try:
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
REPORTLAB_AVAILABLE = True
except ImportError:
REPORTLAB_AVAILABLE = False
class RendererPdf(BaseRenderer):
"""Renders content to PDF format using reportlab."""
@classmethod
def get_supported_formats(cls) -> List[str]:
"""Return supported PDF formats."""
return ['pdf']
@classmethod
def get_format_aliases(cls) -> List[str]:
"""Return format aliases."""
return ['document', 'print']
@classmethod
def get_priority(cls) -> int:
"""Return priority for PDF renderer."""
return 120
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service)
return html_content, "text/html"
# Generate PDF using AI-analyzed styling
pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service)
return pdf_content, "application/pdf"
except Exception as e:
self.logger.error(f"Error rendering PDF: {str(e)}")
# Return minimal fallback
return f"PDF Generation Error: {str(e)}", "text/plain"
async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
    """Generate base64-encoded PDF bytes from a structured JSON document.

    Args:
        json_content: Structured document dict; must contain a "sections" list.
        title: Fallback document title when metadata carries none.
        user_prompt: Original user request, used to steer AI styling.
        ai_service: Optional AI service used for style generation.

    Returns:
        Base64-encoded PDF content as a string.

    Raises:
        RuntimeError: If validation or PDF generation fails (still an
            Exception subclass, so existing callers keep working).
    """
    try:
        # Get AI-generated styling definitions (falls back to defaults).
        styles = await self._get_pdf_styles(user_prompt, ai_service)
        # Validate JSON structure
        if not isinstance(json_content, dict):
            raise ValueError("JSON content must be a dictionary")
        if "sections" not in json_content:
            raise ValueError("JSON content must contain 'sections' field")
        # Prefer the title from document metadata, else the caller's title.
        document_title = json_content.get("metadata", {}).get("title", title)
        # Truncate overlong titles so they don't wrap/overlap on the title
        # page. (Previously any long title was silently replaced by the
        # hard-coded string "PowerOn - Consent Agreement".)
        if len(document_title) > 40:
            document_title = document_title[:37].rstrip() + "..."
        # Create a buffer to hold the PDF
        buffer = io.BytesIO()
        doc = SimpleDocTemplate(
            buffer,
            pagesize=A4,
            rightMargin=72,
            leftMargin=72,
            topMargin=72,
            bottomMargin=18
        )
        # Build PDF content
        story = []
        # Title page
        title_style = self._create_title_style(styles)
        story.append(Paragraph(document_title, title_style))
        story.append(Spacer(1, 50))  # generous spacing to prevent overlap
        story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)))
        story.append(Spacer(1, 30))  # spacing before page break
        story.append(PageBreak())
        # Process each section
        sections = json_content.get("sections", [])
        self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
        for i, section in enumerate(sections):
            self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
            section_elements = self._render_json_section(section, styles)
            self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
            story.extend(section_elements)
        # Build PDF and return it as base64
        doc.build(story)
        buffer.seek(0)
        return base64.b64encode(buffer.getvalue()).decode('utf-8')
    except Exception as e:
        self.logger.error(f"Error generating PDF from JSON: {str(e)}")
        raise RuntimeError(f"PDF generation failed: {str(e)}") from e
async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
    """Resolve PDF styling definitions, preferring AI customization."""
    # Schema offered to the AI as the customization baseline.
    style_schema = {
        "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
        "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
        "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
        "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
        "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
        "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
        "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
        "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
    }
    prompt = self._create_ai_style_template("pdf", user_prompt, style_schema)
    # Shared base-template helper (same path the DOCX renderer uses).
    resolved = await self._get_ai_styles(ai_service, prompt, self._get_default_pdf_styles())
    if resolved is None:
        return self._get_default_pdf_styles()
    # Normalize hex colors, then repair unreadable fg/bg pairings.
    resolved = self._convert_colors_format(resolved)
    return self._validate_pdf_styles_contrast(resolved)
async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
    """Get AI styles with proper PDF color conversion.

    Sends *style_template* to the AI service, defensively parses the reply
    as JSON (tolerating code fences, chatty prefixes/suffixes and truncated
    output), then converts color values for PDF use. Any failure at any
    stage falls back to *default_styles*.
    """
    if not ai_service:
        return default_styles
    try:
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL
        request = AiCallRequest(prompt=style_template, context="", options=request_options)
        # Check if AI service is properly configured
        if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
            self.logger.warning("AI service not properly configured, using defaults")
            return default_styles
        response = await ai_service.aiObjects.call(request)
        # Check if response is valid
        if not response:
            self.logger.warning("AI service returned no response, using defaults")
            return default_styles
        import json
        import re
        # Clean and parse JSON
        result = response.content.strip() if response and response.content else ""
        # Check if result is empty
        if not result:
            self.logger.warning("AI styling returned empty response, using defaults")
            return default_styles
        # Log the raw response for debugging
        self.logger.debug(f"AI styling raw response: {result[:200]}...")
        # Extract JSON from various formats: fenced ```json blocks first,
        # then bare fences without a closing newline match.
        json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
        if json_match:
            result = json_match.group(1).strip()
        elif result.startswith('```json'):
            result = re.sub(r'^```json\s*', '', result)
            result = re.sub(r'\s*```$', '', result)
        elif result.startswith('```'):
            result = re.sub(r'^```\s*', '', result)
            result = re.sub(r'\s*```$', '', result)
        # Try to extract JSON from explanatory text
        # NOTE(review): the first pattern only matches flat objects; nested
        # style dicts are handled by the brace-scan fallback further below.
        json_patterns = [
            r'\{[^{}]*"title"[^{}]*\}',  # Simple JSON object
            r'\{.*?"title".*?\}',  # JSON with title field
            r'\{.*?"font_size".*?\}',  # JSON with font_size field
        ]
        for pattern in json_patterns:
            json_match = re.search(pattern, result, re.DOTALL)
            if json_match:
                result = json_match.group(0)
                break
        # Additional cleanup - remove any leading/trailing whitespace and newlines
        result = result.strip()
        # Check if result is still empty after cleanup
        if not result:
            self.logger.warning("AI styling returned empty content after cleanup, using defaults")
            return default_styles
        # Try to parse JSON
        try:
            styles = json.loads(result)
            self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
        except json.JSONDecodeError as json_error:
            self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
            # Use print instead of logger to avoid truncation
            self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER")
            self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER")
            self.logger.warning(f"Raw content that failed to parse: {result}")
            # Try to fix incomplete (truncated) JSON by adding missing closing braces
            open_braces = result.count('{')
            close_braces = result.count('}')
            if open_braces > close_braces:
                # JSON is incomplete, add missing closing braces
                missing_braces = open_braces - close_braces
                result = result + '}' * missing_braces
                self.logger.info(f"Added {missing_braces} missing closing brace(s)")
                # Try parsing the fixed JSON
                try:
                    styles = json.loads(result)
                    self.logger.info("Successfully fixed incomplete JSON")
                except json.JSONDecodeError as fix_error:
                    self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
                    # Try to extract just the JSON part if it's embedded in text
                    json_start = result.find('{')
                    json_end = result.rfind('}')
                    if json_start != -1 and json_end != -1 and json_end > json_start:
                        json_part = result[json_start:json_end+1]
                        try:
                            styles = json.loads(json_part)
                            self.logger.info("Successfully extracted JSON from explanatory text")
                        except json.JSONDecodeError:
                            self.logger.warning("Could not extract valid JSON from response, using defaults")
                            return default_styles
                    else:
                        return default_styles
            else:
                # Braces are balanced but parsing still failed: try to
                # extract just the JSON part if it's embedded in text.
                json_start = result.find('{')
                json_end = result.rfind('}')
                if json_start != -1 and json_end != -1 and json_end > json_start:
                    json_part = result[json_start:json_end+1]
                    try:
                        styles = json.loads(json_part)
                        self.logger.info("Successfully extracted JSON from explanatory text")
                    except json.JSONDecodeError:
                        self.logger.warning("Could not extract valid JSON from response, using defaults")
                        return default_styles
                else:
                    return default_styles
        # Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
        styles = self._convert_colors_format(styles)
        return styles
    except Exception as e:
        self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
        return default_styles
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize hex color strings so downstream PDF helpers can consume them."""
    try:
        for style_name in styles:
            config = styles[style_name]
            if not isinstance(config, dict):
                continue
            for prop in config:
                value = config[prop]
                if not isinstance(value, str) or not value.startswith('#'):
                    continue
                if len(value) == 7:
                    # '#RRGGBB' -> 'FFRRGGBB': opaque alpha prepended, '#' dropped
                    config[prop] = f"FF{value[1:]}"
                # 9-char '#AARRGGBB' values are already in the expected form
        return styles
    except Exception as e:
        self.logger.warning(f"Color conversion failed: {str(e)}")
        return styles
def _get_safe_color(self, color_value: str, default: str = "#000000") -> str:
    """Return a hex color string safe to feed to the PDF color helpers."""
    if not isinstance(color_value, str) or not color_value.startswith('#'):
        return default
    if len(color_value) == 7:
        # '#RRGGBB' -> opaque 'FFRRGGBB'
        return f"FF{color_value[1:]}"
    if len(color_value) == 9:
        # Already in '#AARRGGBB' form
        return color_value
    return default
def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
    """Repair AI-generated table styles whose fg/bg would be unreadable."""
    try:
        # Replacement (background, text) pairs applied when a style has
        # both colors fully white or both fully black.
        corrections = {
            "table_header": ("#4F4F4F", "#FFFFFF"),
            "table_cell": ("#FFFFFF", "#2F2F2F"),
        }
        for style_key, (fixed_bg, fixed_text) in corrections.items():
            if style_key not in styles:
                continue
            entry = styles[style_key]
            bg = entry.get("background", "#FFFFFF").upper()
            fg = entry.get("text_color", "#000000").upper()
            if bg == fg and bg in ("#FFFFFF", "#000000"):
                entry["background"] = fixed_bg
                entry["text_color"] = fixed_text
        return styles
    except Exception as e:
        self.logger.warning(f"Style validation failed: {str(e)}")
        return self._get_default_pdf_styles()
def _get_default_pdf_styles(self) -> Dict[str, Any]:
    """Built-in fallback styling used when AI styling is unavailable or invalid."""
    body_color = "#2F2F2F"
    return {
        "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
        "heading1": {"font_size": 18, "color": body_color, "bold": True, "align": "left", "space_after": 12, "space_before": 12},
        "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
        "paragraph": {"font_size": 11, "color": body_color, "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
        "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
        "table_cell": {"background": "#FFFFFF", "text_color": body_color, "bold": False, "align": "left", "font_size": 10},
        "bullet_list": {"font_size": 11, "color": body_color, "space_after": 3},
        "code_block": {"font": "Courier", "font_size": 9, "color": body_color, "background": "#F5F5F5", "space_after": 6},
    }
def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
    """Build the ParagraphStyle used for the document title."""
    spec = styles.get("title", {})
    title_color = spec.get("color", "#1F4E79")
    space_after = spec.get("space_after", 30)
    font_size = spec.get("font_size", 20)  # smaller than schema default to limit wrapping
    # Trace the effective title styling for debugging.
    self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}", "PDF_RENDERER")
    self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {space_after}", "PDF_RENDERER")
    return ParagraphStyle(
        'CustomTitle',
        fontSize=font_size,
        spaceAfter=space_after,
        alignment=self._get_alignment(spec.get("align", "center")),
        textColor=self._hex_to_color(title_color),
        leading=font_size * 1.4,  # room for multi-line titles
        spaceBefore=0  # no space before the title
    )
def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
    """Build a ParagraphStyle for a heading of the given level (1-based)."""
    # Fall back to the heading1 definition when the level has no entry.
    spec = styles.get(f"heading{level}", styles.get("heading1", {}))
    return ParagraphStyle(
        f'CustomHeading{level}',
        fontSize=spec.get("font_size", 18 - level * 2),
        spaceAfter=spec.get("space_after", 12),
        spaceBefore=spec.get("space_before", 12),
        alignment=self._get_alignment(spec.get("align", "left")),
        textColor=self._hex_to_color(spec.get("color", "#2F2F2F"))
    )
def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
    """Build the ParagraphStyle used for body paragraphs."""
    spec = styles.get("paragraph", {})
    font_size = spec.get("font_size", 11)
    return ParagraphStyle(
        'CustomNormal',
        fontSize=font_size,
        spaceAfter=spec.get("space_after", 6),
        alignment=self._get_alignment(spec.get("align", "left")),
        textColor=self._hex_to_color(spec.get("color", "#2F2F2F")),
        # leading = line-height multiplier times the font size
        leading=spec.get("line_height", 1.2) * font_size
    )
def _get_alignment(self, align: str) -> int:
    """Convert an alignment keyword (or legacy numeric string) to a reportlab constant.

    Unknown or non-string values fall back to TA_LEFT.
    """
    if not align or not isinstance(align, str):
        return TA_LEFT
    align_map = {
        "center": TA_CENTER,
        "left": TA_LEFT,
        "justify": TA_JUSTIFY,
        # reportlab.lib.enums does provide TA_RIGHT; the previous mapping
        # silently rendered right-aligned content left-aligned.
        "right": TA_RIGHT,
        "0": TA_LEFT,  # handle numeric strings
        "1": TA_CENTER,
        "2": TA_JUSTIFY
    }
    return align_map.get(align.lower().strip(), TA_LEFT)
def _get_table_alignment(self, align: str) -> str:
    """Map an alignment keyword (or numeric string) to a ReportLab table alignment."""
    if not isinstance(align, str) or not align:
        return 'LEFT'
    key = align.lower().strip()
    if key in ("center", "1"):
        return 'CENTER'
    if key == "right":
        return 'RIGHT'
    # "justify"/"2" and anything unknown fall back to LEFT:
    # reportlab tables don't support justified cell text.
    return 'LEFT'
def _hex_to_color(self, hex_color: str) -> colors.Color:
    """Convert a hex color string ('#RRGGBB' or '[#]AARRGGBB') to a reportlab Color.

    Malformed or non-string input falls back to black.
    """
    try:
        hex_color = hex_color.lstrip('#')
        # aRGB form (8 chars): drop the alpha byte, only RGB is used here.
        if len(hex_color) == 8:
            hex_color = hex_color[2:]
        if len(hex_color) == 6:
            r = int(hex_color[0:2], 16) / 255.0
            g = int(hex_color[2:4], 16) / 255.0
            b = int(hex_color[4:6], 16) / 255.0
            return colors.Color(r, g, b)
        # Fallback for other formats
        return colors.black
    except (AttributeError, TypeError, ValueError):
        # Previously a bare `except:` which also swallowed
        # SystemExit/KeyboardInterrupt; only conversion errors belong here.
        return colors.black
def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a single JSON section to PDF flowables using AI-generated styles."""
    try:
        section_type = self._get_section_type(section)
        elements = self._get_section_data(section)
        # Content-type dispatch; unknown types render as paragraphs.
        renderers = {
            "table": self._render_json_table,
            "bullet_list": self._render_json_bullet_list,
            "heading": self._render_json_heading,
            "paragraph": self._render_json_paragraph,
            "code_block": self._render_json_code_block,
            "image": self._render_json_image,
        }
        render_element = renderers.get(section_type, self._render_json_paragraph)
        rendered = []
        for element in elements:
            rendered.extend(render_element(element, styles))
        return rendered
    except Exception as e:
        self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
        return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]
def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON table definition into reportlab flowables."""
    try:
        headers = table_data.get("headers", [])
        rows = table_data.get("rows", [])
        # A table needs both a header row and at least one body row.
        if not headers or not rows:
            return []
        table = Table([headers] + rows)
        header_spec = styles.get("table_header", {})
        cell_spec = styles.get("table_cell", {})
        header_font = 'Helvetica-Bold' if header_spec.get("bold", True) else 'Helvetica'
        table.setStyle(TableStyle([
            # Header row
            ('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(header_spec.get("background", "#4F4F4F"))),
            ('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(header_spec.get("text_color", "#FFFFFF"))),
            ('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(cell_spec.get("align", "left"))),
            ('FONTNAME', (0, 0), (-1, 0), header_font),
            ('FONTSIZE', (0, 0), (-1, 0), header_spec.get("font_size", 12)),
            ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
            # Body rows
            ('BACKGROUND', (0, 1), (-1, -1), self._hex_to_color(cell_spec.get("background", "#FFFFFF"))),
            ('FONTSIZE', (0, 1), (-1, -1), cell_spec.get("font_size", 10)),
            ('GRID', (0, 0), (-1, -1), 1, colors.black)
        ]))
        return [table, Spacer(1, 12)]
    except Exception as e:
        self.logger.warning(f"Error rendering table: {str(e)}")
        return []
def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON bullet list (plain strings or {"text": ...} dicts)."""
    try:
        bullet_spec = styles.get("bullet_list", {})
        rendered = []
        for entry in list_data.get("items", []):
            if isinstance(entry, str):
                rendered.append(Paragraph(entry, self._create_normal_style(styles)))
            elif isinstance(entry, dict) and "text" in entry:
                rendered.append(Paragraph(str(entry["text"]), self._create_normal_style(styles)))
        # Trailing spacer only when at least one item rendered.
        if rendered:
            rendered.append(Spacer(1, bullet_spec.get("space_after", 3)))
        return rendered
    except Exception as e:
        self.logger.warning(f"Error rendering bullet list: {str(e)}")
        return []
def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON heading element."""
    try:
        text = heading_data.get("text", "")
        if not text:
            return []
        # Clamp the heading level to the supported h1..h6 range.
        level = max(1, min(6, heading_data.get("level", 1)))
        return [Paragraph(text, self._create_heading_style(styles, level))]
    except Exception as e:
        self.logger.warning(f"Error rendering heading: {str(e)}")
        return []
def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON paragraph element; empty text yields no flowables."""
    try:
        text = paragraph_data.get("text", "")
        return [Paragraph(text, self._create_normal_style(styles))] if text else []
    except Exception as e:
        self.logger.warning(f"Error rendering paragraph: {str(e)}")
        return []
def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON code block: optional language label plus monospace body."""
    try:
        code = code_data.get("code", "")
        if not code:
            return []
        language = code_data.get("language", "")
        spec = styles.get("code_block", {})
        font_size = spec.get("font_size", 9)
        text_color = self._hex_to_color(spec.get("color", "#2F2F2F"))
        flowables = []
        if language:
            label_style = ParagraphStyle(
                'CodeLanguage',
                fontSize=font_size,
                textColor=text_color,
                fontName='Helvetica-Bold'
            )
            flowables.append(Paragraph(f"Code ({language}):", label_style))
        body_style = ParagraphStyle(
            'CodeBlock',
            fontSize=font_size,
            textColor=text_color,
            fontName=spec.get("font", "Courier"),
            backColor=self._hex_to_color(spec.get("background", "#F5F5F5")),
            spaceAfter=spec.get("space_after", 6)
        )
        flowables.append(Paragraph(code, body_style))
        return flowables
    except Exception as e:
        self.logger.warning(f"Error rendering code block: {str(e)}")
        return []
def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
    """Render a JSON image element as a textual placeholder."""
    try:
        alt_text = image_data.get("altText", "Image")
        # Placeholder only: embedding raw image bytes via reportlab is
        # intentionally not implemented here.
        if image_data.get("base64Data", ""):
            return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))]
        return []
    except Exception as e:
        self.logger.warning(f"Error rendering image: {str(e)}")
        return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))]

View file

@ -0,0 +1,885 @@
import logging
import base64
import io
from typing import Dict, Any, Optional, Tuple, List
from .rendererBaseTemplate import BaseRenderer
logger = logging.getLogger(__name__)
class RendererPptx(BaseRenderer):
    """Renderer for PowerPoint (.pptx) files using python-pptx library."""

    def __init__(self):
        super().__init__()
        # Formats handled by this renderer and the MIME type it emits.
        self.supported_formats = ["pptx", "ppt"]
        self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

    @classmethod
    def get_supported_formats(cls) -> list:
        """Get list of supported output formats."""
        return ["pptx", "ppt"]
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
    """
    Render content as a PowerPoint presentation from JSON data.

    Args:
        extracted_content: JSON content to render as presentation
        title: Title for the presentation
        user_prompt: User prompt for AI styling
        ai_service: AI service for styling

    Returns:
        Tuple of (base64-encoded .pptx data, MIME type). On failure a
        (plain-text error message, "text/plain") tuple is returned instead.
    """
    try:
        from pptx import Presentation
        from pptx.util import Inches, Pt
        from pptx.enum.text import PP_ALIGN
        from pptx.dml.color import RGBColor
        # Get AI-generated styling definitions first (slide size depends on them).
        styles = await self._get_pptx_styles(user_prompt, ai_service)
        prs = Presentation()
        # Set slide size based on user intent (default to 16:9).
        if styles.get("slide_size", "16:9") == "4:3":
            prs.slide_width = Inches(10)
            prs.slide_height = Inches(7.5)
        else:
            prs.slide_width = Inches(13.33)
            prs.slide_height = Inches(7.5)
        # Generate slides from JSON content.
        slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
        logger.info(f"Parsed {len(slides_data)} slides from JSON content")
        logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")
        for i, slide_data in enumerate(slides_data):
            logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
            slide_content = slide_data.get('content', '')
            if slide_content:
                logger.info(f" Content preview: '{slide_content[:100]}...'")
            else:
                logger.warning(f" ⚠️ Slide {i+1} has NO content!")
            # Create slide with appropriate layout based on content.
            slide_layout_index = self._get_slide_layout_index(slide_data, styles)
            slide = prs.slides.add_slide(prs.slide_layouts[slide_layout_index])
            # Title placeholder with AI-generated styling.
            title_shape = slide.shapes.title
            title_shape.text = slide_data.get("title", "Slide")
            title_style = styles.get("title", {})
            if title_shape.text_frame.paragraphs[0].font:
                title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44))
                title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
                title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
                title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
            # Body placeholder; assumes the chosen layout exposes
            # placeholder index 1 — TODO confirm for custom layouts.
            content_shape = slide.placeholders[1]
            text_frame = content_shape.text_frame
            text_frame.clear()
            # Bind the paragraph style up front: the alignment logic below
            # uses it for every branch (previously it was only bound in two
            # of the four branches and could raise NameError when a slide
            # started with a heading).
            paragraph_style = styles.get("paragraph", {})
            paragraphs = slide_data.get("content", "").split('\n\n')
            for j, paragraph in enumerate(paragraphs):
                if not paragraph.strip():
                    continue
                p = text_frame.paragraphs[0] if j == 0 else text_frame.add_paragraph()
                p.text = paragraph.strip()
                # NOTE: '##' must be tested before '#': every '##' string
                # also startswith '#', so the old ordering made the
                # subheading branch unreachable.
                if paragraph.startswith('##'):
                    # Subheader
                    p.text = paragraph.lstrip('#').strip()
                    subheading_style = styles.get("subheading", {})
                    p.font.size = Pt(subheading_style.get("font_size", 24))
                    p.font.bold = subheading_style.get("bold", True)
                    subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79)))
                    p.font.color.rgb = RGBColor(*subheading_color)
                elif paragraph.startswith('#'):
                    # Header
                    p.text = paragraph.lstrip('#').strip()
                    heading_style = styles.get("heading", {})
                    p.font.size = Pt(heading_style.get("font_size", 32))
                    p.font.bold = heading_style.get("bold", True)
                    heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47)))
                    p.font.color.rgb = RGBColor(*heading_color)
                elif paragraph.startswith('*') and paragraph.endswith('*'):
                    # Bold text
                    p.text = paragraph.strip('*')
                    p.font.size = Pt(paragraph_style.get("font_size", 18))
                    p.font.bold = True
                    paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
                    p.font.color.rgb = RGBColor(*paragraph_color)
                else:
                    # Regular text
                    p.font.size = Pt(paragraph_style.get("font_size", 18))
                    p.font.bold = paragraph_style.get("bold", False)
                    paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
                    p.font.color.rgb = RGBColor(*paragraph_color)
                # Alignment always follows the paragraph style.
                align = paragraph_style.get("align", "left")
                if align == "center":
                    p.alignment = PP_ALIGN.CENTER
                elif align == "right":
                    p.alignment = PP_ALIGN.RIGHT
                else:
                    p.alignment = PP_ALIGN.LEFT
        # If no slides were created, create a default title slide.
        if not slides_data:
            slide = prs.slides.add_slide(prs.slide_layouts[0])
            title_shape = slide.shapes.title
            title_shape.text = title
            title_style = styles.get("title", {})
            if title_shape.text_frame.paragraphs[0].font:
                title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 48))
                title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
                title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
                title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)
            subtitle_shape = slide.placeholders[1]
            subtitle_shape.text = "Generated by PowerOn AI System"
            paragraph_style = styles.get("paragraph", {})
            if subtitle_shape.text_frame.paragraphs[0].font:
                subtitle_shape.text_frame.paragraphs[0].font.size = Pt(paragraph_style.get("font_size", 20))
                subtitle_shape.text_frame.paragraphs[0].font.bold = paragraph_style.get("bold", False)
                paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
                subtitle_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*paragraph_color)
        # Serialize the presentation and return it base64-encoded.
        buffer = io.BytesIO()
        prs.save(buffer)
        buffer.seek(0)
        pptx_bytes = buffer.getvalue()
        pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')
        logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
        return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"
    except ImportError:
        logger.error("python-pptx library not installed. Install with: pip install python-pptx")
        return "python-pptx library not installed", "text/plain"
    except Exception as e:
        logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
        return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
def _parse_content_to_slides(self, content: str, title: str) -> list:
    """
    Parse raw text content into slide data dictionaries.

    Args:
        content: Content to parse
        title: Presentation title (unused placeholder for symmetry with callers)

    Returns:
        List of {"title": ..., "content": ...} dictionaries
    """
    slides = []
    for index, raw_section in enumerate(self._split_content_into_slides(content)):
        section = raw_section.strip()
        if not section:
            continue
        entry = {"title": f"Slide {index + 1}", "content": section}
        lines = section.split('\n')
        if lines and lines[0].startswith('#'):
            # Markdown header: strip the '#' marks and promote it to the slide title.
            entry["title"] = lines[0].lstrip('#').strip()
            entry["content"] = '\n'.join(lines[1:]).strip()
        elif lines and lines[0].strip():
            # Heuristic: a short first line without a trailing period reads
            # like a title, so promote it.
            candidate = lines[0].strip()
            if len(candidate) < 100 and not candidate.endswith('.'):
                entry["title"] = candidate
                entry["content"] = '\n'.join(lines[1:]).strip()
        slides.append(entry)
    return slides
def _split_content_into_slides(self, content: str) -> list:
    """
    Split content into per-slide chunks, preferring markdown headers.

    Args:
        content: Content to split

    Returns:
        List of slide content strings
    """
    import re
    # '#' or '##' headers are the usual slide boundaries in AI output.
    header_re = re.compile(r'^(#{1,2})\s+(.+)$')
    chunks = []
    pending = []
    for raw_line in content.split('\n'):
        if header_re.match(raw_line.strip()):
            # A header opens a new slide: flush whatever was collected.
            collected = '\n'.join(pending).strip()
            if collected:
                chunks.append(collected)
            pending = [raw_line]
        else:
            pending.append(raw_line)
    collected = '\n'.join(pending).strip()
    if collected:
        chunks.append(collected)
    # Header splitting worked: done.
    if len(chunks) > 1:
        return chunks
    # Fallbacks: triple-newline, then blank-line separated sections.
    for separator in ('\n\n\n', '\n\n'):
        parts = [part.strip() for part in content.split(separator) if part.strip()]
        if len(parts) > 1:
            return parts
    # Last resort: the whole content as a single slide.
    return [content.strip()]
def get_output_mime_type(self) -> str:
    """Get MIME type for rendered output.

    Returns the OOXML presentation MIME type assigned in ``__init__``.
    """
    return self.output_mime_type
async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
    """Resolve PowerPoint styling definitions, preferring AI customization."""
    # Schema offered to the AI as the customization baseline.
    style_schema = {
        "title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},
        "heading": {"font_size": 36, "color": "#2C5F2D", "bold": True, "align": "left"},
        "subheading": {"font_size": 28, "color": "#4A90E2", "bold": True, "align": "left"},
        "paragraph": {"font_size": 20, "color": "#2F2F2F", "bold": False, "align": "left"},
        "bullet_list": {"font_size": 20, "color": "#2F2F2F", "indent": 20},
        "table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"},
        "table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"},
        "slide_size": "16:9",
        "content_per_slide": "concise",
        "design_theme": "corporate",
        "color_scheme": "professional",
        "background_style": "clean",
        "accent_colors": ["#1B365D", "#2C5F2D", "#4A90E2", "#6B7280"],
        "professional_grade": True,
        "executive_ready": True
    }
    prompt = self._create_professional_pptx_template(user_prompt, style_schema)
    # pptx-specific helper ensures proper color conversion for python-pptx.
    resolved = await self._get_ai_styles_with_pptx_colors(ai_service, prompt, self._get_default_pptx_styles())
    # Enforce PowerPoint-specific readability constraints before use.
    return self._validate_pptx_styles_readability(resolved)
def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str:
"""Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
import json
schema_json = json.dumps(style_schema, indent=4)
return f"""Customize the JSON below for professional PowerPoint slides.
User Request: {user_prompt or "Create professional corporate slides"}
Rules:
- Use professional colors (blues, grays, deep greens)
- Large, readable font sizes
- High contrast
- Sophisticated color palettes
Return ONLY this JSON with your changes:
{schema_json}
JSON ONLY. NO OTHER TEXT."""
async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
    """Request style JSON from the AI service and convert colors for PowerPoint.

    Args:
        ai_service: Service exposing ``aiObjects.call``; when absent or not
            configured, ``default_styles`` is returned unchanged.
        style_template: Prompt instructing the model to emit style JSON.
        default_styles: Fallback style dictionary used on any failure.

    Returns:
        Parsed style dictionary with hex colors converted to RGB tuples, or
        ``default_styles`` whenever the response cannot be parsed.
    """
    if not ai_service:
        return default_styles
    try:
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL
        request = AiCallRequest(prompt=style_template, context="", options=request_options)
        # Check if AI service is properly configured
        if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
            self.logger.warning("AI service not properly configured, using defaults")
            return default_styles
        response = await ai_service.aiObjects.call(request)
        # Check if response is valid
        if not response:
            self.logger.warning("AI service returned no response, using defaults")
            return default_styles
        import json
        import re
        # Clean and parse JSON
        result = response.content.strip() if response and response.content else ""
        # Check if result is empty
        if not result:
            self.logger.warning("AI styling returned empty response, using defaults")
            return default_styles
        # Log the raw response for debugging
        self.logger.debug(f"AI styling raw response: {result[:200]}...")
        # Extract JSON from various formats (fenced ```json blocks, plain fences,
        # or raw text).
        json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
        if json_match:
            result = json_match.group(1).strip()
        elif result.startswith('```json'):
            result = re.sub(r'^```json\s*', '', result)
            result = re.sub(r'\s*```$', '', result)
        elif result.startswith('```'):
            result = re.sub(r'^```\s*', '', result)
            result = re.sub(r'\s*```$', '', result)
        # Try to extract JSON from explanatory text.
        # NOTE(review): these non-greedy patterns run even when the fence
        # extraction above already produced valid JSON, and can truncate a
        # multi-key object just past "title"/"font_size". The find('{') /
        # rfind('}') rescue below usually recovers, but the patterns look
        # over-eager — confirm intent before tightening.
        json_patterns = [
            r'\{[^{}]*"title"[^{}]*\}',  # Simple JSON object
            r'\{.*?"title".*?\}',  # JSON with title field
            r'\{.*?"font_size".*?\}',  # JSON with font_size field
        ]
        for pattern in json_patterns:
            json_match = re.search(pattern, result, re.DOTALL)
            if json_match:
                result = json_match.group(0)
                break
        # Additional cleanup - remove any leading/trailing whitespace and newlines
        result = result.strip()
        # Check if result is still empty after cleanup
        if not result:
            self.logger.warning("AI styling returned empty content after cleanup, using defaults")
            return default_styles
        # Try to parse JSON
        try:
            styles = json.loads(result)
            self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
        except json.JSONDecodeError as json_error:
            self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
            self.logger.warning(f"Raw content that failed to parse: {result[:100]}...")
            # Try to extract just the JSON part if it's embedded in text:
            # take the outermost brace span and re-parse.
            json_start = result.find('{')
            json_end = result.rfind('}')
            if json_start != -1 and json_end != -1 and json_end > json_start:
                json_part = result[json_start:json_end+1]
                try:
                    styles = json.loads(json_part)
                    self.logger.info("Successfully extracted JSON from explanatory text")
                    self.logger.debug(f"Extracted AI styles: {list(styles.keys())}")
                except json.JSONDecodeError:
                    self.logger.warning("Could not extract valid JSON from response, using defaults")
                    return default_styles
            else:
                return default_styles
        # Convert colors to PowerPoint RGB format
        styles = self._convert_colors_format(styles)
        return styles
    except Exception as e:
        self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
        return default_styles
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to RGB format for PowerPoint compatibility."""
try:
for style_name, style_config in styles.items():
if isinstance(style_config, dict):
for prop, value in style_config.items():
if isinstance(value, str) and value.startswith('#'):
# Convert hex to RGB tuple for PowerPoint
hex_color = value.lstrip('#')
if len(hex_color) == 6:
r = int(hex_color[0:2], 16)
g = int(hex_color[2:4], 16)
b = int(hex_color[4:6], 16)
styles[style_name][prop] = (r, g, b)
elif len(hex_color) == 8: # aRGB format
r = int(hex_color[2:4], 16)
g = int(hex_color[4:6], 16)
b = int(hex_color[6:8], 16)
styles[style_name][prop] = (r, g, b)
return styles
except Exception as e:
self.logger.warning(f"Color conversion failed: {str(e)}")
return styles
def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple:
"""Get a safe RGB color tuple for PowerPoint."""
if isinstance(color_value, tuple) and len(color_value) == 3:
return color_value
elif isinstance(color_value, str) and color_value.startswith('#'):
hex_color = color_value.lstrip('#')
if len(hex_color) == 6:
r = int(hex_color[0:2], 16)
g = int(hex_color[2:4], 16)
b = int(hex_color[4:6], 16)
return (r, g, b)
elif len(hex_color) == 8: # aRGB format
r = int(hex_color[2:4], 16)
g = int(hex_color[4:6], 16)
b = int(hex_color[6:8], 16)
return (r, g, b)
return default
def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix readability issues in AI-generated styles."""
try:
# Ensure minimum font sizes for PowerPoint readability
min_font_sizes = {
"title": 36,
"heading": 24,
"subheading": 20,
"paragraph": 14,
"bullet_list": 14,
"table_header": 12,
"table_cell": 12
}
for style_name, min_size in min_font_sizes.items():
if style_name in styles:
current_size = styles[style_name].get("font_size", 12)
if current_size < min_size:
styles[style_name]["font_size"] = min_size
return styles
except Exception as e:
logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_pptx_styles()
def _get_default_pptx_styles(self) -> Dict[str, Any]:
"""Default PowerPoint styles with corporate professional color scheme."""
return {
"title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},
"heading": {"font_size": 36, "color": (44, 95, 45), "bold": True, "align": "left"},
"subheading": {"font_size": 28, "color": (74, 144, 226), "bold": True, "align": "left"},
"paragraph": {"font_size": 20, "color": (47, 47, 47), "bold": False, "align": "left"},
"bullet_list": {"font_size": 20, "color": (47, 47, 47), "indent": 20},
"table_header": {"font_size": 18, "color": (255, 255, 255), "bold": True, "background": (27, 54, 93)},
"table_cell": {"font_size": 16, "color": (47, 47, 47), "bold": False, "background": (248, 249, 250)},
"slide_size": "16:9",
"content_per_slide": "concise",
"design_theme": "corporate",
"color_scheme": "professional",
"background_style": "clean",
"accent_colors": [(27, 54, 93), (44, 95, 45), (74, 144, 226), (107, 114, 128)],
"professional_grade": True,
"executive_ready": True
}
async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Parse JSON content into slide data structure.

    Builds a title slide, then delegates section grouping to
    _create_slides_from_sections; guarantees at least one content slide.

    Args:
        json_content: JSON content to parse (must contain a "sections" list)
        title: Presentation title (overridden by metadata.title when present)
        styles: AI-generated styles
    Returns:
        List of slide data dictionaries ({"title", "content"})
    """
    slides = []
    try:
        # Validate JSON structure
        if not isinstance(json_content, dict):
            raise ValueError("JSON content must be a dictionary")
        if "sections" not in json_content:
            raise ValueError("JSON content must contain 'sections' field")
        # Use title from JSON metadata if available, otherwise use provided title
        document_title = json_content.get("metadata", {}).get("title", title)
        # Create title slide
        slides.append({
            "title": document_title,
            "content": "Generated by PowerOn AI System\n\n" + self._format_timestamp()
        })
        # Process sections into slides based on content and user intent
        sections = json_content.get("sections", [])
        slides.extend(self._create_slides_from_sections(sections, styles))
        # If no content slides were created, create a default content slide
        if len(slides) == 1:  # Only title slide
            slides.append({
                "title": "Content Overview",
                "content": "No structured content found in the source documents.\n\nPlease check the source documents and try again."
            })
        return slides
    except Exception as e:
        # NOTE(review): module-level `logger` assumed here (other methods in
        # this class use self.logger) — confirm it exists in this module.
        logger.error(f"Error parsing JSON to slides: {str(e)}")
        # Return minimal fallback slides
        return [
            {
                "title": title,
                "content": "Error parsing content for presentation"
            }
        ]
def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
    """Build one slide dict ({"title", "content"}) from a JSON section.

    The slide title comes from the first heading element (for heading
    sections) or the section's own "title" field. Elements are formatted
    according to the section's content_type.

    Returns:
        Slide dictionary, or None when formatting raises.
    """
    try:
        # Resolve the slide title.
        section_title = "Untitled Section"
        if section.get("content_type") == "heading":
            # Extract text from elements array
            for element in section.get("elements", []):
                if isinstance(element, dict) and "text" in element:
                    section_title = element.get("text", "Untitled Section")
                    break
        elif section.get("title"):
            section_title = section.get("title")

        content_type = section.get("content_type", "paragraph")
        elements = section.get("elements", [])

        # Format the section body. The table formatter consumes the whole
        # element list; the other formatters each expect a single element
        # dict, so they are applied per element. (Previously the raw list
        # was passed to them, which made their internal `.get` calls fail
        # and silently produced empty slide content.)
        content_parts = []
        if content_type == "table":
            content_parts.append(self._format_table_for_slide(elements))
        else:
            if content_type == "list":
                formatter = self._format_list_for_slide
            elif content_type == "heading":
                formatter = self._format_heading_for_slide
            elif content_type == "code":
                formatter = self._format_code_for_slide
            else:  # "paragraph" and unknown types
                formatter = self._format_paragraph_for_slide
            for element in elements:
                content_parts.append(formatter(element))

        # Combine content parts, dropping empties.
        slide_content = "\n\n".join(filter(None, content_parts))
        return {
            "title": section_title,
            "content": slide_content
        }
    except Exception as e:
        logger.warning(f"Error creating slide from section: {str(e)}")
        return None
def _format_table_for_slide(self, elements: List[Dict[str, Any]]) -> str:
"""Format table data for slide presentation."""
try:
# Extract table data from elements array
headers = []
rows = []
for element in elements:
if isinstance(element, dict) and "headers" in element and "rows" in element:
headers = element.get("headers", [])
rows = element.get("rows", [])
break
if not headers:
return ""
# Create table representation
table_lines = []
# Add headers
header_line = " | ".join(str(h) for h in headers)
table_lines.append(header_line)
# Add separator
separator = "-" * len(header_line)
table_lines.append(separator)
# Add data rows (limit based on content density)
max_rows = 5 # Default limit
for row in rows[:max_rows]:
row_line = " | ".join(str(cell) for cell in row)
table_lines.append(row_line)
if len(rows) > max_rows:
table_lines.append(f"... and {len(rows) - max_rows} more rows")
return "\n".join(table_lines)
except Exception as e:
logger.warning(f"Error formatting table for slide: {str(e)}")
return ""
def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
"""Format list data for slide presentation."""
try:
items = list_data.get("items", [])
if not items:
return ""
# Create list representation
list_lines = []
for item in items:
if isinstance(item, dict):
text = item.get("text", "")
list_lines.append(f"{text}")
# Add subitems (limit to 3 for readability)
subitems = item.get("subitems", [])[:3]
for subitem in subitems:
if isinstance(subitem, dict):
list_lines.append(f" - {subitem.get('text', '')}")
else:
list_lines.append(f" - {subitem}")
else:
list_lines.append(f"{str(item)}")
return "\n".join(list_lines)
except Exception as e:
logger.warning(f"Error formatting list for slide: {str(e)}")
return ""
def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
"""Format heading data for slide presentation."""
try:
text = heading_data.get("text", "")
level = heading_data.get("level", 1)
if text:
return f"{'#' * level} {text}"
return ""
except Exception as e:
logger.warning(f"Error formatting heading for slide: {str(e)}")
return ""
def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
"""Format paragraph data for slide presentation."""
try:
text = paragraph_data.get("text", "")
if text:
# Limit paragraph length based on content density
max_length = 200 # Default limit
if len(text) > max_length:
text = text[:max_length] + "..."
return text
return ""
except Exception as e:
logger.warning(f"Error formatting paragraph for slide: {str(e)}")
return ""
def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
"""Format code data for slide presentation."""
try:
code = code_data.get("code", "")
language = code_data.get("language", "")
if code:
# Limit code length based on content density
max_length = 100 # Default limit
if len(code) > max_length:
code = code[:max_length] + "..."
if language:
return f"Code ({language}):\n{code}"
else:
return f"Code:\n{code}"
return ""
except Exception as e:
logger.warning(f"Error formatting code for slide: {str(e)}")
return ""
def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
    """Determine the best professional slide layout based on content.

    Returns 0 (title layout) for the generated title slide, 2 (content with
    caption) for mixed table+list content, and 1 (title and content) for
    everything else.
    """
    try:
        content = slide_data.get("content", "")
        title = slide_data.get("title", "")
        # Check if it's a title slide (first slide)
        if not content or "Generated by PowerOn AI System" in content:
            return 0  # Title slide layout
        # Professional layout selection based on content
        if "|" in content and "-" in content:
            # Has both tables and lists - use content with caption for professional look
            return 2
        elif "|" in content:
            # Has tables - use content layout for clean table presentation
            return 1
        elif content.count("") > 2:
            # Has many bullet points - use content layout for better readability
            # NOTE(review): str.count("") equals len(content) + 1, so this
            # branch fires for any content of length >= 2 — the argument was
            # presumably a bullet glyph lost in transit. Harmless in effect,
            # since this and every following branch return 1, but confirm the
            # intended character before relying on the distinctions below.
            return 1
        elif len(content) > 200:
            # Long content - use content layout for better text flow
            return 1
        elif title and len(title) > 20:
            # Long title - use title and content layout
            return 1
        else:
            # Default to title and content layout for professional appearance
            return 1
    except Exception as e:
        logger.warning(f"Error determining slide layout: {str(e)}")
        return 1  # Default to title and content layout
def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Create slides from sections based on content density and user intent."""
try:
slides = []
content_per_slide = styles.get("content_per_slide", "concise")
# Group sections by type and create slides
current_slide_content = []
current_slide_title = "Content Overview"
for section in sections:
section_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
if section_type == "heading":
# If we have accumulated content, create a slide
if current_slide_content:
slides.append({
"title": current_slide_title,
"content": "\n\n".join(current_slide_content)
})
current_slide_content = []
# Start new slide with heading as title
for element in elements:
if isinstance(element, dict) and "text" in element:
current_slide_title = element.get("text", "Untitled Section")
break
else:
# Add content to current slide
formatted_content = self._format_section_content(section)
if formatted_content:
current_slide_content.append(formatted_content)
# Add final slide if there's content
if current_slide_content:
slides.append({
"title": current_slide_title,
"content": "\n\n".join(current_slide_content)
})
return slides
except Exception as e:
logger.warning(f"Error creating slides from sections: {str(e)}")
return []
def _format_section_content(self, section: Dict[str, Any]) -> str:
"""Format section content for slide presentation."""
try:
content_type = section.get("content_type", "paragraph")
elements = section.get("elements", [])
# Process each element in the section
content_parts = []
for element in elements:
if content_type == "table":
content_parts.append(self._format_table_for_slide([element]))
elif content_type == "list":
content_parts.append(self._format_list_for_slide([element]))
elif content_type == "heading":
content_parts.append(self._format_heading_for_slide([element]))
elif content_type == "paragraph":
content_parts.append(self._format_paragraph_for_slide([element]))
elif content_type == "code":
content_parts.append(self._format_code_for_slide([element]))
else:
content_parts.append(self._format_paragraph_for_slide([element]))
return "\n\n".join(filter(None, content_parts))
except Exception as e:
logger.warning(f"Error formatting section content: {str(e)}")
return ""
def _format_timestamp(self) -> str:
"""Format current timestamp for presentation generation."""
from datetime import datetime, UTC
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -0,0 +1,256 @@
"""
Text renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
class RendererText(BaseRenderer):
    """Renders content to plain text format with format-specific extraction.

    Consumes the structured JSON document produced upstream (metadata +
    "sections") and emits a readable plain-text report: underlined title,
    one rendered chunk per section, and a generation timestamp footer.
    Section type/data helpers (_get_section_type, _get_section_data,
    _process_section_by_type, _get_section_id) are inherited from
    BaseRenderer — presumably; confirm against rendererBaseTemplate.
    """

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        return [
            'txt', 'text', 'plain',
            # Programming languages
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
            # Web technologies (excluding html/htm which have dedicated renderer)
            'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
            # Data formats (excluding csv, md/markdown which have dedicated renderers)
            'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
            # Configuration files
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
            # Documentation
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs'
        ]

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases that also resolve to this renderer."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext'
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer (used by renderer selection)."""
        return 90

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to plain text format.

        Args:
            extracted_content: Structured JSON document with a "sections" list.
            title: Report title (may be overridden by metadata.title).
            user_prompt: Unused here; kept for renderer interface parity.
            ai_service: Unused here; kept for renderer interface parity.

        Returns:
            Tuple of (text content, "text/plain"); on failure a minimal
            error report is returned instead of raising.
        """
        try:
            # Generate text from JSON structure
            text_content = self._generate_text_from_json(extracted_content, title)
            return text_content, "text/plain"
        except Exception as e:
            self.logger.error(f"Error rendering text: {str(e)}")
            # Return minimal text fallback
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

    def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str:
        """Generate text content from structured JSON document.

        Raises:
            Exception: When the JSON is not a dict, lacks "sections", or any
                other generation failure occurs (wrapped with context).
        """
        try:
            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")
            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")
            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)
            # Build text content
            text_parts = []
            # Document title, underlined to full width
            text_parts.append(document_title)
            text_parts.append("=" * len(document_title))
            text_parts.append("")
            # Process each section
            sections = json_content.get("sections", [])
            for section in sections:
                section_text = self._render_json_section(section)
                if section_text:
                    text_parts.append(section_text)
                    text_parts.append("")  # Add spacing between sections
            # Add generation info
            text_parts.append("")
            text_parts.append(f"Generated: {self._format_timestamp()}")
            return '\n'.join(text_parts)
        except Exception as e:
            self.logger.error(f"Error generating text from JSON: {str(e)}")
            raise Exception(f"Text generation failed: {str(e)}")

    def _render_json_section(self, section: Dict[str, Any]) -> str:
        """Render a single JSON section to text, dispatching on section type.

        Failures are contained: an "[Error rendering section: ...]" marker is
        returned instead of propagating the exception.
        """
        try:
            section_type = self._get_section_type(section)
            section_data = self._get_section_data(section)
            if section_type == "table":
                # Process the section data to extract table structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_table(processed_data)
            elif section_type == "bullet_list":
                # Process the section data to extract bullet list structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_bullet_list(processed_data)
            elif section_type == "heading":
                # Render each heading element in the elements array
                # section_data is already the elements array from _get_section_data
                rendered_elements = []
                for element in section_data:
                    rendered_elements.append(self._render_json_heading(element))
                return "\n".join(rendered_elements)
            elif section_type == "paragraph":
                # Render each paragraph element in the elements array
                # section_data is already the elements array from _get_section_data
                rendered_elements = []
                for element in section_data:
                    rendered_elements.append(self._render_json_paragraph(element))
                return "\n".join(rendered_elements)
            elif section_type == "code_block":
                # Process the section data to extract code block structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_code_block(processed_data)
            elif section_type == "image":
                # Process the section data to extract image structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_image(processed_data)
            else:
                # Fallback to paragraph for unknown types - render each element
                # section_data is already the elements array from _get_section_data
                rendered_elements = []
                for element in section_data:
                    rendered_elements.append(self._render_json_paragraph(element))
                return "\n".join(rendered_elements)
        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            return f"[Error rendering section: {str(e)}]"

    def _render_json_table(self, table_data: Dict[str, Any]) -> str:
        """Render a JSON table (headers + rows) as pipe-separated text."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])
            if not headers or not rows:
                return ""
            text_parts = []
            # Create table header
            header_line = " | ".join(str(header) for header in headers)
            text_parts.append(header_line)
            # Add separator line (one dash-run per column, matching header width)
            separator_line = " | ".join("-" * len(str(header)) for header in headers)
            text_parts.append(separator_line)
            # Add data rows
            for row in rows:
                row_line = " | ".join(str(cell_data) for cell_data in row)
                text_parts.append(row_line)
            return '\n'.join(text_parts)
        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""

    def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
        """Render a JSON bullet list to dash-prefixed text lines."""
        try:
            items = list_data.get("items", [])
            if not items:
                return ""
            text_parts = []
            for item in items:
                # Items may be plain strings or {"text": ...} dicts.
                if isinstance(item, str):
                    text_parts.append(f"- {item}")
                elif isinstance(item, dict) and "text" in item:
                    text_parts.append(f"- {item['text']}")
            return '\n'.join(text_parts)
        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""

    def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
        """Render a JSON heading: '=' / '-' underlines for levels 1-2, '#' prefixes otherwise."""
        try:
            level = heading_data.get("level", 1)
            text = heading_data.get("text", "")
            if text:
                # Clamp the level into the 1..6 range before formatting.
                level = max(1, min(6, level))
                if level == 1:
                    return f"{text}\n{'=' * len(text)}"
                elif level == 2:
                    return f"{text}\n{'-' * len(text)}"
                else:
                    return f"{'#' * level} {text}"
            return ""
        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""

    def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
        """Render a JSON paragraph: the raw text, or "" when absent."""
        try:
            text = paragraph_data.get("text", "")
            return text if text else ""
        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""

    def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
        """Render a JSON code block, prefixed with its language when known."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")
            if code:
                if language:
                    return f"Code ({language}):\n{code}"
                else:
                    return code
            return ""
        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""

    def _render_json_image(self, image_data: Dict[str, Any]) -> str:
        """Render a JSON image as a text placeholder using its alt text."""
        try:
            alt_text = image_data.get("altText", "Image")
            return f"[Image: {alt_text}]"
        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            return f"[Image: {image_data.get('altText', 'Image')}]"

View file

@ -0,0 +1,791 @@
"""
Excel renderer for report generation using openpyxl.
"""
from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC
# Optional-dependency guard: openpyxl powers real .xlsx output. When it is
# missing, OPENPYXL_AVAILABLE is False and the renderer degrades to CSV.
try:
    from openpyxl import Workbook
    from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
    from openpyxl.utils import get_column_letter
    from openpyxl.worksheet.table import Table, TableStyleInfo
    OPENPYXL_AVAILABLE = True
except ImportError:
    OPENPYXL_AVAILABLE = False
class RendererXlsx(BaseRenderer):
"""Renders content to Excel format using openpyxl."""
@classmethod
def get_supported_formats(cls) -> List[str]:
    """Excel file formats handled by this renderer."""
    supported = ['xlsx', 'xls', 'excel']
    return supported
@classmethod
def get_format_aliases(cls) -> List[str]:
    """Alias names that also resolve to the Excel renderer."""
    aliases = ['spreadsheet', 'workbook']
    return aliases
@classmethod
def get_priority(cls) -> int:
    """Selection priority of the Excel renderer."""
    priority = 110
    return priority
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
    """Render extracted JSON content to Excel format using AI-analyzed styling.

    Returns:
        (base64-encoded .xlsx bytes, xlsx MIME type) on success. When
        openpyxl is unavailable, delegates to the CSV renderer and returns
        (csv text, "text/csv"); on error returns a one-row CSV fallback.
    """
    try:
        if not OPENPYXL_AVAILABLE:
            # Fallback to CSV if openpyxl not available
            from .rendererCsv import RendererCsv
            csv_renderer = RendererCsv()
            csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
            return csv_content, "text/csv"
        # Generate Excel using AI-analyzed styling
        excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)
        return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
    except Exception as e:
        self.logger.error(f"Error rendering Excel: {str(e)}")
        # Return CSV fallback
        return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
def _generate_excel(self, content: str, title: str) -> str:
    """Generate Excel content using openpyxl.

    Builds a three-sheet workbook (Summary, Data, Analysis) from plain text
    *content* and returns the serialized .xlsx as a base64 string.
    Re-raises any failure after logging.
    """
    try:
        # Create workbook
        wb = Workbook()
        # Remove default sheet so our named sheets start at index 0
        wb.remove(wb.active)
        # Create sheets
        summary_sheet = wb.create_sheet("Summary", 0)
        data_sheet = wb.create_sheet("Data", 1)
        analysis_sheet = wb.create_sheet("Analysis", 2)
        # Add content to sheets
        self._populate_summary_sheet(summary_sheet, title)
        self._populate_data_sheet(data_sheet, content)
        self._populate_analysis_sheet(analysis_sheet, content)
        # Save to buffer
        buffer = io.BytesIO()
        wb.save(buffer)
        buffer.seek(0)
        # Convert to base64 for transport
        excel_bytes = buffer.getvalue()
        excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
        return excel_base64
    except Exception as e:
        self.logger.error(f"Error generating Excel: {str(e)}")
        raise
def _populate_summary_sheet(self, sheet, title: str):
    """Populate the summary sheet with title, timestamps and a live count formula.

    Failures are logged and swallowed so a partial summary never aborts the
    whole workbook generation.
    """
    try:
        # Title
        sheet['A1'] = title
        sheet['A1'].font = Font(size=16, bold=True)
        sheet['A1'].alignment = Alignment(horizontal='center')
        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._format_timestamp()
        sheet['A4'] = "Status:"
        sheet['B4'] = "Generated Successfully"
        # Key metrics placeholder
        sheet['A6'] = "Key Metrics:"
        sheet['A6'].font = Font(bold=True)
        sheet['A7'] = "Total Items:"
        # Excel formula evaluated at open time, not at generation time
        sheet['B7'] = "=COUNTA(Data!A:A)-1"  # Count non-empty cells in Data sheet
        # Auto-adjust column widths
        sheet.column_dimensions['A'].width = 20
        sheet.column_dimensions['B'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate summary sheet: {str(e)}")
def _populate_data_sheet(self, sheet, content: str):
    """Populate the data sheet from plain-text *content*.

    Pipe-delimited lines are split across up to five columns; all other
    non-empty lines land in column A. Failures are logged and swallowed.
    """
    try:
        # Headers
        headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
        for col, header in enumerate(headers, 1):
            cell = sheet.cell(row=1, column=col, value=header)
            cell.font = Font(bold=True)
            cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
        # Process content line by line
        lines = content.split('\n')
        row = 2
        for line in lines:
            line = line.strip()
            if not line:
                continue
            # Check for table data (lines with |)
            if '|' in line:
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                for col, cell_data in enumerate(cells[:5], 1):  # Limit to 5 columns
                    sheet.cell(row=row, column=col, value=cell_data)
                row += 1
            else:
                # Regular content goes into the first column
                sheet.cell(row=row, column=1, value=line)
                row += 1
        # Auto-adjust column widths
        for col in range(1, 6):
            sheet.column_dimensions[get_column_letter(col)].width = 20
    except Exception as e:
        self.logger.warning(f"Could not populate data sheet: {str(e)}")
def _populate_analysis_sheet(self, sheet, content: str):
    """Populate the analysis sheet with simple line-type counts and static tips.

    Failures are logged and swallowed so the workbook still saves.
    """
    try:
        # Title
        sheet['A1'] = "Analysis & Insights"
        sheet['A1'].font = Font(size=14, bold=True)
        # Content analysis
        lines = content.split('\n')
        row = 3
        sheet['A3'] = "Content Analysis:"
        sheet['A3'].font = Font(bold=True)
        row += 1
        # Count different types of content: '|' marks table rows, leading
        # "- " / "* " marks list items, the remainder counts as plain text.
        table_lines = sum(1 for line in lines if '|' in line)
        list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
        text_lines = len(lines) - table_lines - list_lines
        sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
        row += 1
        sheet[f'A{row}'] = f"Table Rows: {table_lines}"
        row += 1
        sheet[f'A{row}'] = f"List Items: {list_lines}"
        row += 1
        sheet[f'A{row}'] = f"Text Lines: {text_lines}"
        row += 2
        # Recommendations (static boilerplate)
        sheet[f'A{row}'] = "Recommendations:"
        sheet[f'A{row}'].font = Font(bold=True)
        row += 1
        sheet[f'A{row}'] = "1. Review data accuracy"
        row += 1
        sheet[f'A{row}'] = "2. Consider additional analysis"
        row += 1
        sheet[f'A{row}'] = "3. Update regularly"
        # Auto-adjust column width
        sheet.column_dimensions['A'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
    """Generate Excel content from structured JSON document using AI-generated styling.

    Validates the JSON envelope, fetches AI styles, builds and populates the
    workbook via sheet helpers, and returns the .xlsx as a base64 string.

    Raises:
        Exception: Wrapping any failure with an "Excel generation failed"
            message.
    """
    try:
        # Debug output (file-based debug log via the shared services object)
        self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(json_content)}", "EXCEL_RENDERER")
        self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}", "EXCEL_RENDERER")
        # Get AI-generated styling definitions
        styles = await self._get_excel_styles(user_prompt, ai_service)
        # Validate JSON structure
        if not isinstance(json_content, dict):
            raise ValueError("JSON content must be a dictionary")
        if "sections" not in json_content:
            raise ValueError("JSON content must contain 'sections' field")
        # Use title from JSON metadata if available, otherwise use provided title
        # NOTE(review): document_title is computed but not passed on below —
        # confirm whether _create_excel_sheets is expected to consume it.
        document_title = json_content.get("metadata", {}).get("title", title)
        # Create workbook
        wb = Workbook()
        # Create sheets based on content
        sheets = self._create_excel_sheets(wb, json_content, styles)
        self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
        # Populate sheets with content
        self._populate_excel_sheets(sheets, json_content, styles)
        # Save to buffer
        buffer = io.BytesIO()
        wb.save(buffer)
        buffer.seek(0)
        # Convert to base64
        excel_bytes = buffer.getvalue()
        self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excel_bytes)}", "EXCEL_RENDERER")
        try:
            excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
            self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excel_base64)}", "EXCEL_RENDERER")
        except Exception as b64_error:
            self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
            raise
        return excel_base64
    except Exception as e:
        self.logger.error(f"Error generating Excel from JSON: {str(e)}")
        raise Exception(f"Excel generation failed: {str(e)}")
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
    """Get Excel styling definitions using base template AI styling.

    Builds the xlsx style-schema prompt from *user_prompt*, asks the AI to
    fill it (falling back to defaults on any failure), then repairs
    unreadable foreground/background combinations.
    """
    # Style roles the AI is asked to fill in; colors here are aRGB "#AARRGGBB".
    style_schema = {
        "title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
        "heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
        "table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
        "table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
        "bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
        "paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
        "code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
    }
    style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
    # Use our own _get_ai_styles_with_excel_colors method to ensure proper color conversion
    styles = await self._get_ai_styles_with_excel_colors(ai_service, style_template, self._get_default_excel_styles())
    # Validate and fix contrast issues
    return self._validate_excel_styles_contrast(styles)
async def _get_ai_styles_with_excel_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
    """Get AI styles with proper Excel color conversion.

    Sends *style_template* to the AI, parses the JSON reply (unwrapping an
    optional markdown code fence first), and converts hex colors to Excel
    aRGB. Any failure — missing service, empty reply, bad JSON, or an
    exception — falls back to *default_styles*.
    """
    if not ai_service:
        return default_styles
    try:
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL
        request = AiCallRequest(prompt=style_template, context="", options=request_options)
        response = await ai_service.aiObjects.call(request)
        import json
        import re
        # Clean and parse JSON
        result = response.content.strip() if response and response.content else ""
        # Check if result is empty
        if not result:
            self.logger.warning("AI styling returned empty response, using defaults")
            return default_styles
        # Extract JSON from a well-formed ```json fence if present
        json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
        if json_match:
            result = json_match.group(1).strip()
            self.services.utils.debugLogToFile(f"EXTRACTED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
        elif result.startswith('```json'):
            # Fence without a matching regex hit -- strip the markers manually
            result = re.sub(r'^```json\s*', '', result)
            result = re.sub(r'\s*```$', '', result)
            self.services.utils.debugLogToFile(f"CLEANED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
        elif result.startswith('```'):
            result = re.sub(r'^```\s*', '', result)
            result = re.sub(r'\s*```$', '', result)
            self.services.utils.debugLogToFile(f"CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
        # Try to parse JSON
        try:
            styles = json.loads(result)
        except json.JSONDecodeError as json_error:
            self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
            return default_styles
        # Convert colors to Excel aRGB format
        styles = self._convert_colors_format(styles)
        return styles
    except Exception as e:
        self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
        return default_styles
def _get_safe_color(self, color_value: str, default: str = "FF000000") -> str:
"""Get a safe aRGB color value for Excel (without # prefix)."""
if not isinstance(color_value, str):
return default
# Remove # prefix if present
if color_value.startswith('#'):
color_value = color_value[1:]
if len(color_value) == 6:
# Convert RRGGBB to AARRGGBB
return f"FF{color_value}"
elif len(color_value) == 8:
# Already aRGB format
return color_value
else:
# Unexpected format, return default
return default
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Convert hex colors to aRGB format for Excel compatibility."""
try:
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
for style_name, style_config in styles.items():
if isinstance(style_config, dict):
for prop, value in style_config.items():
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
styles[style_name][prop] = f"FF{value[1:]}"
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
pass # Already aRGB format
elif isinstance(value, str) and value.startswith('#'):
pass # Unexpected format, keep as is
return styles
except Exception as e:
return styles
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
"""Validate and fix contrast issues in AI-generated styles."""
try:
# Fix table header contrast
if "table_header" in styles:
header = styles["table_header"]
bg_color = header.get("background", "#FFFFFF")
text_color = header.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
header["background"] = "#4F4F4F"
header["text_color"] = "#FFFFFF"
# Fix table cell contrast
if "table_cell" in styles:
cell = styles["table_cell"]
bg_color = cell.get("background", "#FFFFFF")
text_color = cell.get("text_color", "#000000")
# If both are white or both are dark, fix it
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
cell["background"] = "#FFFFFF"
cell["text_color"] = "#2F2F2F"
return styles
except Exception as e:
self.logger.warning(f"Style validation failed: {str(e)}")
return self._get_default_excel_styles()
def _get_default_excel_styles(self) -> Dict[str, Any]:
"""Default Excel styles with aRGB color format."""
return {
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
}
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
    """Create Excel sheets based on content structure and user intent.

    Sheet names come from the AI styles ("sheet_names") when provided,
    otherwise they are derived from the document content. Returns a mapping
    of lowercased sheet name -> worksheet object.
    """
    sheets = {}
    # Get sheet names from AI styles or generate based on content
    sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
    self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheet_names}", "EXCEL_RENDERER")
    # Create sheets
    for i, sheet_name in enumerate(sheet_names):
        if i == 0:
            # Use the default sheet for the first sheet (a new Workbook always has one)
            sheet = wb.active
            sheet.title = sheet_name
        else:
            # Create additional sheets
            sheet = wb.create_sheet(sheet_name, i)
        sheets[sheet_name.lower()] = sheet
    return sheets
def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
"""Generate sheet names based on actual content structure."""
sections = json_content.get("sections", [])
# If no sections, create a single sheet
if not sections:
return ["Content"]
# Generate sheet names based on content structure
sheet_names = []
# Check if we have multiple table sections
table_sections = [s for s in sections if s.get("content_type") == "table"]
if len(table_sections) > 1:
# Create separate sheets for each table
for i, section in enumerate(table_sections, 1):
section_title = section.get("title", f"Table {i}")
sheet_names.append(section_title[:31]) # Excel sheet name limit
else:
# Single table or mixed content - create main sheet
document_title = json_content.get("metadata", {}).get("title", "Document")
sheet_names.append(document_title[:31]) # Excel sheet name limit
# Add additional sheets for other content types
content_types = set()
for section in sections:
content_type = section.get("content_type", "paragraph")
content_types.add(content_type)
if "table" in content_types and len(table_sections) == 1:
sheet_names.append("Table Data")
if "list" in content_types:
sheet_names.append("Lists")
if "paragraph" in content_types or "heading" in content_types:
sheet_names.append("Text")
# Limit to 4 sheets maximum
return sheet_names[:4]
def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
    """Populate Excel sheets with content from JSON based on actual sheet names.

    With multiple table sections, each table goes to its own sheet (in the
    dict-insertion order of *sheets*); otherwise the first sheet receives
    everything and any extra sheets are filled by content type. Errors are
    logged, never raised.
    """
    try:
        # Get the actual sheet names that were created
        sheet_names = list(sheets.keys())
        if not sheet_names:
            return
        sections = json_content.get("sections", [])
        table_sections = [s for s in sections if s.get("content_type") == "table"]
        if len(table_sections) > 1:
            # Multiple tables - populate each sheet with its corresponding table
            for i, section in enumerate(table_sections):
                if i < len(sheet_names):
                    sheet_name = sheet_names[i]
                    sheet = sheets[sheet_name]
                    self._populate_table_sheet(sheet, section, styles, f"Table {i+1}")
        else:
            # Single table or mixed content - use original logic
            first_sheet_name = sheet_names[0]
            self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)
            # If we have multiple sheets, distribute content by type
            if len(sheet_names) > 1:
                self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])
    except Exception as e:
        self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
def _populate_table_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheet_title: str):
    """Populate a sheet with a single table section.

    Layout: title in A1, headers on row 3, data from row 4. Only the first
    element of the section is rendered. Errors are logged, never raised.
    """
    try:
        # Sheet title
        sheet['A1'] = sheet_title
        sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79")))
        sheet['A1'].alignment = Alignment(horizontal="center")
        # Get table data from elements (canonical JSON format)
        elements = section.get("elements", [])
        if elements and isinstance(elements, list) and len(elements) > 0:
            # Only the first table element of the section is used here.
            table_data = elements[0]
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])
        else:
            headers = []
            rows = []
        if not headers and not rows:
            sheet['A3'] = "No table data available"
            return
        # Add headers
        header_style = styles.get("table_header", {})
        for col, header in enumerate(headers, 1):
            cell = sheet.cell(row=3, column=col, value=header)
            if header_style.get("bold"):
                cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
            if header_style.get("background"):
                cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
        # Add rows
        cell_style = styles.get("table_cell", {})
        for row_idx, row_data in enumerate(rows, 4):
            for col_idx, cell_value in enumerate(row_data, 1):
                cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value)
                if cell_style.get("text_color"):
                    cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
        # Fixed width per header column (not content-measured, despite the name)
        for col in range(1, len(headers) + 1):
            sheet.column_dimensions[get_column_letter(col)].width = 20
    except Exception as e:
        self.logger.warning(f"Could not populate table sheet: {str(e)}")
def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
    """Populate the main sheet with document overview and all content.

    Layout: title (A1), generation info (rows 3-4), optional metadata block
    (from row 6), a content-overview summary, then every section rendered in
    order. Failures are logged and leave the sheet partially populated.
    """
    try:
        # Document title
        document_title = json_content.get("metadata", {}).get("title", "Generated Report")
        sheet['A1'] = document_title
        # Safety check for title style
        title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
        try:
            safe_color = self._get_safe_color(title_style["color"])
            sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
            sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
        except Exception:
            # Fall back to plain black if the AI-provided color is unusable.
            sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000")
            sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
        # Generation info
        sheet['A3'] = "Generated:"
        sheet['B3'] = self._format_timestamp()
        sheet['A4'] = "Status:"
        sheet['B4'] = "Generated Successfully"
        # Document metadata.
        # Fix: initialize `row` before the conditional -- previously it was
        # assigned only inside `if metadata:`, so an empty metadata dict
        # raised UnboundLocalError at the "Content Overview" block below
        # (silently swallowed by the outer except, leaving the sheet
        # half-populated).
        row = 5
        metadata = json_content.get("metadata", {})
        if metadata:
            sheet['A6'] = "Document Information:"
            sheet['A6'].font = Font(bold=True)
            row = 7
            for key, value in metadata.items():
                if key != "title":
                    sheet[f'A{row}'] = f"{key.title()}:"
                    sheet[f'B{row}'] = str(value)
                    row += 1
        # Content overview
        sections = json_content.get("sections", [])
        sheet[f'A{row + 1}'] = "Content Overview:"
        sheet[f'A{row + 1}'].font = Font(bold=True)
        row += 2
        sheet[f'A{row}'] = f"Total Sections: {len(sections)}"
        # Count different content types
        content_types = {}
        for section in sections:
            content_type = section.get("content_type", "unknown")
            content_types[content_type] = content_types.get(content_type, 0) + 1
        for content_type, count in content_types.items():
            row += 1
            sheet[f'A{row}'] = f"{content_type.title()} Sections: {count}"
        # Add all content to this sheet
        row += 2
        for section in sections:
            row = self._add_section_to_sheet(sheet, section, styles, row)
            row += 1  # Empty row between sections
        # Fixed column widths for the label/value columns
        sheet.column_dimensions['A'].width = 20
        sheet.column_dimensions['B'].width = 30
    except Exception as e:
        self.logger.warning(f"Could not populate main sheet: {str(e)}")
def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
    """Populate additional sheets based on content types.

    Each named sheet gets a heading in A1 and the sections whose
    content_type matches the sheet's purpose. Errors are logged, never raised.
    """
    try:
        sections = json_content.get("sections", [])
        for sheet_name in sheet_names:
            if sheet_name not in sheets:
                continue
            sheet = sheets[sheet_name]
            sheet_title = sheet_name.title()
            sheet['A1'] = sheet_title
            sheet['A1'].font = Font(size=16, bold=True)
            row = 3
            # Filter sections by content type.
            # NOTE(review): keys in `sheets` are the lowercased titles from
            # _generate_sheet_names_from_content ("table data", "lists",
            # "text"); the "tables" branch below never matches those keys, so
            # a "table data" sheet falls through to the `else` and receives
            # ALL sections. Confirm the intended key before changing.
            if sheet_name == "tables":
                filtered_sections = [s for s in sections if s.get("content_type") == "table"]
            elif sheet_name == "lists":
                filtered_sections = [s for s in sections if s.get("content_type") == "list"]
            elif sheet_name == "text":
                filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
            else:
                filtered_sections = sections
            for section in filtered_sections:
                row = self._add_section_to_sheet(sheet, section, styles, row)
                row += 1  # Empty row between sections
            # Fixed width for the first five columns
            for col in range(1, 6):
                sheet.column_dimensions[get_column_letter(col)].width = 20
    except Exception as e:
        self.logger.warning(f"Could not populate content type sheets: {str(e)}")
def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
    """Add a section to a sheet and return the next free row.

    Writes an optional "# title" line, then renders every element of the
    section according to the section-level content_type (unknown types fall
    back to paragraph rendering).
    """
    try:
        # Add section title
        section_title = section.get("title")
        if section_title:
            sheet[f'A{start_row}'] = f"# {section_title}"
            sheet[f'A{start_row}'].font = Font(bold=True)
            start_row += 1
        # Process section based on type
        section_type = section.get("content_type", "paragraph")
        # Handle all section types using elements array; each element is
        # dispatched by the SECTION's content_type, not a per-element type.
        elements = section.get("elements", [])
        for element in elements:
            if section_type == "table":
                start_row = self._add_table_to_excel(sheet, element, styles, start_row)
            elif section_type == "list":
                start_row = self._add_list_to_excel(sheet, element, styles, start_row)
            elif section_type == "paragraph":
                start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
            elif section_type == "heading":
                start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
            else:
                # Unknown types fall back to plain paragraph rendering
                start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
        return start_row
    except Exception as e:
        self.logger.warning(f"Could not add section to sheet: {str(e)}")
        return start_row + 1
def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
    """Add a table element to an Excel sheet and return the next free row.

    Headers go on *start_row*, data rows follow. An element with neither
    headers nor rows writes nothing.
    """
    try:
        # In canonical JSON format, table elements have headers and rows directly
        headers = element.get("headers", [])
        rows = element.get("rows", [])
        if not headers and not rows:
            return start_row
        # Add headers
        header_style = styles.get("table_header", {})
        for col, header in enumerate(headers, 1):
            cell = sheet.cell(row=start_row, column=col, value=header)
            if header_style.get("bold"):
                cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
            if header_style.get("background"):
                cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")
        start_row += 1
        # Add rows
        cell_style = styles.get("table_cell", {})
        for row_data in rows:
            for col, cell_value in enumerate(row_data, 1):
                cell = sheet.cell(row=start_row, column=col, value=cell_value)
                if cell_style.get("text_color"):
                    cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
            start_row += 1
        return start_row
    except Exception as e:
        self.logger.warning(f"Could not add table to Excel: {str(e)}")
        return start_row + 1
def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a list element to Excel sheet."""
try:
list_items = element.get("items", [])
list_style = styles.get("bullet_list", {})
for item in list_items:
sheet.cell(row=start_row, column=1, value=f"{item}")
if list_style.get("color"):
sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(list_style["color"]))
start_row += 1
return start_row
except Exception as e:
self.logger.warning(f"Could not add list to Excel: {str(e)}")
return start_row + 1
def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a paragraph element to Excel sheet."""
try:
text = element.get("text", "")
if text:
sheet.cell(row=start_row, column=1, value=text)
paragraph_style = styles.get("paragraph", {})
if paragraph_style.get("color"):
sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(paragraph_style["color"]))
start_row += 1
return start_row
except Exception as e:
self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
return start_row + 1
def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
"""Add a heading element to Excel sheet."""
try:
text = element.get("text", "")
level = element.get("level", 1)
if text:
sheet.cell(row=start_row, column=1, value=text)
heading_style = styles.get("heading", {})
font_size = heading_style.get("font_size", 14)
if level > 1:
font_size = max(10, font_size - (level - 1) * 2)
sheet.cell(row=start_row, column=1).font = Font(
size=font_size,
bold=True,
color=self._get_safe_color(heading_style.get("color", "FF000000"))
)
start_row += 1
return start_row
except Exception as e:
self.logger.warning(f"Could not add heading to Excel: {str(e)}")
return start_row + 1
def _format_timestamp(self) -> str:
"""Format current timestamp for document generation."""
return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

View file

@ -1,94 +0,0 @@
"""
Text renderer for report generation.
"""
from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
class TextRenderer(BaseRenderer):
    """Renders content to plain text format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported text formats (excluding formats with dedicated renderers)."""
        plain = ['txt', 'text', 'plain']
        programming = [
            'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
            'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
            'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
            'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
            'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
        ]
        # Web technologies (html/htm have a dedicated renderer and are excluded)
        web = ['css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg']
        # Data formats (csv and md/markdown have dedicated renderers)
        data = ['tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore']
        config = [
            'env', 'properties', 'conf', 'config', 'rc',
            'gitattributes', 'editorconfig', 'eslintrc',
        ]
        documentation = [
            'readme', 'changelog', 'license', 'authors',
            'contributing', 'todo', 'notes', 'docs',
        ]
        return plain + programming + web + data + config + documentation

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return [
            'ascii', 'utf8', 'utf-8', 'code', 'source',
            'script', 'program', 'file', 'document',
            'raw', 'unformatted', 'plaintext',
        ]

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for text renderer."""
        return 90

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only plain-text guidelines; global prompt is built centrally."""
        guidelines = [
            "TEXT FORMAT GUIDELINES:",
            "- Output ONLY plain text (no markdown or HTML).",
            "- Use clear headings (you may underline with === or --- when helpful).",
            "- Use simple bullet lists with '-' and tables with '|' when needed.",
            "- Preserve indentation for code-like content if present.",
            "OUTPUT: Return ONLY the raw text content.",
        ]
        return "\n".join(guidelines)

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to plain text format."""
        try:
            # The AI already emits (mostly) plain text; only cleanup is needed.
            return self._clean_text_content(extracted_content, title), "text/plain"
        except Exception as e:
            self.logger.error(f"Error rendering text: {str(e)}")
            # Return minimal text fallback
            return f"{title}\n\nError rendering report: {str(e)}", "text/plain"

    def _clean_text_content(self, content: str, title: str) -> str:
        """Clean and validate text content from AI."""
        import re
        content = content.strip()
        # Unwrap a single surrounding markdown code fence, if any.
        if content.startswith("```") and content.endswith("```"):
            fence_lines = content.split('\n')
            if len(fence_lines) > 2:
                content = '\n'.join(fence_lines[1:-1]).strip()
        # Strip residual markdown emphasis markers (order matters: ** before *).
        for marker in ('**', '*', '__', '_'):
            content = content.replace(marker, '')
        # Drop any HTML-like tags that slipped through.
        content = re.sub(r'<[^>]+>', '', content)
        # Normalize line endings to \n.
        return content.replace('\r\n', '\n').replace('\r', '\n')

View file

@ -0,0 +1,517 @@
"""
JSON Schema definitions for AI-generated document structures.
This module provides schemas that guide AI to generate structured JSON output.
"""
from typing import Dict, Any
def get_multi_document_subJsonSchema() -> Dict[str, Any]:
    """Get the JSON schema for multi-document generation.

    Top level: "metadata" (title + split strategy) and "documents", an array
    where each entry is a full single document (id, title, filename,
    sections). Element shapes live under "definitions" and are referenced
    with "#/definitions/..." $refs.
    """
    return {
        "type": "object",
        "required": ["metadata", "documents"],
        "properties": {
            "metadata": {
                "type": "object",
                "required": ["title", "splitStrategy"],
                "properties": {
                    "title": {"type": "string", "description": "Document title"},
                    "splitStrategy": {
                        "type": "string",
                        "enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
                        "description": "Strategy for splitting content into multiple files"
                    },
                    "splitCriteria": {
                        "type": "object",
                        "description": "Custom criteria for splitting (e.g., entity_id, category, etc.)"
                    },
                    "fileNamingPattern": {
                        "type": "string",
                        "description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
                    },
                    "author": {"type": "string", "description": "Document author (optional)"},
                    "source_documents": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of source document IDs"
                    },
                    "extraction_method": {
                        "type": "string",
                        "default": "ai_extraction",
                        "description": "Method used for extraction"
                    }
                }
            },
            "documents": {
                "type": "array",
                "description": "Array of individual documents to generate",
                "items": {
                    "type": "object",
                    "required": ["id", "title", "sections", "filename"],
                    "properties": {
                        "id": {"type": "string", "description": "Unique document identifier"},
                        "title": {"type": "string", "description": "Document title"},
                        "filename": {"type": "string", "description": "Generated filename"},
                        "sections": {
                            "type": "array",
                            "description": "Document sections containing structured content",
                            "items": {
                                "type": "object",
                                "required": ["id", "content_type", "elements", "order"],
                                "properties": {
                                    "id": {"type": "string", "description": "Unique section identifier"},
                                    "title": {"type": "string", "description": "Section title (optional)"},
                                    "content_type": {
                                        "type": "string",
                                        "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
                                        "description": "Primary content type of this section"
                                    },
                                    "elements": {
                                        "type": "array",
                                        "description": "Content elements in this section",
                                        "items": {
                                            "oneOf": [
                                                {"$ref": "#/definitions/table"},
                                                {"$ref": "#/definitions/bullet_list"},
                                                {"$ref": "#/definitions/paragraph"},
                                                {"$ref": "#/definitions/heading"},
                                                {"$ref": "#/definitions/code_block"}
                                            ]
                                        }
                                    },
                                    "order": {"type": "integer", "description": "Section order in document"},
                                    "metadata": {
                                        "type": "object",
                                        "description": "Additional section metadata"
                                    }
                                }
                            }
                        },
                        "metadata": {
                            "type": "object",
                            "description": "Document-specific metadata"
                        }
                    }
                }
            }
        },
        # Shared element shapes targeted by the $refs above.
        "definitions": {
            "table": {
                "type": "object",
                "required": ["headers", "rows"],
                "properties": {
                    "headers": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "Table column headers"
                    },
                    "rows": {
                        "type": "array",
                        "items": {
                            "type": "array",
                            "items": {"type": "string"}
                        },
                        "description": "Table data rows"
                    },
                    "caption": {
                        "type": "string",
                        "description": "Table caption (optional)"
                    }
                }
            },
            "bullet_list": {
                "type": "object",
                "required": ["items"],
                "properties": {
                    # NOTE: item shape duplicates "list_item" inline rather than
                    # using a $ref; keep both in sync when editing.
                    "items": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "required": ["text"],
                            "properties": {
                                "text": {"type": "string", "description": "List item text"},
                                "subitems": {
                                    "type": "array",
                                    "items": {"$ref": "#/definitions/list_item"},
                                    "description": "Nested sub-items (optional)"
                                }
                            }
                        },
                        "description": "List items"
                    },
                    "list_type": {
                        "type": "string",
                        "enum": ["bullet", "numbered", "checklist"],
                        "default": "bullet",
                        "description": "Type of list"
                    }
                }
            },
            "list_item": {
                "type": "object",
                "required": ["text"],
                "properties": {
                    "text": {"type": "string", "description": "List item text"},
                    "subitems": {
                        "type": "array",
                        "items": {"$ref": "#/definitions/list_item"},
                        "description": "Nested sub-items (optional)"
                    }
                }
            },
            "paragraph": {
                "type": "object",
                "required": ["text"],
                "properties": {
                    "text": {"type": "string", "description": "Paragraph text"},
                    "formatting": {
                        "type": "object",
                        "description": "Text formatting (bold, italic, etc.)"
                    }
                }
            },
            "heading": {
                "type": "object",
                "required": ["text", "level"],
                "properties": {
                    "text": {"type": "string", "description": "Heading text"},
                    "level": {
                        "type": "integer",
                        "minimum": 1,
                        "maximum": 6,
                        "description": "Heading level (1-6)"
                    }
                }
            },
            "code_block": {
                "type": "object",
                "required": ["code"],
                "properties": {
                    "code": {"type": "string", "description": "Code content"},
                    "language": {"type": "string", "description": "Programming language (optional)"}
                }
            }
        }
    }
def get_document_subJsonSchema() -> Dict[str, Any]:
    """Get the JSON schema for structured document generation (single document).

    Built from named sub-schemas for readability; the returned structure is
    the canonical schema with element shapes under "definitions" referenced
    via "#/definitions/..." $refs.
    """
    # Shape of a (possibly nested) list item; also used inline by bullet_list.
    list_item_def = {
        "type": "object",
        "required": ["text"],
        "properties": {
            "text": {"type": "string", "description": "List item text"},
            "subitems": {
                "type": "array",
                "items": {"$ref": "#/definitions/list_item"},
                "description": "Nested sub-items (optional)"
            }
        }
    }
    definitions = {
        "table": {
            "type": "object",
            "required": ["headers", "rows"],
            "properties": {
                "headers": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Table column headers"
                },
                "rows": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {"type": "string"}
                    },
                    "description": "Table data rows"
                },
                "caption": {
                    "type": "string",
                    "description": "Table caption (optional)"
                }
            }
        },
        "bullet_list": {
            "type": "object",
            "required": ["items"],
            "properties": {
                "items": {
                    "type": "array",
                    "items": list_item_def,
                    "description": "List items"
                },
                "list_type": {
                    "type": "string",
                    "enum": ["bullet", "numbered", "checklist"],
                    "default": "bullet",
                    "description": "Type of list"
                }
            }
        },
        "list_item": list_item_def,
        "paragraph": {
            "type": "object",
            "required": ["text"],
            "properties": {
                "text": {"type": "string", "description": "Paragraph text"},
                "formatting": {
                    "type": "object",
                    "description": "Text formatting (bold, italic, etc.)"
                }
            }
        },
        "heading": {
            "type": "object",
            "required": ["text", "level"],
            "properties": {
                "text": {"type": "string", "description": "Heading text"},
                "level": {
                    "type": "integer",
                    "minimum": 1,
                    "maximum": 6,
                    "description": "Heading level (1-6)"
                }
            }
        },
        "code_block": {
            "type": "object",
            "required": ["code"],
            "properties": {
                "code": {"type": "string", "description": "Code content"},
                "language": {"type": "string", "description": "Programming language (optional)"}
            }
        }
    }
    # One document section: typed container of ordered elements.
    section_schema = {
        "type": "object",
        "required": ["id", "content_type", "elements", "order"],
        "properties": {
            "id": {"type": "string", "description": "Unique section identifier"},
            "title": {"type": "string", "description": "Section title (optional)"},
            "content_type": {
                "type": "string",
                "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
                "description": "Primary content type of this section"
            },
            "elements": {
                "type": "array",
                "description": "Content elements in this section",
                "items": {
                    "oneOf": [
                        {"$ref": "#/definitions/table"},
                        {"$ref": "#/definitions/bullet_list"},
                        {"$ref": "#/definitions/paragraph"},
                        {"$ref": "#/definitions/heading"},
                        {"$ref": "#/definitions/code_block"}
                    ]
                }
            },
            "order": {"type": "integer", "description": "Section order in document"},
            "metadata": {
                "type": "object",
                "description": "Additional section metadata"
            }
        }
    }
    metadata_schema = {
        "type": "object",
        "required": ["title"],
        "properties": {
            "title": {"type": "string", "description": "Document title"},
            "author": {"type": "string", "description": "Document author (optional)"},
            "source_documents": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of source document IDs"
            },
            "extraction_method": {
                "type": "string",
                "default": "ai_extraction",
                "description": "Method used for extraction"
            }
        }
    }
    return {
        "type": "object",
        "required": ["metadata", "sections"],
        "properties": {
            "metadata": metadata_schema,
            "sections": {
                "type": "array",
                "description": "Document sections containing structured content",
                "items": section_schema
            },
            "summary": {
                "type": "string",
                "description": "Document summary (optional)"
            },
            "tags": {
                "type": "array",
                "items": {"type": "string"},
                "description": "Document tags for categorization"
            }
        },
        "definitions": definitions
    }
def get_extraction_prompt_template() -> str:
    """Get the template for AI extraction prompts that request JSON output.

    The returned text is injected verbatim into the extraction prompt;
    the surrounding blank lines are part of the template.
    """
    return """
You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document.
IMPORTANT: You must respond with valid JSON only. No additional text, explanations, or formatting outside the JSON structure.
JSON Schema Requirements:
- Extract the actual data from the source documents
- If content is a table, extract it as a table with headers and rows
- If content is a list, extract it as a structured list with items
- If content is text, extract it as paragraphs or headings
- Preserve the original structure and data - do not summarize or interpret
- Use the exact JSON schema provided
Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
Return only the JSON structure following the schema. Do not include any text before or after the JSON.
"""
def get_generation_prompt_template() -> str:
    """Get the template for AI generation prompts that work with JSON input.

    Returns:
        A static instruction block telling the model to transform structured
        JSON input into an enhanced JSON document following the same schema,
        with guidance for tables, lists, headings, paragraphs and code.
    """
    return """
You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content.
IMPORTANT: You must respond with valid JSON only, following the document schema.
Generation Guidelines:
- Use the provided JSON structure as the foundation
- Enhance the content with proper formatting and organization
- Ensure logical flow and readability
- Maintain the original data integrity
- Add appropriate headings and sections
- Organize content in a logical sequence
Content Enhancement:
- Tables: Ensure proper headers and data alignment
- Lists: Use appropriate list types (bullet, numbered, checklist)
- Headings: Use appropriate heading levels for hierarchy
- Paragraphs: Ensure proper text flow and formatting
- Code: Preserve code blocks with proper language identification
Return only the enhanced JSON structure following the schema. Do not include any text before or after the JSON.
"""
def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]:
    """Automatically select appropriate schema based on prompt analysis.

    Args:
        prompt_analysis: Analysis dict produced upstream; only the boolean
            flag ``is_multi_file`` is inspected here. May be None.
            NOTE(review): the annotation should be ``Optional[Dict[str, Any]]``
            since ``None`` is the default — confirm typing imports and fix.

    Returns:
        The multi-document schema when the analysis flags a multi-file
        request, otherwise the single-document schema.
    """
    if prompt_analysis and prompt_analysis.get("is_multi_file", False):
        return get_multi_document_subJsonSchema()
    else:
        return get_document_subJsonSchema()
def _validate_section_list(sections: Any) -> bool:
    """Check that *sections* is a list of structurally valid section dicts.

    A valid section is a dict containing "id", "content_type", "elements"
    and "order", where "content_type" is one of the known types and
    "elements" is a list. (Shared by single- and multi-document validation.)
    """
    if not isinstance(sections, list):
        return False
    valid_types = {"table", "list", "paragraph", "heading", "code", "image", "mixed"}
    for section in sections:
        if not isinstance(section, dict):
            return False
        for field in ("id", "content_type", "elements", "order"):
            if field not in section:
                return False
        if section["content_type"] not in valid_types:
            return False
        if not isinstance(section["elements"], list):
            return False
    return True


def validate_json_document(json_data: Dict[str, Any]) -> bool:
    """Validate that the JSON data follows the document schema.

    Accepts either the multi-document shape (top-level "documents" list with
    "metadata" carrying "title" and "splitStrategy") or the single-document
    shape (top-level "sections" list with "metadata" carrying "title").

    Args:
        json_data: Parsed JSON object to validate.

    Returns:
        True when the structure matches one of the two shapes, else False.
        Never raises; any unexpected error is reported as invalid.
    """
    try:
        if not isinstance(json_data, dict):
            return False
        if "documents" in json_data:
            # Multi-document structure
            metadata = json_data.get("metadata")
            if not isinstance(metadata, dict) or "title" not in metadata or "splitStrategy" not in metadata:
                return False
            documents = json_data["documents"]
            if not isinstance(documents, list):
                return False
            for doc in documents:
                if not isinstance(doc, dict):
                    return False
                for field in ("id", "title", "sections", "filename"):
                    if field not in doc:
                        return False
                # Per-section checks are shared with the single-document path.
                if not _validate_section_list(doc.get("sections", [])):
                    return False
            return True
        if "sections" in json_data:
            # Single-document structure
            metadata = json_data.get("metadata")
            if not isinstance(metadata, dict) or "title" not in metadata:
                return False
            return _validate_section_list(json_data["sections"])
        # Neither recognized shape
        return False
    except Exception:
        # Deliberate best-effort: any unexpected structure error means invalid.
        return False

View file

@ -0,0 +1,738 @@
"""
Prompt builder for AI document generation and extraction.
This module builds prompts for AI services to extract and generate documents.
"""
import json
import logging
from typing import Dict, Any, Optional, List, TYPE_CHECKING
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
# Type hint for renderer parameter
if TYPE_CHECKING:
from .renderers.rendererBaseTemplate import BaseRenderer
_RendererLike = BaseRenderer
else:
_RendererLike = Any
logger = logging.getLogger(__name__)
async def buildAdaptiveExtractionPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    promptAnalysis: Dict[str, Any],
    aiService=None,
    services=None
) -> str:
    """
    Build adaptive extraction prompt based on AI analysis.
    Uses multi-file or single-file approach based on analysis.

    Args:
        outputFormat: Target output format (not used in the prompt text; kept
            for API symmetry with the other builders).
        userPrompt: Raw user request; prepended verbatim to the prompt.
        title: Document title (not used here; kept for API symmetry).
        promptAnalysis: Analysis dict; only ``is_multi_file`` is read.
        aiService: Optional AI service (unused here; kept for API symmetry).
        services: Optional service center (unused here; kept for API symmetry).

    Returns:
        The fully assembled extraction prompt string.
    """
    # Multi-file example data instead of schema
    multi_file_example = {
        "metadata": {
            "title": "Multi-Document Example",
            "splitStrategy": "by_section",
            "source_documents": ["doc_001"],
            "extraction_method": "ai_extraction"
        },
        "documents": [
            {
                "id": "doc_section_1",
                "title": "Section 1 Title",
                "filename": "section_1.xlsx",
                "sections": [
                    {
                        "id": "section_1",
                        "content_type": "heading",
                        "elements": [
                            {
                                "level": 1,
                                "text": "1. SECTION TITLE"
                            }
                        ],
                        "order": 1
                    },
                    {
                        "id": "section_2",
                        "content_type": "paragraph",
                        "elements": [
                            {
                                "text": "This is the actual content that should be extracted from the document."
                            }
                        ],
                        "order": 2
                    },
                    {
                        "id": "section_3",
                        "content_type": "table",
                        "elements": [
                            {
                                "headers": ["Column 1", "Column 2"],
                                "rows": [["Value 1", "Value 2"]]
                            }
                        ],
                        "order": 3
                    }
                ]
            }
        ]
    }
    # Single-file example data instead of schema
    single_file_example = {
        "metadata": {
            "title": "Single Document Example",
            "source_documents": ["doc_001"],
            "extraction_method": "ai_extraction"
        },
        "sections": [
            {
                "id": "section_1",
                "content_type": "heading",
                "elements": [
                    {
                        "level": 1,
                        "text": "1. SECTION TITLE"
                    }
                ],
                "order": 1
            },
            {
                "id": "section_2",
                "content_type": "paragraph",
                "elements": [
                    {
                        "text": "This is the actual content that should be extracted from the document."
                    }
                ],
                "order": 2
            },
            {
                "id": "section_3",
                "content_type": "table",
                "elements": [
                    {
                        "headers": ["Column 1", "Column 2"],
                        "rows": [["Value 1", "Value 2"]]
                    }
                ],
                "order": 3
            }
        ]
    }
    if promptAnalysis.get("is_multi_file", False):
        # Multi-file prompt.
        # BUGFIX: the instructions previously told the model to replace
        # "REPLACE_WITH_ACTUAL_*" placeholders, but the embedded example JSON
        # contains no such placeholders — the stale reference could confuse the
        # model. The instruction now refers to the example values instead.
        adaptive_prompt = f"""
{userPrompt}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
TASK: Extract the actual content from the document and organize it into separate sections, where each section will become a separate file.
REQUIREMENTS:
1. Analyze the document content provided in the context below
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
3. Create one JSON document entry for each section found
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
5. Generate appropriate filenames for each section
CRITICAL: You MUST return a JSON structure with a "documents" array, NOT a "sections" array.
OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(multi_file_example, indent=2)}
IMPORTANT: The JSON must have a "documents" key containing an array of document objects. Each document object must have:
- "id": unique identifier
- "title": section title from the document
- "filename": appropriate filename for the section
- "sections": array of content sections
DO NOT return a JSON with "sections" at the root level. Return a JSON with "documents" at the root level.
INSTRUCTIONS:
- Replace the example values with real content from the document
- Use actual section titles, headings, and text from the document
- Create meaningful filenames based on section content
- Ensure each section contains the complete content for that part of the document
- Do not use generic placeholder text like "Section 1", "Section 2"
- Extract real headings, paragraphs, lists, and other content elements
- CRITICAL: Return JSON with "documents" array, not "sections" array
CONTEXT (Document Content):
Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
""".strip()
    else:
        # Single-file prompt - use example data instead of schema
        adaptive_prompt = f"""
{userPrompt}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
TASK: Extract the actual content from the document and organize it into structured sections.
REQUIREMENTS:
1. Analyze the document content provided in the context below
2. Extract all content and organize it into logical sections
3. Create structured JSON with sections containing the extracted content
4. Preserve the original structure and data
OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(single_file_example, indent=2)}
INSTRUCTIONS:
- Replace example data with actual content from the document
- Use actual headings, paragraphs, and text from the document
- Ensure all content is properly structured
- Do not use generic placeholder text
- Extract real content from the documents
CONTEXT (Document Content):
Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
""".strip()
    return adaptive_prompt
async def buildGenericExtractionPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None,
    services=None
) -> str:
    """Build generic extraction prompt that works for both single and multi-file.

    When an AI service is available, it is asked whether the request needs a
    multi-file or single-file structure and the adaptive builder is used;
    on any failure (or without an AI service) a single-file prompt is built.

    Args:
        outputFormat: Target format, forwarded to the adaptive builder.
        userPrompt: Raw user request; prepended verbatim to the prompt.
        title: Document title, forwarded to the adaptive builder.
        aiService: Optional AI service used for the structure analysis.
        services: Optional service center; only used for debug logging.

    Returns:
        The assembled extraction prompt string.
    """
    # Use AI to determine the best approach
    if aiService:
        try:
            analysis_prompt = f"""
Analyze this user request and determine the best JSON structure for document extraction.
User request: "{userPrompt}"
Respond with JSON only:
{{
"requires_multi_file": true/false,
"recommended_schema": "single_document|multi_document",
"split_approach": "description of how to organize content",
"file_naming": "suggested naming pattern"
}}
Consider the user's intent and the most logical way to organize the extracted content.
"""
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL
            request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
            response = await aiService.aiObjects.call(request)
            if response and response.content:
                import re
                result = response.content.strip()
                # Tolerate extra prose around the JSON object in the AI reply.
                json_match = re.search(r'\{.*\}', result, re.DOTALL)
                if json_match:
                    result = json_match.group(0)
                analysis = json.loads(result)
                # Use analysis to build appropriate prompt
                return await buildAdaptiveExtractionPrompt(
                    outputFormat, userPrompt, title, analysis, aiService, services
                )
        except Exception as e:
            # BUGFIX: guard the debug call — `services` defaults to None and the
            # original unguarded call raised AttributeError from inside the
            # handler instead of falling back to the single-file prompt.
            if services:
                services.utils.debugLogToFile(f"Generic prompt analysis failed: {str(e)}", "PROMPT_BUILDER")
    # Fallback to single-file prompt
    example_data = {
        "metadata": {
            "title": "Example Document",
            "author": "AI Assistant",
            "source_documents": ["document_001"],
            "extraction_method": "ai_extraction"
        },
        "sections": [
            {
                "id": "section_001",
                "content_type": "heading",
                "elements": [
                    {
                        "level": 1,
                        "text": "1. SECTION TITLE"
                    }
                ],
                "order": 1,
                "metadata": {}
            }
        ],
        "summary": "",
        "tags": []
    }
    return f"""
{userPrompt}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
TASK: Extract the actual content from the document and organize it into structured sections.
REQUIREMENTS:
1. Analyze the document content provided in the context below
2. Extract all content and organize it into logical sections
3. Create structured JSON with sections containing the extracted content
4. Preserve the original structure and data
OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(example_data, indent=2)}
Requirements:
- Preserve all original data - do not summarize or interpret
- Use the exact JSON format shown above
- Maintain data integrity and structure
Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
DO NOT return a schema description - return actual extracted content in the JSON format shown above.
"""
async def buildExtractionPrompt(
    outputFormat: str,
    renderer: _RendererLike,
    userPrompt: str,
    title: str,
    aiService=None,
    services=None
) -> str:
    """
    Build the final extraction prompt by combining:
    - Parsed extraction intent from user prompt (using AI)
    - Generic cross-format instructions (filename header + real-data policy)
    - Format-specific guidelines snippet provided by the renderer
    The AI must place a single filename header at the very top:
    FILENAME: <safe-file-name-with-extension>
    followed by a blank line and then ONLY the document content according to the target format.

    Args:
        outputFormat: Target output format name.
        renderer: Renderer that may expose ``getExtractionGuidelines()``.
        userPrompt: Raw user request; its intent is parsed via the AI service.
        title: Document title (not used here; kept for API symmetry).
        aiService: Optional AI service for intent parsing.
        services: Optional service center for config lookup / debug artifacts.

    Returns:
        The combined extraction prompt string.
    """
    # Parse user prompt to separate extraction intent from generation format using AI
    extractionIntent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services)
    # BUGFIX (dead code removed): the original imported get_document_subJsonSchema
    # and computed a `jsonSchema` value that was never used anywhere below.
    # Generic block for JSON extraction - use mixed example data showing different content types
    example_data = {
        "metadata": {
            "title": "Example Document",
            "author": "AI Assistant",
            "source_documents": ["document_001"],
            "extraction_method": "ai_extraction"
        },
        "sections": [
            {
                "id": "section_001",
                "content_type": "heading",
                "elements": [
                    {
                        "level": 1,
                        "text": "1. INTRODUCTION"
                    }
                ],
                "order": 1,
                "metadata": {}
            },
            {
                "id": "section_002",
                "content_type": "paragraph",
                "elements": [
                    {
                        "text": "This is a sample paragraph with actual content that should be extracted from the document."
                    }
                ],
                "order": 2,
                "metadata": {}
            },
            {
                "id": "section_003",
                "content_type": "table",
                "elements": [
                    {
                        "headers": ["Column 1", "Column 2", "Column 3"],
                        "rows": [
                            ["Value 1", "Value 2", "Value 3"],
                            ["Value 4", "Value 5", "Value 6"]
                        ]
                    }
                ],
                "order": 3,
                "metadata": {}
            }
        ],
        "summary": "",
        "tags": []
    }
    genericIntro = f"""
{extractionIntent}
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
TASK: Extract the actual content from the document and organize it into structured sections.
REQUIREMENTS:
1. Analyze the document content provided in the context below
2. Extract all content and organize it into logical sections
3. Create structured JSON with sections containing the extracted content
4. Preserve the original structure and data
OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(example_data, indent=2)}
Requirements:
- Preserve all original data - do not summarize or interpret
- Use the exact JSON format shown above
- Maintain data integrity and structure
Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
DO NOT return a schema description - return actual extracted content in the JSON format shown above.
"""
    # Get format-specific guidelines from renderer
    formatGuidelines = ""
    try:
        if hasattr(renderer, 'getExtractionGuidelines'):
            formatGuidelines = renderer.getExtractionGuidelines()
    except Exception:
        # Renderer guidelines are best-effort; a failure must not break the prompt.
        pass
    # Combine all parts
    finalPrompt = f"{genericIntro}\n\n{formatGuidelines}".strip()
    # Save extraction prompt to debug file - only if debug enabled
    try:
        debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
        if debug_enabled:
            import os
            from datetime import datetime, UTC
            ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
            debug_root = "./test-chat/ai"
            os.makedirs(debug_root, exist_ok=True)
            with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
                f.write(finalPrompt)
    except Exception:
        # Debug artifact writing is best-effort (services may be None).
        pass
    return finalPrompt
async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None,
    services=None
) -> str:
    """
    Use AI to build the generation prompt based on user intent and format requirements.
    Focus on what's important for the user and how to structure the content.

    Args:
        outputFormat: Target document format (e.g. "xlsx", "pdf").
        userPrompt: Raw user request; quoted into the meta-prompt.
        title: Document title.
        aiService: Optional AI service; without it a static fallback is returned.
        services: Optional service center for debug logging; may be None.

    Returns:
        Generation instructions produced by the AI, or a static fallback string.
    """
    if not aiService:
        # Fallback if no AI service available
        return f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content."

    def _log(message: str) -> None:
        # BUGFIX: best-effort debug logging — `services` defaults to None and
        # the original unguarded services.utils.debugLogToFile calls raised
        # AttributeError, including inside the exception handler below, which
        # masked the intended fallback return.
        if services:
            services.utils.debugLogToFile(message, "PROMPT_BUILDER")

    try:
        # Protect userPrompt from injection
        safeUserPrompt = userPrompt.replace('"', '\\"').replace("'", "\\'").replace('\n', ' ').replace('\r', ' ')
        # Debug output
        _log(f"GENERATION PROMPT REQUEST: buildGenerationPrompt called with outputFormat='{outputFormat}', title='{title}'")
        # AI call to generate the appropriate generation prompt
        generationPromptRequest = f"""
You are creating instructions for an AI to generate JSON content in the CANONICAL FORMAT that will be converted to a {outputFormat} document.
User request: "{safeUserPrompt}"
Document title: "{title}"
Target format: {outputFormat}
Write clear, detailed instructions that tell the AI how to generate JSON content using the CANONICAL JSON FORMAT. Focus on:
1. What content is most important for the user
2. How to structure and organize the content using the canonical JSON format with 'sections'
3. Specific formatting requirements for the target format
4. Language requirements to preserve
5. How to ensure the JSON content meets the user's needs
CRITICAL: The AI MUST generate content using the CANONICAL JSON FORMAT with this exact structure:
{{
"metadata": {{
"title": "Document Title"
}},
"sections": [
{{
"id": "section_1",
"content_type": "heading",
"elements": [
{{
"level": 1,
"text": "1. SECTION TITLE"
}}
],
"order": 1
}},
{{
"id": "section_2",
"content_type": "paragraph",
"elements": [
{{
"text": "This is the actual content that should be extracted from the document."
}}
],
"order": 2
}},
{{
"id": "section_3",
"content_type": "table",
"elements": [
{{
"headers": ["Column 1", "Column 2", "Column 3"],
"rows": [
["Value 1", "Value 2", "Value 3"],
["Value 4", "Value 5", "Value 6"]
]
}}
],
"order": 3
}}
]
}}
The AI should NOT create format-specific structures like "sheets" or "columns" - only use the canonical format with "sections" and "elements".
Write the instructions as plain text, not JSON. Start with "Generate JSON content that..." and provide clear, actionable instructions for creating structured JSON data in the canonical format.
"""
        # Call AI service to generate the prompt
        _log("GENERATION PROMPT REQUEST: Calling AI for generation prompt...")
        # Import and set proper options for AI call
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL
        request = AiCallRequest(prompt=generationPromptRequest, context="", options=request_options)
        response = await aiService.aiObjects.call(request)
        result = response.content if response else ""
        # Replace the placeholder that the AI created with actual format rules
        if result:
            formatRules = _getFormatRules(outputFormat)
            result = result.replace("PLACEHOLDER_FOR_FORMAT_RULES", formatRules)
        # Debug output
        _log("GENERATION PROMPT: Generated successfully")
        # Save full generation prompt and AI response to debug file - only if debug enabled
        try:
            debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False) if services else False
            if debug_enabled:
                import os
                from datetime import datetime, UTC
                ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
                debug_root = "./test-chat/ai"
                os.makedirs(debug_root, exist_ok=True)
                with open(os.path.join(debug_root, f"{ts}_generation_prompt.txt"), "w", encoding="utf-8") as f:
                    f.write(f"GENERATION PROMPT REQUEST:\n{generationPromptRequest}\n\n")
                    f.write(f"GENERATION PROMPT AI RESPONSE:\n{response.content if response else 'No response'}\n\n")
                    f.write(f"GENERATION PROMPT FINAL:\n{result if result else 'None'}\n")
        except Exception:
            # Debug artifact writing is best-effort.
            pass
        return result if result else f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content."
    except Exception as e:
        # Fallback on any error - preserve user prompt for language instructions
        _log(f"DEBUG: AI generation prompt failed: {str(e)}")
        return f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content. User requirements: {userPrompt}"
def _getFormatRules(outputFormat: str) -> str:
    """
    Get format-specific rules for the generation prompt.

    Args:
        outputFormat: Target format key, matched case-insensitively
            (e.g. "xlsx", "pdf", "docx", "html", "json", "csv", "txt").

    Returns:
        A rules text block for the given format; for unknown formats a
        generic template built from the format name is returned.
    """
    # Static rules text per supported output format; looked up below via
    # outputFormat.lower() so callers may pass any casing.
    format_rules = {
        "xlsx": """
XLSX Format Rules:
- Create tables with clear headers and organized data
- Use appropriate column widths and formatting
- Include summary information if relevant
- Ensure data is properly structured for spreadsheet analysis
""",
        "pdf": """
PDF Format Rules:
- Create professional document layout
- Use appropriate headings and sections
- Include proper spacing and formatting
- Ensure content is well-organized and readable
""",
        "docx": """
DOCX Format Rules:
- Create professional document layout
- Use appropriate headings and sections
- Include proper spacing and formatting
- Ensure content is well-organized and readable
""",
        "html": """
HTML Format Rules:
- Create clean, semantic HTML structure
- Use appropriate tags for content organization
- Include proper styling classes
- Ensure content is accessible and well-formatted
""",
        "json": """
JSON Format Rules:
- Create well-structured JSON data
- Use appropriate nesting and organization
- Include metadata and context information
- Ensure data is properly formatted and valid
""",
        "csv": """
CSV Format Rules:
- Create clear, organized tabular data
- Use appropriate headers and data types
- Ensure proper CSV formatting
- Include all relevant data in structured format
""",
        "txt": """
TXT Format Rules:
- Create clean, readable text format
- Use appropriate spacing and organization
- Include clear headings and sections
- Ensure content is well-structured and easy to read
"""
    }
    # Fall back to a generic rules block derived from the format name.
    return format_rules.get(outputFormat.lower(), f"""
{outputFormat.upper()} Format Rules:
- Create well-structured content appropriate for {outputFormat}
- Use appropriate formatting and organization
- Ensure content is clear and professional
- Include all relevant information in proper format
""")
async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None, services=None) -> str:
"""
Parse user prompt to extract the core extraction intent.
"""
if not aiService:
return f"Extract content from the provided documents and create a {outputFormat} report."
try:
analysis_prompt = f"""
Analyze this user request and extract the core extraction intent:
User request: "{userPrompt}"
Target format: {outputFormat}
Extract the main intent and requirements for document processing. Focus on:
1. What content needs to be extracted
2. How it should be organized
3. Any specific requirements or preferences
Respond with a clear, concise statement of the extraction intent.
"""
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
response = await aiService.aiObjects.call(request)
if response and response.content:
return response.content.strip()
else:
return f"Extract content from the provided documents and create a {outputFormat} report."
except Exception as e:
services.utils.debugLogToFile(f"Extraction intent analysis failed: {str(e)}", "PROMPT_BUILDER")
return f"Extract content from the provided documents and create a {outputFormat} report."

View file

@ -32,7 +32,7 @@ class NeutralizationService:
serviceCenter: Service center instance for accessing other services
NamesToParse: List of names to parse and replace (case-insensitive)
"""
self.serviceCenter = serviceCenter
self.services = serviceCenter
self.interfaceDbApp = serviceCenter.interfaceDbApp
# Initialize anonymization processors

View file

@ -0,0 +1,264 @@
import json
import os
from typing import Any, Dict, List, Set
from datetime import datetime, UTC
class NormalizationService:
    """
    Produces a single canonical table in merged JSON using an AI-provided header mapping
    and deterministic, in-code value normalization. No language heuristics in code.
    """
    def __init__(self, services):
        # Service center giving access to shared services; this class uses
        # services.ai.callAi for header-mapping requests.
        self.services = services
    # Public API
def discoverStructures(self, mergedJson: Dict[str, Any]) -> Dict[str, Any]:
headers: Set[str] = set()
samples: Dict[str, List[str]] = {}
sections = mergedJson.get("sections", []) if isinstance(mergedJson, dict) else []
for section in sections:
if not isinstance(section, dict):
continue
# Use only the fundamental agreed JSON structure: content_type/elements
if section.get("content_type") != "table":
continue
# Extract table data from elements array
hdrs = []
rows = []
for element in section.get("elements", []):
if isinstance(element, dict) and "headers" in element and "rows" in element:
hdrs = element.get("headers") or []
rows = element.get("rows") or []
break
if not hdrs or not rows:
continue
for h in hdrs:
if not isinstance(h, str):
continue
headers.add(h)
# collect small value samples by column index
for row in rows[:5]:
if not isinstance(row, list):
continue
for i, value in enumerate(row):
headerName = hdrs[i] if i < len(hdrs) else f"col_{i}"
if headerName not in samples:
samples[headerName] = []
if len(samples[headerName]) < 5:
samples[headerName].append(str(value))
return {
"tableHeaders": sorted(list(headers)),
"headerSamples": samples,
}
    async def requestHeaderMapping(self, inventory: Dict[str, Any], cacheKey: str, canonicalSpec: Dict[str, Any] | None = None, mergePrompt: str | None = None) -> Dict[str, Any]:
        """Ask the AI service to map discovered table headers to canonical headers.

        Args:
            inventory: Output of discoverStructures (discovered headers + samples).
            cacheKey: Cache identifier. NOTE(review): currently unused in this
                method — confirm whether caching was intended here.
            canonicalSpec: Optional canonical schema; defaults to a spec built
                from the discovered headers.
            mergePrompt: Optional merge-prompt context, quoted into the AI prompt.

        Returns:
            Parsed mapping dict (keys "mapping", "normalizationPolicy",
            "canonicalHeaders"); empty mapping/policy on any AI or parse failure.
        """
        # Allow caller to specify any canonical schema. If none provided, default to discovered headers.
        if canonicalSpec is None:
            canonicalSpec = {
                "canonicalHeaders": inventory.get("tableHeaders", []),
                "constraints": {}
            }
        # Protect merge prompt context by wrapping in single quotes and escaping internal quotes
        protectedMerge = None
        if mergePrompt:
            try:
                protectedMerge = str(mergePrompt).replace("'", "\\'")
            except Exception:
                protectedMerge = str(mergePrompt)
        prompt = (
            "You are a mapping generator. Return ONLY JSON.\n\n"
            "Given discovered headers and sample values, map them to the canonical headers.\n"
            "Do not invent fields. Use null if no mapping. Provide normalization policy.\n\n"
            f"CANONICAL_SPEC:\n{json.dumps(canonicalSpec, ensure_ascii=False, indent=2)}\n\n"
            f"HEADERS_DISCOVERED:\n{json.dumps(inventory, ensure_ascii=False, indent=2)}\n\n"
            + (f"MERGE_PROMPT_CONTEXT (protected):\n'{protectedMerge}'\n\n" if protectedMerge is not None else "") +
            "REPLY JSON SHAPE:\n(Example)\n"
            "{\n \"mappings\": {\"<sourceHeader>\": \"<Canonical>|null\"},\n"
            " \"normalizationPolicy\": {\n \"TotalAmount\": {\"decimalSeparator\": \",\"|\".\"},\n"
            " \"Currency\": {\"stripSymbols\": true},\n"
            " \"Date\": {\"formats\": [\"DD.MM.YYYY\",\"YYYY-MM-DD\"]}\n }\n}\n"
        )
        response = await self.services.ai.callAi(prompt=prompt)
        if not response:
            return {"mapping": {}, "normalizationPolicy": {}}
        # Extract JSON from response more safely
        # (first "{" to last "}" — tolerates prose around the JSON object).
        start_idx = response.find('{')
        end_idx = response.rfind('}')
        if start_idx == -1 or end_idx == -1 or start_idx >= end_idx:
            return {"mapping": {}, "normalizationPolicy": {}}
        js = response[start_idx:end_idx + 1]
        try:
            mapping = json.loads(js)
        except json.JSONDecodeError:
            return {"mapping": {}, "normalizationPolicy": {}}
        # Normalize key naming from AI: prefer single key "mapping"
        if "mapping" not in mapping and "mappings" in mapping and isinstance(mapping["mappings"], dict):
            mapping["mapping"] = mapping["mappings"]
            try:
                del mapping["mappings"]
            except Exception:
                pass
        # Ensure canonicalHeaders present in mapping for downstream use
        if "canonicalHeaders" not in mapping:
            mapping["canonicalHeaders"] = canonicalSpec.get("canonicalHeaders", [])
        # debug artifact
        self._writeDebugArtifact("mapping.json", mapping)
        return mapping
def applyMapping(self, mergedJson: Dict[str, Any], mappingSpec: Dict[str, Any]) -> Dict[str, Any]:
    """Project merged table sections onto the canonical header set.

    Walks every ``content_type == "table"`` section of *mergedJson*, resolves
    each canonical header to a source column via *mappingSpec*'s ``mapping``
    (falling back to an identity match when names are equal), normalizes each
    cell through ``_normalizeValue`` and returns a canonical document with a
    single canonical table section. Rows whose cells are all empty after
    normalization are dropped.

    Args:
        mergedJson: Merged document shaped as ``{"metadata": ..., "sections":
            [{"content_type": "table", "elements": [{"headers": [...],
            "rows": [...]}]}]}``. Non-dict input yields an empty canonical
            table with default metadata.
        mappingSpec: Dict with ``mapping`` (source header -> canonical header
            or ``None``), ``normalizationPolicy`` and optionally
            ``canonicalHeaders``.

    Returns:
        Canonical document dict (metadata plus one canonical table section).
    """
    mappings = (mappingSpec or {}).get("mapping", {})
    policy = (mappingSpec or {}).get("normalizationPolicy", {})
    # Prefer headers provided by mapping (generic across domains);
    # fall back to the union of mapped targets.
    canonicalHeaders = (mappingSpec or {}).get("canonicalHeaders") or []
    if not canonicalHeaders:
        canonicalHeaders = sorted({t for t in mappings.values() if t})
    rows: List[List[str]] = []
    # Guard both sections AND metadata access: the original guarded only
    # sections, then crashed on non-dict input when reading metadata below.
    isDict = isinstance(mergedJson, dict)
    sections = mergedJson.get("sections", []) if isDict else []
    metadata = mergedJson.get("metadata", {}) if isDict else {}
    for section in sections:
        # Use only the fundamental agreed JSON structure: content_type/elements
        if section.get("content_type") != "table":
            continue
        # Extract table data from the first element carrying headers + rows
        sourceHeaders = []
        sourceRows = []
        for element in section.get("elements", []):
            if isinstance(element, dict) and "headers" in element and "rows" in element:
                sourceHeaders = element.get("headers") or []
                sourceRows = element.get("rows") or []
                break
        if not sourceHeaders or not sourceRows:
            continue
        # Build index map: canonical header -> source column index (or None)
        indexMap: Dict[str, int] = {}
        for ch in canonicalHeaders:
            srcIndex = None
            for si, sh in enumerate(sourceHeaders):
                # Prefer explicit mapping target; fallback to identity when names match
                target = mappings.get(sh)
                if target is None and sh == ch:
                    target = ch
                if target == ch:
                    srcIndex = si
                    break
            indexMap[ch] = srcIndex
        # Transform rows into canonical column order
        for r in sourceRows:
            canonicalRow: List[str] = []
            for ch in canonicalHeaders:
                idx = indexMap.get(ch)
                try:
                    value = r[idx] if (idx is not None and idx < len(r)) else ""
                except (IndexError, KeyError, TypeError):
                    # Corrupted or non-indexable row data: degrade to empty cell
                    value = ""
                canonicalRow.append(self._normalizeValue(ch, value, policy))
            # Keep the row only if at least one non-empty meaningful field remains
            if any(v.strip() for v in canonicalRow):
                rows.append(canonicalRow)
    canonical = {
        "metadata": {
            "title": metadata.get("title", "Merged Document"),
            "source_documents": metadata.get("source_documents", [])
        },
        "sections": [
            {
                "id": "canonical_table_1",
                "content_type": "table",
                "elements": [
                    {
                        "headers": canonicalHeaders,
                        "rows": rows
                    }
                ],
                "order": 1
            }
        ]
    }
    # debug artifact
    self._writeDebugArtifact("canonical_merged.json", canonical)
    return canonical
def validateCanonical(self, canonicalJson: Dict[str, Any]) -> Dict[str, Any]:
    """Validate a canonical document by counting its table rows.

    Collects the rows of every ``content_type == "table"`` section and
    reports success when at least one row was found. Malformed input is
    treated as an empty document rather than raising.

    Args:
        canonicalJson: Canonical document produced by ``applyMapping``.

    Returns:
        Dict with ``rowCount`` (int) and ``success`` (bool).
    """
    collected: List[Any] = []
    try:
        for section in canonicalJson.get("sections", []):
            if section.get("content_type") != "table":
                continue
            # Rows live inside the elements array of each table section
            for element in section.get("elements", []):
                if isinstance(element, dict) and "rows" in element:
                    collected.extend(element.get("rows", []))
    except Exception:
        # Malformed input: treat as an empty (failed) canonical document
        collected = []
    report = {"rowCount": len(collected), "success": bool(collected)}
    self._writeDebugArtifact("normalization_report.json", report)
    return report
# Internal helpers
def _normalizeValue(self, canonicalHeader: str, value: Any, policy: Dict[str, Any]) -> str:
if value is None:
return ""
text = str(value).strip()
# Generic normalization guided by policy; avoid domain specifics
if canonicalHeader in (policy.get("numericFields", []) or []):
dec = ((policy.get(canonicalHeader) or {}).get("decimalSeparator")
or (policy.get("numeric") or {}).get("decimalSeparator")
or ".")
if dec == ",":
text = text.replace(".", "").replace(",", ".") if "," in text else text
text = ''.join(ch for ch in text if ch.isdigit() or ch in ['.', '-', '+'])
elif (policy.get("text") or {}).get("stripSymbols") and canonicalHeader in (policy.get("text", {}).get("applyTo", []) or []):
text = ''.join(ch for ch in text if ch.isalpha())
text = text.upper()
return text
def _writeDebugArtifact(self, fileName: str, obj: Any) -> None:
try:
debugEnabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
if not debugEnabled:
return
root = "./test-chat/ai"
os.makedirs(root, exist_ok=True)
# Prefix timestamp for files that are frequently overwritten
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
if fileName in ("mapping.json", "canonical_merged.json"):
outName = f"{ts}_{fileName}"
else:
outName = fileName
path = os.path.join(root, outName)
with open(path, "w", encoding="utf-8") as f:
if isinstance(obj, (dict, list)):
f.write(json.dumps(obj, ensure_ascii=False, indent=2))
else:
f.write(str(obj))
except Exception:
pass

View file

@ -21,7 +21,7 @@ class SharepointService:
Use setAccessTokenFromConnection() method to configure the access token before making API calls.
"""
self.serviceCenter = serviceCenter
self.services = serviceCenter
self.access_token = None
self.base_url = "https://graph.microsoft.com/v1.0"

View file

@ -16,7 +16,7 @@ class TicketService:
Args:
serviceCenter: Service center instance for accessing other services
"""
self.serviceCenter = serviceCenter
self.services = serviceCenter
async def _createTicketInterfaceByType(
self,

View file

@ -4,6 +4,7 @@ Provides centralized access to configuration, events, and other utilities.
"""
import logging
import os
from typing import Any, Optional, Dict, Callable
from modules.shared.configuration import APP_CONFIG
from modules.shared.eventManagement import eventManager
@ -140,3 +141,42 @@ class UtilsService:
except Exception as e:
logger.error(f"Error getting fresh token for connection {connectionId}: {str(e)}")
return None
def debugLogToFile(self, message: str, context: str = "DEBUG"):
    """
    Append a debug message to ``debug_workflow.log`` when debug logging is enabled.

    Reads ``APP_DEBUG_CHAT_WORKFLOW_ENABLED`` and ``APP_DEBUG_CHAT_WORKFLOW_DIR``
    via ``configGet``; a relative directory is resolved against the gateway
    directory (four levels above this module). Best-effort: all errors are
    swallowed to avoid logging recursion.

    Args:
        message: Debug message to log
        context: Context identifier for the debug message
    """
    try:
        # Check if debug logging is enabled
        if not self.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False):
            return
        # Get debug directory
        debug_dir = self.configGet("APP_DEBUG_CHAT_WORKFLOW_DIR", "./test-chat")
        if not os.path.isabs(debug_dir):
            # Relative paths are resolved against the gateway directory
            # (four levels above this module file)
            gateway_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
            debug_dir = os.path.join(gateway_dir, debug_dir)
        # Ensure debug directory exists
        os.makedirs(debug_dir, exist_ok=True)
        debug_file = os.path.join(debug_dir, "debug_workflow.log")
        # Format and append the debug entry
        timestamp = self.getUtcTimestamp()
        debug_entry = f"[{timestamp}] [{context}] {message}\n"
        with open(debug_file, "a", encoding="utf-8") as f:
            f.write(debug_entry)
    except Exception:
        # Don't log debug errors to avoid recursion (fixed: dropped the
        # unused `as e` binding from the original handler)
        pass

View file

@ -16,7 +16,7 @@ class WorkflowService:
"""Service class containing methods for document processing, chat operations, and workflow management"""
def __init__(self, serviceCenter):
self.serviceCenter = serviceCenter
self.services = serviceCenter
self.user = serviceCenter.user
self.workflow = serviceCenter.workflow
self.interfaceDbChat = serviceCenter.interfaceDbChat
@ -78,11 +78,15 @@ class WorkflowService:
def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
"""Get ChatDocuments from a list of document references using all three formats."""
try:
# Get the current workflow from services (same pattern as setWorkflowContext)
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
if not workflow:
logger.error("No workflow available for document list resolution")
return []
workflow = self.services.currentWorkflow
# Reload workflow from database to ensure we have all messages
if hasattr(workflow, 'id'):
try:
workflow = self.getWorkflow(workflow.id)
logger.debug(f"Reloaded workflow {workflow.id} with {len(workflow.messages)} messages")
except Exception as e:
logger.warning(f"Could not reload workflow from database: {str(e)}")
all_documents = []
for doc_ref in documentList:
@ -125,7 +129,9 @@ class WorkflowService:
break
if not message_found:
logger.warning(f"Message with ID {message_id} not found in workflow. Available message IDs: {[str(msg.id) for msg in workflow.messages]}")
available_ids = [str(msg.id) for msg in workflow.messages]
logger.error(f"Message with ID {message_id} not found in workflow. Available message IDs: {available_ids}")
raise ValueError(f"Document reference not found: docList:{message_id}:{label}")
elif len(parts) >= 2:
# Format: docList:<label> - find message by documentsLabel
label = parts[1]
@ -154,7 +160,8 @@ class WorkflowService:
else:
logger.debug(f"Found docList reference {doc_ref} but message has no documents")
else:
logger.debug(f"No messages found with documentsLabel: {label}")
logger.error(f"No messages found with documentsLabel: {label}")
raise ValueError(f"Document reference not found: docList:{label}")
else:
# Direct label reference (round1_task2_action3_contextinfo)
# Search for messages with matching documentsLabel to find the actual documents
@ -198,30 +205,8 @@ class WorkflowService:
else:
logger.debug(f"No documents found in newest message {newest_message.id}")
else:
logger.debug(f"No messages found with documentsLabel: {doc_ref}")
# Fallback: also check if any message has this documentsLabel as a prefix
logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
fallback_messages = []
for message in workflow.messages:
msg_documents_label = getattr(message, 'documentsLabel', '')
if msg_documents_label and msg_documents_label.startswith(doc_ref):
fallback_messages.append(message)
logger.debug(f"Found fallback message {message.id} with documentsLabel: {msg_documents_label}")
if fallback_messages:
# Sort by publishedAt descending (newest first)
fallback_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
newest_fallback = fallback_messages[0]
logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
if newest_fallback.documents:
doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
all_documents.extend(newest_fallback.documents)
else:
logger.debug(f"No documents found in fallback message {newest_fallback.id}")
else:
logger.debug(f"No fallback messages found either")
logger.error(f"No messages found with documentsLabel: {doc_ref}")
raise ValueError(f"Document reference not found: {doc_ref}")
logger.debug(f"Resolved {len(all_documents)} documents from document list: {documentList}")
return all_documents
@ -260,7 +245,8 @@ class WorkflowService:
token_status = f"error: {str(e)}"
# Build enhanced reference with state information
base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
# Format: connection:msft:<username> (without UUID)
base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}"
state_info = f" [status:{connection.status.value}, token:{token_status}]"
logger.debug(f"getConnectionReferenceFromUserConnection: Built reference: {base_ref + state_info}")
@ -283,26 +269,25 @@ class WorkflowService:
return None
def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
"""Get UserConnection from reference string (handles both old and enhanced formats)"""
"""Get UserConnection from reference string (handles new format without UUID)"""
try:
# Parse reference format: connection:{authority}:{username}:{id} [status:..., token:...]
# Parse reference format: connection:{authority}:{username} [status:..., token:...]
# Remove state information if present
base_reference = connectionReference.split(' [')[0]
parts = base_reference.split(':')
if len(parts) != 4 or parts[0] != "connection":
if len(parts) != 3 or parts[0] != "connection":
return None
authority = parts[1]
username = parts[2]
conn_id = parts[3]
# Get user connections through AppObjects interface
user_connections = self.interfaceDbApp.getUserConnections(self.user.id)
# Find matching connection
# Find matching connection by authority and username (no UUID needed)
for conn in user_connections:
if str(conn.id) == conn_id and conn.authority.value == authority and conn.externalUsername == username:
if conn.authority.value == authority and conn.externalUsername == username:
return conn
return None
@ -437,11 +422,7 @@ class WorkflowService:
def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
"""Set current workflow context for document generation and routing"""
try:
# Get the current workflow from services
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
if not workflow:
logger.error("No workflow available for context setting")
return
workflow = self.services.currentWorkflow
# Prepare update data
update_data = {}
@ -548,10 +529,7 @@ class WorkflowService:
def getDocumentCount(self) -> str:
"""Get document count for task planning (matching old handlingTasks.py logic)"""
try:
# Get the current workflow from services
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
if not workflow:
return "No documents available"
workflow = self.services.currentWorkflow
# Count documents from all messages in the workflow (like old system)
total_docs = 0
@ -570,10 +548,7 @@ class WorkflowService:
def getWorkflowHistoryContext(self) -> str:
"""Get workflow history context for task planning (matching old handlingTasks.py logic)"""
try:
# Get the current workflow from services
workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
if not workflow:
return "No previous round context available"
workflow = self.services.currentWorkflow
# Check if there are any previous rounds by looking for "first" messages
has_previous_rounds = False
@ -622,15 +597,26 @@ class WorkflowService:
if not workflow or not hasattr(workflow, 'messages'):
return "No documents available"
# Use the provided workflow object directly to avoid database reload issues
# that can cause filename truncation. The workflow object should already be up-to-date.
logger.debug(f"Using provided workflow object for getAvailableDocuments (ID: {workflow.id if hasattr(workflow, 'id') else 'unknown'})")
# Debug: Check document filenames in the workflow object
if hasattr(workflow, 'messages') and workflow.messages:
for message in workflow.messages:
if hasattr(message, 'documents') and message.documents:
for doc in message.documents:
logger.debug(f"Workflow document {doc.id}: fileName='{doc.fileName}' (length: {len(doc.fileName)})")
# Get document reference list using the exact same logic as old system
document_list = self._getDocumentReferenceList(workflow)
# Build technical context string for AI action planning (exact copy of old system)
context = "AVAILABLE DOCUMENTS:\n\n"
# Build index string for AI action planning
context = ""
# Process chat exchanges (current round) - exact copy of old system
# Process current round exchanges first
if document_list["chat"]:
context += "CURRENT ROUND DOCUMENTS:\n"
context += "\nCurrent round documents:\n"
for exchange in document_list["chat"]:
# Generate docList reference for the exchange (using message ID and label)
# Find the message that corresponds to this exchange
@ -656,9 +642,9 @@ class WorkflowService:
context += f" - docItem:{doc_ref}\n"
context += "\n"
# Process history exchanges (previous rounds) - exact copy of old system
# Process previous rounds after
if document_list["history"]:
context += "WORKFLOW HISTORY DOCUMENTS:\n"
context += "\nPast rounds documents:\n"
for exchange in document_list["history"]:
# Generate docList reference for the exchange (using message ID and label)
# Find the message that corresponds to this exchange
@ -685,7 +671,7 @@ class WorkflowService:
context += "\n"
if not document_list["chat"] and not document_list["history"]:
context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
context += "\nNO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
return context
@ -713,39 +699,23 @@ class WorkflowService:
for message in reversed(workflow.messages):
is_first = message.status == "first" if hasattr(message, 'status') else False
# Build a DocumentExchange if message has documents
# Build a DocumentExchange if message has documents and an explicit documentsLabel
doc_exchange = None
if message.documents:
if message.actionId and message.documentsLabel:
# Validate that we use the same label as in the message
existing_label = getattr(message, 'documentsLabel', None)
if existing_label:
# Validate and use the message's actual documentsLabel
validated_label = self._validateDocumentLabelConsistency(message)
# Use the message's actual documentsLabel
doc_refs = []
for doc in message.documents:
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
doc_refs.append(doc_ref)
doc_exchange = {
'documentsLabel': validated_label,
'documents': doc_refs
}
else:
# Generate new labels for documents without explicit labels
doc_refs = []
for doc in message.documents:
doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
doc_refs.append(doc_ref)
if doc_refs:
# Create a label based on message context
context_prefix = self._generateWorkflowContextPrefix(message)
context_label = f"{context_prefix}_context"
doc_exchange = {
'documentsLabel': context_label,
'documents': doc_refs
}
# IMPORTANT: Never synthesize new labels here. If a message lacks
# a documentsLabel, we skip adding an exchange for it.
# Append to appropriate container based on boundary
if doc_exchange:
@ -773,12 +743,22 @@ class WorkflowService:
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
for doc in documents:
try:
# Debug: Log original filename before refresh
original_filename = doc.fileName
logger.debug(f"Before refresh - Document {doc.id}: fileName='{original_filename}' (length: {len(original_filename)})")
# Use the proper WorkflowService method to get file info
file_info = self.getFileInfo(doc.fileId)
if file_info:
db_filename = file_info.get("fileName", doc.fileName)
logger.debug(f"Database filename for {doc.id}: '{db_filename}' (length: {len(db_filename)})")
doc.fileName = file_info.get("fileName", doc.fileName)
doc.fileSize = file_info.get("size", doc.fileSize)
doc.mimeType = file_info.get("mimeType", doc.mimeType)
# Debug: Log final filename after refresh
logger.debug(f"After refresh - Document {doc.id}: fileName='{doc.fileName}' (length: {len(doc.fileName)})")
else:
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
except Exception as e:
@ -794,6 +774,8 @@ class WorkflowService:
def _getDocumentReferenceFromChatDocument(self, document, message) -> str:
"""Get document reference using document ID and filename."""
try:
# Debug logging to track filename truncation
logger.debug(f"Creating document reference for {document.id}: fileName='{document.fileName}' (length: {len(document.fileName)})")
# Use document ID and filename for simple reference
return f"docItem:{document.id}:{document.fileName}"
except Exception as e:
@ -844,14 +826,14 @@ class WorkflowService:
"""Get connection reference list (matching old handlingTasks.py logic)"""
try:
# Get connections from the database using the same logic as the old system
if hasattr(self.serviceCenter, 'interfaceDbApp') and hasattr(self.serviceCenter, 'user'):
userId = self.serviceCenter.user.id
connections = self.serviceCenter.interfaceDbApp.getUserConnections(userId)
if hasattr(self.services, 'interfaceDbApp') and hasattr(self.services, 'user'):
userId = self.services.user.id
connections = self.services.interfaceDbApp.getUserConnections(userId)
if connections:
# Format connections as reference strings using the same pattern as the old system
connectionRefs = []
for conn in connections:
# Create reference string in format: connection:{authority}:{username}:{id} [status:..., token:...]
# Create reference string in format: connection:{authority}:{username} [status:..., token:...]
# This matches the format expected by getUserConnectionFromConnectionReference()
ref = self.getConnectionReferenceFromUserConnection(conn)
connectionRefs.append(ref)

View file

@ -42,9 +42,7 @@ class MethodDocument(MethodBase):
- operationType (str, optional): extract_content | analyze_document | summarize_content. Default: extract_content.
- processDocumentsIndividually (bool, optional): Process each document separately. Default: True.
- chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
- mergeStrategy (dict, optional): Merge strategy for chunked content.
- expectedDocumentFormats (list, optional): Desired output format specs.
- includeMetadata (bool, optional): Include file metadata. Default: True.
- outputMimeType (str, optional): MIME type for output file. Options: "text/plain" (default), "application/json", "text/csv", "text/html". Default: "text/plain".
"""
try:
documentList = parameters.get("documentList")
@ -54,13 +52,7 @@ class MethodDocument(MethodBase):
operationType = parameters.get("operationType", "extract_content")
processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
chunkAllowed = parameters.get("chunkAllowed", True)
mergeStrategy = parameters.get("mergeStrategy", {
"groupBy": "typeGroup",
"orderBy": "id",
"mergeType": "concatenate"
})
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
includeMetadata = parameters.get("includeMetadata", True)
outputMimeType = parameters.get("outputMimeType", "text/plain")
if not documentList:
return ActionResult.isFailure(
@ -87,19 +79,16 @@ class MethodDocument(MethodBase):
compressContext=not chunkAllowed
)
# Add format instructions to prompt if expected formats are provided
# Add format instructions to prompt based on MIME type
enhanced_prompt = prompt
if expectedDocumentFormats:
format_instructions = []
for fmt in expectedDocumentFormats:
extension = fmt.get("extension", ".txt")
mime_type = fmt.get("mimeType", "text/plain")
description = fmt.get("description", "")
format_instructions.append(f"- {extension} ({mime_type}): {description}")
if format_instructions:
enhanced_prompt += f"\n\nPlease format the output as: {', '.join([fmt.get('extension', '.txt') for fmt in expectedDocumentFormats])}"
enhanced_prompt += f"\nExpected formats:\n" + "\n".join(format_instructions)
mime_type_mapping = {
"text/plain": (".txt", "Plain text format"),
"application/json": (".json", "Structured JSON format"),
"text/csv": (".csv", "Table format"),
"text/html": (".html", "HTML format")
}
extension, description = mime_type_mapping.get(outputMimeType, (".txt", "Plain text format"))
enhanced_prompt += f"\n\nPlease format the output as {extension} ({outputMimeType}): {description}"
# Use enhanced AI service for extraction
ai_response = await self.services.ai.callAi(
@ -125,8 +114,16 @@ class MethodDocument(MethodBase):
for i, chatDocument in enumerate(chatDocuments):
# Use the AI response directly - it already contains processed content
final_content = ai_response
final_mime_type = "text/plain"
final_extension = ".txt"
# Determine output format based on MIME type
mime_type_mapping = {
"text/plain": ".txt",
"application/json": ".json",
"text/csv": ".csv",
"text/html": ".html"
}
final_extension = mime_type_mapping.get(outputMimeType, ".txt")
final_mime_type = outputMimeType
# Create meaningful output fileName with workflow context
original_fileName = chatDocument.fileName
@ -156,9 +153,6 @@ class MethodDocument(MethodBase):
error=str(e)
)
@action
async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
"""
@ -175,8 +169,6 @@ class MethodDocument(MethodBase):
- operationType (str, optional): generate_report | analyze_documents. Default: generate_report.
- processDocumentsIndividually (bool, optional): Process per document. Default: True.
- chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
- mergeStrategy (dict, optional): Merging rules for multi-part generation.
- includeMetadata (bool, optional): Include file metadata. Default: True.
"""
try:
documentList = parameters.get("documentList")
@ -188,12 +180,6 @@ class MethodDocument(MethodBase):
operationType = parameters.get("operationType", "generate_report")
processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
chunkAllowed = parameters.get("chunkAllowed", True)
mergeStrategy = parameters.get("mergeStrategy", {
"groupBy": "typeGroup",
"orderBy": "id",
"mergeType": "concatenate"
})
includeMetadata = parameters.get("includeMetadata", True)
if not documentList:
return ActionResult.isFailure(

View file

@ -31,14 +31,14 @@ class MethodAi(MethodBase):
async def process(self, parameters: Dict[str, Any]) -> ActionResult:
"""
GENERAL:
- Purpose: AI-based analysis and content generation with optional document context.
- Input requirements: aiPrompt (required); optional documentList, resultType, processingMode, includeMetadata, operationType, priority, maxCost, maxProcessingTime, requiredTags.
- Output format: Single or multiple documents in requested format.
- Purpose: Process a user prompt with optional unlimited input documents to produce one or many output documents of the SAME format.
- Input requirements: aiPrompt (required); optional documentList.
- Output format: Exactly one file format to select. For multiple output file formats to do different calls.
Parameters:
- aiPrompt (str, required): Instruction for the AI.
- documentList (list, optional): Document reference(s) for context.
- resultType (str, optional): Output extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png). Default: txt.
- resultType (str, optional): Output file extension - only one extension allowed (e.g. txt, json, md, csv, xml, html, pdf, docx, xlsx, png, ...). Default: txt.
- processingMode (str, optional): basic | advanced | detailed. Default: basic.
- includeMetadata (bool, optional): Include metadata when available. Default: True.
- operationType (str, optional): general | generate_plan | analyse_content | generate_content | web_research | image_analysis | image_generation. Default: general.
@ -169,12 +169,12 @@ class MethodAi(MethodBase):
Parameters:
- user_prompt (str, required): Research question or topic.
- urls (list, optional): Specific URLs to crawl.
- max_results (int, optional): Max search results. Default: 10.
- max_pages (int, optional): Max pages to crawl per site. Default: 10.
- max_results (int, optional): Max search results. Default: 5.
- max_pages (int, optional): Max pages to crawl per site. Default: 5.
- search_depth (str, optional): basic | advanced. Default: basic.
- extract_depth (str, optional): basic | advanced. Default: advanced.
- pages_search_depth (int, optional): Crawl depth level. Default: 2.
- country (str, optional): Country code for bias.
- country (str, optional): Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries).
- time_range (str, optional): d | w | m | y.
- topic (str, optional): general | news | academic.
- language (str, optional): Language code (e.g., de, en, fr).
@ -182,8 +182,8 @@ class MethodAi(MethodBase):
try:
user_prompt = parameters.get("user_prompt")
urls = parameters.get("urls")
max_results = parameters.get("max_results", 10)
max_pages = parameters.get("max_pages", 10)
max_results = parameters.get("max_results", 5)
max_pages = parameters.get("max_pages", 5)
search_depth = parameters.get("search_depth", "basic")
extract_depth = parameters.get("extract_depth", "advanced")
pages_search_depth = parameters.get("pages_search_depth", 2)

View file

@ -154,6 +154,12 @@ class MethodOutlook(MethodBase):
if not query or not query.strip():
# No query specified, just get emails from folder
if folder and folder.lower() != "all":
# Use folder name directly for well-known folders, or get folder ID
if folder.lower() in ["inbox", "drafts", "sentitems", "deleteditems"]:
params["$filter"] = f"parentFolderId eq '{folder}'"
else:
# For custom folders, we need to get the folder ID first
# This will be handled by the calling method
params["$filter"] = f"parentFolderId eq '{folder}'"
# Add orderby for basic queries
params["$orderby"] = "receivedDateTime desc"
@ -191,6 +197,16 @@ class MethodOutlook(MethodBase):
# Use only subject search to keep filter simple
# Handle wildcard queries specially
if clean_query == "*" or clean_query == "":
# For wildcard or empty query, don't use contains filter
# Just use folder filter if specified
if folder and folder.lower() != "all":
params["$filter"] = f"parentFolderId eq '{folder}'"
else:
# No filter needed for wildcard search across all folders
pass
else:
params["$filter"] = f"contains(subject,'{clean_query}')"
# Add folder filter if specified
@ -235,6 +251,10 @@ class MethodOutlook(MethodBase):
if '@' in filter_text and '.' in filter_text and ' ' not in filter_text and not filter_text.startswith('from:'):
return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}
# Handle OData filter conditions (contains 'eq', 'ne', 'gt', 'lt', etc.)
if any(op in filter_text.lower() for op in [' eq ', ' ne ', ' gt ', ' lt ', ' ge ', ' le ', ' and ', ' or ']):
return {"$filter": filter_text}
# Handle text content - search in subject
return {"$filter": f"contains(subject,'{filter_text}')"}
@ -300,26 +320,31 @@ class MethodOutlook(MethodBase):
"""
GENERAL:
- Purpose: Read emails and metadata from a mailbox folder.
- Input requirements: connectionReference (required); optional folder, limit, filter, expectedDocumentFormats.
- Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
- Output format: JSON with emails and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- folder (str, optional): Folder to read from. Default: Inbox.
- limit (int, optional): Maximum items to return. Default: 10.
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- filter (str, optional): Sender, query operators, or subject text.
- expectedDocumentFormats (list, optional): Output format preferences.
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
"""
try:
connectionReference = parameters.get("connectionReference")
folder = parameters.get("folder", "Inbox")
limit = parameters.get("limit", 10)
filter = parameters.get("filter")
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
outputMimeType = parameters.get("outputMimeType", "application/json")
if not connectionReference:
return ActionResult.isFailure(error="Connection reference is required")
# Validate limit parameter
if limit <= 0:
limit = 1000
logger.warning(f"Invalid limit value ({limit}), using default value 1000")
# Validate filter parameter if provided
if filter:
# Remove any potentially dangerous characters that could break the filter
@ -343,8 +368,16 @@ class MethodOutlook(MethodBase):
"Content-Type": "application/json"
}
# Build the API request
# Get the folder ID for the specified folder
folder_id = self._getFolderId(folder, connection)
if folder_id:
# Build the API request with folder ID
api_url = f"{graph_url}/me/mailFolders/{folder_id}/messages"
else:
# Fallback: use folder name directly (for well-known folders like "Inbox")
api_url = f"{graph_url}/me/mailFolders/{folder}/messages"
logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
params = {
"$top": limit,
"$orderby": "receivedDateTime desc"
@ -380,7 +413,11 @@ class MethodOutlook(MethodBase):
"count": len(emails_data.get("value", [])),
"folder": folder,
"filter": filter,
"apiResponse": emails_data
"apiMetadata": {
"@odata.context": emails_data.get("@odata.context"),
"@odata.count": emails_data.get("@odata.count"),
"@odata.nextLink": emails_data.get("@odata.nextLink")
}
}
@ -405,18 +442,15 @@ class MethodOutlook(MethodBase):
logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}")
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
# Determine output format based on MIME type
mime_type_mapping = {
"application/json": ".json",
"text/plain": ".txt",
"text/csv": ".csv"
}
output_extension = mime_type_mapping.get(outputMimeType, ".json")
output_mime_type = outputMimeType
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
@ -454,27 +488,32 @@ class MethodOutlook(MethodBase):
"""
GENERAL:
- Purpose: Search emails by query and return matching items with metadata.
- Input requirements: connectionReference (required); query (required); optional folder, limit, expectedDocumentFormats.
- Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
- Output format: JSON with search results and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- query (str, required): Search expression.
- folder (str, optional): Folder scope or All. Default: All.
- limit (int, optional): Maximum items to return. Default: 20.
- expectedDocumentFormats (list, optional): Output format preferences.
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
"""
try:
connectionReference = parameters.get("connectionReference")
query = parameters.get("query")
folder = parameters.get("folder", "All")
limit = parameters.get("limit", 20)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
limit = parameters.get("limit", 1000)
outputMimeType = parameters.get("outputMimeType", "application/json")
# Validate parameters
if not connectionReference:
return ActionResult.isFailure(error="Connection reference is required")
# Validate limit parameter
if limit <= 0:
limit = 1000
logger.warning(f"Invalid limit value ({limit}), using default value 1000")
if not query or not query.strip():
return ActionResult.isFailure(error="Search query is required and cannot be empty")
@ -488,12 +527,15 @@ class MethodOutlook(MethodBase):
# Validate limit
try:
limit = int(limit)
if limit <= 0 or limit > 1000: # Microsoft Graph API has limits
limit = 20
logger.warning(f"Limit {limit} is out of range, using default value 20")
if limit <= 0:
limit = 1000
logger.warning(f"Invalid limit value (<=0), using default value 1000")
elif limit > 1000: # Microsoft Graph API has limits
limit = 1000
logger.warning(f"Limit {limit} exceeds maximum (1000), using 1000")
except (ValueError, TypeError):
limit = 20
logger.warning(f"Invalid limit value, using default value 20")
limit = 1000
logger.warning(f"Invalid limit value, using default value 1000")
# Get Microsoft connection
connection = self._getMicrosoftConnection(connectionReference)
@ -509,9 +551,18 @@ class MethodOutlook(MethodBase):
"Content-Type": "application/json"
}
# Get the folder ID for the specified folder if needed
folder_id = None
if folder and folder.lower() != "all":
folder_id = self._getFolderId(folder, connection)
if folder_id:
logger.debug(f"Found folder ID for '{folder}': {folder_id}")
else:
logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
# Build the search API request
api_url = f"{graph_url}/me/messages"
params = self._buildSearchParameters(query, folder, limit)
params = self._buildSearchParameters(query, folder_id or folder, limit)
# Log search parameters for debugging
logger.debug(f"Search query: '{query}'")
@ -605,7 +656,11 @@ class MethodOutlook(MethodBase):
"count": len(emails),
"folder": folder,
"limit": limit,
"apiResponse": search_data,
"apiMetadata": {
"@odata.context": search_data.get("@odata.context"),
"@odata.count": search_data.get("@odata.count"),
"@odata.nextLink": search_data.get("@odata.nextLink")
},
"searchParams": params
}
@ -618,18 +673,15 @@ class MethodOutlook(MethodBase):
logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}")
return ActionResult.isFailure(error=f"Failed to search emails: {str(e)}")
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
# Determine output format based on MIME type
mime_type_mapping = {
"application/json": ".json",
"text/plain": ".txt",
"text/csv": ".csv"
}
output_extension = mime_type_mapping.get(outputMimeType, ".json")
output_mime_type = outputMimeType
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
@ -664,20 +716,20 @@ class MethodOutlook(MethodBase):
"""
GENERAL:
- Purpose: List draft emails from a folder.
- Input requirements: connectionReference (required); optional folder, limit, expectedDocumentFormats.
- Input requirements: connectionReference (required); optional folder, limit, outputMimeType.
- Output format: JSON with draft items and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- folder (str, optional): Drafts folder to list. Default: Drafts.
- limit (int, optional): Maximum items to return. Default: 20.
- expectedDocumentFormats (list, optional): Output format preferences.
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
"""
try:
connectionReference = parameters.get("connectionReference")
folder = parameters.get("folder", "Drafts")
limit = parameters.get("limit", 20)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
limit = parameters.get("limit", 1000)
outputMimeType = parameters.get("outputMimeType", "application/json")
if not connectionReference:
return ActionResult.isFailure(error="Connection reference is required")
@ -745,18 +797,15 @@ class MethodOutlook(MethodBase):
logger.error(f"Error listing drafts via Microsoft Graph API: {str(e)}")
return ActionResult.isFailure(error=f"Failed to list drafts: {str(e)}")
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
# Determine output format based on MIME type
mime_type_mapping = {
"application/json": ".json",
"text/plain": ".txt",
"text/csv": ".csv"
}
output_extension = mime_type_mapping.get(outputMimeType, ".json")
output_mime_type = outputMimeType
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
@ -790,18 +839,18 @@ class MethodOutlook(MethodBase):
"""
GENERAL:
- Purpose: Find draft emails across folders.
- Input requirements: connectionReference (required); optional limit, expectedDocumentFormats.
- Input requirements: connectionReference (required); optional limit, outputMimeType.
- Output format: JSON with drafts and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- limit (int, optional): Maximum items to return. Default: 50.
- expectedDocumentFormats (list, optional): Output format preferences.
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
"""
try:
connectionReference = parameters.get("connectionReference")
limit = parameters.get("limit", 50)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
outputMimeType = parameters.get("outputMimeType", "application/json")
if not connectionReference:
return ActionResult.isFailure(error="Connection reference is required")
@ -859,18 +908,15 @@ class MethodOutlook(MethodBase):
logger.error(f"Error finding drafts via Microsoft Graph API: {str(e)}")
return ActionResult.isFailure(error=f"Failed to find drafts: {str(e)}")
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
# Determine output format based on MIME type
mime_type_mapping = {
"application/json": ".json",
"text/plain": ".txt",
"text/csv": ".csv"
}
output_extension = mime_type_mapping.get(outputMimeType, ".json")
output_mime_type = outputMimeType
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
@ -930,18 +976,18 @@ class MethodOutlook(MethodBase):
"""
GENERAL:
- Purpose: Check contents of the Drafts folder.
- Input requirements: connectionReference (required); optional limit, expectedDocumentFormats.
- Input requirements: connectionReference (required); optional limit, outputMimeType.
- Output format: JSON with drafts and metadata.
Parameters:
- connectionReference (str, required): Microsoft connection label.
- limit (int, optional): Maximum items to return. Default: 20.
- expectedDocumentFormats (list, optional): Output format preferences.
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
"""
try:
connectionReference = parameters.get("connectionReference")
limit = parameters.get("limit", 20)
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
outputMimeType = parameters.get("outputMimeType", "application/json")
if not connectionReference:
return ActionResult.isFailure(error="Connection reference is required")
@ -1003,18 +1049,15 @@ class MethodOutlook(MethodBase):
logger.error(f"Error checking Drafts folder via Microsoft Graph API: {str(e)}")
return ActionResult.isFailure(error=f"Failed to check Drafts folder: {str(e)}")
# Determine output format based on expected formats
output_extension = ".json" # Default
output_mime_type = "application/json" # Default
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
# Use the first expected format
expected_format = expectedDocumentFormats[0]
output_extension = expected_format.get("extension", ".json")
output_mime_type = expected_format.get("mimeType", "application/json")
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
else:
logger.info("No expected format specified, using default .json format")
# Determine output format based on MIME type
mime_type_mapping = {
"application/json": ".json",
"text/plain": ".txt",
"text/csv": ".csv"
}
output_extension = mime_type_mapping.get(outputMimeType, ".json")
output_mime_type = outputMimeType
logger.info(f"Using output format: {output_extension} ({output_mime_type})")

View file

@ -931,7 +931,8 @@ class MethodSharepoint(MethodBase):
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
# Check if pathQuery contains search terms (words without proper path structure)
if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']
if not any(pathQuery.startswith(prefix) for prefix in valid_path_prefixes):
return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
# For pathQuery, we need to discover sites to find the specific one
@ -1627,7 +1628,8 @@ class MethodSharepoint(MethodBase):
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
# Check if pathQuery contains search terms (words without proper path structure)
if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']
if not any(pathQuery.startswith(prefix) for prefix in valid_path_prefixes):
return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
# For pathQuery, we need to discover sites to find the specific one

View file

# adaptive module for React mode
# Provides adaptive learning capabilities
#
# Fix: a merge artifact left both the old and the new version of the import
# and __all__ lines in place. The old import pulled in DataType and
# ExpectedFormat, which were removed from intentAnalyzer; only the new lines
# are kept.
from .intentAnalyzer import IntentAnalyzer
from .contentValidator import ContentValidator
from .learningEngine import LearningEngine
from .progressTracker import ProgressTracker

# Public API of the adaptive package.
__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker']

View file

@ -1,8 +1,9 @@
# contentValidator.py
# Content validation for adaptive React mode
import re
import logging
import json
import re
from typing import List, Dict, Any
logger = logging.getLogger(__name__)
@ -10,34 +11,14 @@ logger = logging.getLogger(__name__)
class ContentValidator:
"""Validates delivered content against user intent"""
def __init__(self):
pass
def __init__(self, services=None):
self.services = services
def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""Validates delivered content against user intent"""
async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""Validates delivered content against user intent using AI"""
try:
validationDetails = []
for doc in documents:
content = self._extractContent(doc)
detail = self._validateSingleDocument(content, doc, intent)
validationDetails.append(detail)
# Calculate overall success
overallSuccess = all(detail.get("successCriteriaMet", [False]) for detail in validationDetails)
# Calculate quality score
qualityScore = self._calculateQualityScore(validationDetails)
# Generate improvement suggestions
improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent)
return {
"overallSuccess": overallSuccess,
"qualityScore": qualityScore,
"validationDetails": validationDetails,
"improvementSuggestions": improvementSuggestions
}
# Use AI for comprehensive validation
return await self._validateWithAI(documents, intent)
except Exception as e:
logger.error(f"Error validating content: {str(e)}")
@ -56,253 +37,236 @@ class ContentValidator:
except Exception:
return ""
def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
    """Run every per-document check against *intent* and bundle the results.

    Returns a dict with the document name, boolean type/format matches, a
    quality score, the per-criterion success flags, detected issues, and
    document-level improvement suggestions.
    """
    expected_type = intent.get("dataType", "unknown")
    expected_format = intent.get("expectedFormat", "unknown")
    return {
        "documentName": getattr(doc, 'documentName', 'Unknown'),
        "dataTypeMatch": self._checkDataTypeMatch(content, expected_type),
        "formatMatch": self._checkFormatMatch(content, expected_format),
        "qualityScore": self._calculateDocumentQualityScore(content, intent),
        "successCriteriaMet": self._checkSuccessCriteria(content, intent),
        "specificIssues": self._identifySpecificIssues(content, intent),
        "improvementSuggestions": self._generateDocumentImprovementSuggestions(content, intent),
    }
def _checkDataTypeMatch(self, content: str, dataType: str) -> bool:
    """Return True when *content* looks like the expected data type.

    Dispatches to the matching _contains* helper; unknown data types are
    treated permissively (assumed to match).
    """
    checkers = {
        "numbers": self._containsNumbers,
        "text": self._containsText,
        "documents": self._containsDocumentContent,
        "analysis": self._containsAnalysis,
        "code": self._containsCode,
    }
    checker = checkers.get(dataType)
    return checker(content) if checker is not None else True
def _containsNumbers(self, content: str) -> bool:
"""Checks if content contains actual numbers (not code)"""
# Look for actual numbers in the content
numbers = re.findall(r'\b\d+\b', content)
# Check if it's code (contains function definitions, etc.)
isCode = any(keyword in content.lower() for keyword in [
'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
'return', 'print(', 'console.log', 'public ', 'private '
])
# If it's code, it doesn't contain actual numbers
if isCode:
return False
# If it has numbers and it's not code, it contains actual numbers
return len(numbers) > 0
def _containsText(self, content: str) -> bool:
"""Checks if content contains readable text"""
# Remove numbers and special characters
textContent = re.sub(r'[^\w\s]', '', content)
words = textContent.split()
# Check if there are enough words to be considered text
return len(words) > 5
def _containsDocumentContent(self, content: str) -> bool:
"""Checks if content is suitable for document creation"""
# Check for structured content
hasStructure = any(indicator in content for indicator in [
'\n', '\t', '|', '-', '*', '1.', '2.', '', ''
])
# Check for meaningful content
hasMeaningfulContent = len(content.strip()) > 50
return hasStructure and hasMeaningfulContent
def _containsAnalysis(self, content: str) -> bool:
"""Checks if content contains analysis"""
analysisIndicators = [
'analysis', 'findings', 'conclusion', 'summary', 'insights',
'trends', 'patterns', 'comparison', 'evaluation', 'assessment'
]
contentLower = content.lower()
return any(indicator in contentLower for indicator in analysisIndicators)
def _containsCode(self, content: str) -> bool:
"""Checks if content contains code"""
codeIndicators = [
'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
'return', 'print(', 'console.log', 'public ', 'private ', 'void ',
'int ', 'string ', 'var ', 'let ', 'const '
]
contentLower = content.lower()
return any(indicator in contentLower for indicator in codeIndicators)
def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool:
"""Checks if content matches expected format"""
if expectedFormat == "raw_data":
# Raw data should be simple, not heavily formatted
return not any(indicator in content for indicator in [
'<html>', '<div>', '<table>', '## ', '### ', '**', '__'
])
elif expectedFormat == "formatted":
# Formatted content should have structure
return any(indicator in content for indicator in [
'\n', '\t', '|', '-', '*', '1.', '2.', ''
])
elif expectedFormat == "structured":
# Structured content should have clear organization
return any(indicator in content for indicator in [
'{', '}', '[', ']', '|', '\t', ' '
])
else:
return True # Unknown format, assume match
def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]:
"""Checks if content meets success criteria"""
criteriaMet = []
successCriteria = intent.get("successCriteria", [])
for criterion in successCriteria:
if 'prime numbers' in criterion.lower():
# Check if content contains actual prime numbers, not code
hasNumbers = bool(re.search(r'\b\d+\b', content))
isNotCode = not any(keyword in content.lower() for keyword in [
'def ', 'function', 'import ', 'class '
])
criteriaMet.append(hasNumbers and isNotCode)
elif 'document' in criterion.lower():
# Check if content is suitable for document creation
hasStructure = any(indicator in content for indicator in [
'\n', '\t', '|', '-', '*', '1.', '2.'
])
criteriaMet.append(hasStructure)
elif 'format' in criterion.lower():
# Check if content is properly formatted
hasFormatting = any(indicator in content for indicator in [
'\n', '\t', '|', '-', '*', '1.', '2.', ''
])
criteriaMet.append(hasFormatting)
else:
# Generic check - content should not be empty
criteriaMet.append(len(content.strip()) > 0)
return criteriaMet
def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float:
    """Score a single document in [0, 1].

    Weights: 0.2 for non-empty content, 0.3 for a data-type match, 0.2 for a
    format match, and up to 0.3 proportional to the fraction of success
    criteria met.
    """
    score = 0.2 if content.strip() else 0.0  # base credit for having content
    if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")):
        score += 0.3
    if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")):
        score += 0.2
    criteria_met = self._checkSuccessCriteria(content, intent)
    if criteria_met:
        score += 0.3 * (sum(criteria_met) / len(criteria_met))
    return min(score, 1.0)
def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float:
"""Calculates overall quality score from validation details"""
if not validationDetails:
return 0.0
totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails)
return totalScore / len(validationDetails)
def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]:
    """List concrete problems found in *content* relative to *intent*."""
    issues: List[str] = []
    # Code delivered where literal numbers were requested.
    if intent.get("dataType") == "numbers" and self._containsCode(content):
        issues.append("Content contains code instead of actual numbers")
    # Markup present even though raw data was requested.
    formatting_markers = ('<html>', '## ', '**')
    if intent.get("expectedFormat") == "raw_data" and any(
            marker in content for marker in formatting_markers):
        issues.append("Content is formatted when raw data was requested")
    if not content.strip():
        issues.append("Content is empty")
    return issues
def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]:
    """Suggest fixes for a single document based on its mismatch with *intent*."""
    suggestions: List[str] = []
    if intent.get("dataType", "unknown") == "numbers" and self._containsCode(content):
        suggestions.append("Deliver actual numbers, not code to generate them")
    if intent.get("expectedFormat", "unknown") == "raw_data" and any(
            marker in content for marker in ('<html>', '## ')):
        suggestions.append("Provide raw data without formatting")
    if not content.strip():
        suggestions.append("Provide actual content")
    return suggestions
def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]],
intent: Dict[str, Any]) -> List[str]:
"""Generates improvement suggestions based on validation results"""
suggestions = []
# Check for common issues
if not any(detail.get("dataTypeMatch", False) for detail in validationDetails):
dataType = intent.get("dataType", "unknown")
suggestions.append(f"Content should contain {dataType} data, not code or other formats")
if not any(detail.get("formatMatch", False) for detail in validationDetails):
expectedFormat = intent.get("expectedFormat", "unknown")
suggestions.append(f"Content should be in {expectedFormat} format")
# Add specific suggestions from validation details
for detail in validationDetails:
suggestions.extend(detail.get("improvementSuggestions", []))
return list(set(suggestions)) # Remove duplicates
def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
"""Creates a failed validation result"""
return {
"overallSuccess": False,
"qualityScore": 0.0,
"validationDetails": [],
"improvementSuggestions": [f"Validation failed: {error}"]
"improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."]
}
def _isValidJsonResponse(self, response: str) -> bool:
"""Checks if response contains valid JSON structure"""
try:
import re
# Look for JSON with expected structure
json_match = re.search(r'\{[^{}]*"overallSuccess"[^{}]*\}', response, re.DOTALL)
if json_match:
json.loads(json_match.group(0))
return True
return False
except:
return False
def _extractFallbackValidationResult(self, response: str) -> Dict[str, Any]:
    """Extracts validation result from malformed AI response.

    Best-effort salvage path used when the AI did not return clean JSON:
    individual fields are pulled out with regexes and missing ones fall back
    to conservative defaults. Returns None when even that fails.
    """
    try:
        import re
        # Extract key values using regex patterns
        overall_success = re.search(r'"overallSuccess"\s*:\s*(true|false)', response, re.IGNORECASE)
        quality_score = re.search(r'"qualityScore"\s*:\s*([0-9.]+)', response)
        gap_analysis = re.search(r'"gapAnalysis"\s*:\s*"([^"]*)"', response)
        # Determine overall success from context if not found.
        # NOTE: overall_success is rebound from a Match object (or None) to a
        # plain bool here; the isinstance check in the return handles both.
        if not overall_success:
            # Look for positive/negative indicators in the text
            if any(word in response.lower() for word in ['success', 'complete', 'fulfilled', 'satisfied']):
                overall_success = True
            elif any(word in response.lower() for word in ['failed', 'incomplete', 'missing', 'error']):
                overall_success = False
            else:
                overall_success = False
        return {
            # Either the contextual bool from above, or the captured
            # "true"/"false" literal from the regex match.
            "overallSuccess": overall_success if isinstance(overall_success, bool) else (overall_success.group(1).lower() == 'true' if overall_success else False),
            # 0.5 is the neutral default when no score could be parsed.
            "qualityScore": float(quality_score.group(1)) if quality_score else 0.5,
            "validationDetails": [{
                "documentName": "AI Validation (Fallback)",
                "gapAnalysis": gap_analysis.group(1) if gap_analysis else "Unable to parse detailed analysis",
                "successCriteriaMet": [False]  # Conservative fallback
            }],
            "improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"]
        }
    except Exception as e:
        # Salvage failed entirely; caller treats None as "no fallback available".
        logger.error(f"Fallback extraction failed: {str(e)}")
        return None
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
"""AI-based comprehensive validation - single main function"""
try:
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
return self._createFailedValidationResult("AI service not available")
# Extract content from all documents
documentContents = []
for doc in documents:
content = self._extractContent(doc)
documentContents.append({
"name": getattr(doc, 'documentName', 'Unknown'),
"content": content[:2000] # Limit content for AI processing
})
# Create comprehensive AI validation prompt
validationPrompt = f"""
You are a comprehensive task completion validator. Analyze if the delivered content fulfills the user's request.
USER REQUEST: {intent.get('primaryGoal', 'Unknown')}
EXPECTED DATA TYPE: {intent.get('dataType', 'unknown')}
EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')}
SUCCESS CRITERIA: {intent.get('successCriteria', [])}
DELIVERED CONTENT:
{json.dumps(documentContents, indent=2)}
Perform comprehensive validation:
1. Check if content matches expected data type
2. Check if content matches expected format
3. Verify success criteria are met
4. Assess overall quality and completeness
5. Identify specific gaps and issues
6. Provide actionable next steps
CRITICAL: You MUST respond with ONLY the JSON object below. NO TEXT ANALYSIS. NO EXPLANATIONS. NO OTHER CONTENT.
RESPOND WITH THIS EXACT JSON FORMAT:
{{
"overallSuccess": false,
"qualityScore": 0.5,
"dataTypeMatch": false,
"formatMatch": false,
"successCriteriaMet": [false, false],
"gapAnalysis": "Content does not match expected format and lacks required elements",
"improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"],
"validationDetails": [
{{
"documentName": "Content Validation",
"issues": ["Format mismatch", "Missing required elements"],
"suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"]
}}
]
}}
"""
# Call AI service for validation
from modules.datamodels.datamodelAi import AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
response = await self.services.ai.callAi(
prompt=validationPrompt,
documents=None,
options=request_options
)
# If first attempt fails, try with more explicit prompt
if response and not self._isValidJsonResponse(response):
logger.debug("First AI validation attempt failed, retrying with explicit JSON-only prompt")
explicitPrompt = f"""
VALIDATE AND RETURN JSON ONLY - NO TEXT ANALYSIS
Request: {intent.get('primaryGoal', 'Unknown')}
Data Type: {intent.get('dataType', 'unknown')}
Format: {intent.get('expectedFormat', 'unknown')}
Criteria: {intent.get('successCriteria', [])}
Content: {json.dumps(documentContents, indent=2)}
RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT:
{{
"overallSuccess": false,
"qualityScore": 0.3,
"dataTypeMatch": false,
"formatMatch": false,
"successCriteriaMet": [false, false],
"gapAnalysis": "Content does not match expected format and lacks required elements",
"improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"],
"validationDetails": [
{{
"documentName": "Content Validation",
"issues": ["Format mismatch", "Missing required elements"],
"suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"]
}}
]
}}
"""
response = await self.services.ai.callAi(
prompt=explicitPrompt,
documents=None,
options=request_options
)
if not response or not response.strip():
logger.warning("AI validation returned empty response")
return self._createFailedValidationResult("AI validation failed - empty response")
# Clean and extract JSON from response
result = response.strip()
logger.debug(f"AI validation response length: {len(result)}")
# Try to find JSON in the response with multiple strategies
import re
# Strategy 1: Look for JSON in markdown code blocks
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', result, re.DOTALL)
if json_match:
result = json_match.group(1)
logger.debug(f"Extracted JSON from markdown code block: {result[:200]}...")
else:
# Strategy 2: Look for JSON object with proper structure
json_match = re.search(r'\{[^{}]*"overallSuccess"[^{}]*\}', result, re.DOTALL)
if not json_match:
# Strategy 3: Look for any JSON object
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if json_match:
result = json_match.group(0)
logger.debug(f"Extracted JSON directly: {result[:200]}...")
else:
logger.debug(f"No JSON found in AI response, trying fallback extraction: {result[:200]}...")
logger.debug(f"Full AI response: {result}")
# Try fallback extraction for text responses
fallback_result = self._extractFallbackValidationResult(result)
if fallback_result:
logger.info("Using fallback text extraction for validation")
return fallback_result
logger.warning("All AI validation attempts failed - no JSON found and fallback extraction failed")
return self._createFailedValidationResult("AI validation failed - no JSON in response")
try:
aiResult = json.loads(result)
logger.info("AI validation JSON parsed successfully")
return {
"overallSuccess": aiResult.get("overallSuccess", False),
"qualityScore": aiResult.get("qualityScore", 0.0),
"validationDetails": aiResult.get("validationDetails", [{
"documentName": "AI Validation",
"gapAnalysis": aiResult.get("gapAnalysis", ""),
"successCriteriaMet": aiResult.get("successCriteriaMet", [False])
}]),
"improvementSuggestions": aiResult.get("improvementSuggestions", [])
}
except json.JSONDecodeError as json_error:
logger.warning(f"All AI validation attempts failed - invalid JSON: {str(json_error)}")
logger.debug(f"JSON content: {result}")
# Try to extract key information from malformed response
fallbackResult = self._extractFallbackValidationResult(result)
if fallbackResult:
logger.info("Using fallback validation result from malformed JSON")
return fallbackResult
return self._createFailedValidationResult(f"AI validation failed - invalid JSON: {str(json_error)}")
return self._createFailedValidationResult("AI validation failed - no response")
except Exception as e:
logger.error(f"AI validation failed: {str(e)}")
return self._createFailedValidationResult(f"AI validation error: {str(e)}")

View file

@ -1,228 +1,156 @@
# intentAnalyzer.py
# Intent analysis for adaptive React mode
# Intent analysis for adaptive React mode - AI-based, language-agnostic
import re
import json
import logging
from typing import Dict, Any, List
from enum import Enum
logger = logging.getLogger(__name__)
class DataType(Enum):
    """Kind of deliverable a user request asks for.

    Used by IntentAnalyzer to label the expected output; UNKNOWN marks
    prompts that match no known category.
    """
    NUMBERS = "numbers"      # numeric output (counts, lists, sequences)
    TEXT = "text"            # free-form textual content
    DOCUMENTS = "documents"  # generated files (reports, presentations, ...)
    ANALYSIS = "analysis"    # analytical findings / insights
    CODE = "code"            # program code or scripts
    UNKNOWN = "unknown"      # no category could be determined
class ExpectedFormat(Enum):
    """Presentation format the user expects for the result.

    UNKNOWN marks prompts where no format preference was detected.
    """
    RAW_DATA = "raw_data"      # plain, unformatted values
    FORMATTED = "formatted"    # human-readable presentation (tables, charts)
    STRUCTURED = "structured"  # machine-readable structure (JSON, CSV, ...)
    VISUAL = "visual"          # visual output
    UNKNOWN = "unknown"        # no format preference detected
class IntentAnalyzer:
"""Analyzes user intent to understand what they actually want"""
"""Analyzes user intent using AI - language-agnostic and generic"""
def __init__(self):
self.dataTypePatterns = {
DataType.NUMBERS: [
r'\b(numbers?|digits?|count|list|sequence)\b',
r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
r'\b(calculate|compute|generate)\s+(numbers?)\b',
r'\b(first|last)\s+\d+\s+(numbers?)\b'
],
DataType.TEXT: [
r'\b(text|content|words?|sentences?|paragraphs?)\b',
r'\b(write|create|generate)\s+(text|content)\b',
r'\b(summary|description|explanation)\b',
r'\b(article|essay|report)\b'
],
DataType.DOCUMENTS: [
r'\b(document|file|report|pdf|word|excel)\b',
r'\b(create|generate|make)\s+(document|file|report)\b',
r'\b(format|structure|organize)\s+(document)\b',
r'\b(presentation|slides?)\b'
],
DataType.ANALYSIS: [
r'\b(analyze|analysis|examine|study|evaluate)\b',
r'\b(insights?|findings?|results?)\b',
r'\b(compare|contrast|evaluate)\b',
r'\b(trends?|patterns?)\b'
],
DataType.CODE: [
r'\b(code|program|script|algorithm|function)\b',
r'\b(write|create|develop)\s+(code|program|script)\b',
r'\b(implement|build|construct)\b',
r'\b(debug|fix|optimize)\s+(code)\b'
]
}
def __init__(self, services=None):
self.services = services
self.formatPatterns = {
ExpectedFormat.RAW_DATA: [
r'\b(raw|plain|simple|basic)\b',
r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
r'\b(just|only)\s+(numbers?|data)\b'
],
ExpectedFormat.FORMATTED: [
r'\b(formatted|structured|organized|presented)\b',
r'\b(table|chart|graph|visual)\b',
r'\b(pretty|nice|clean)\s+(format|presentation)\b',
r'\b(professional|polished)\b'
],
ExpectedFormat.STRUCTURED: [
r'\b(json|xml|csv|structured)\b',
r'\b(organized|categorized|grouped)\b',
r'\b(systematic|methodical)\b',
r'\b(database|spreadsheet)\b'
]
}
def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
"""Analyzes user intent from prompt and context"""
async def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
"""Analyzes user intent from prompt and context using AI"""
try:
# Extract primary goal
primaryGoal = self._extractPrimaryGoal(userPrompt)
# Use AI to analyze intent
aiAnalysis = await self._analyzeIntentWithAI(userPrompt, context)
if aiAnalysis:
return aiAnalysis
# Classify data type
dataType = self._classifyDataType(userPrompt)
# Determine expected format
expectedFormat = self._determineExpectedFormat(userPrompt)
# Assess quality requirements
qualityRequirements = self._assessQualityRequirements(userPrompt, context)
# Extract success criteria
successCriteria = self._extractSuccessCriteria(userPrompt, context)
# Calculate confidence score
confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria)
return {
"primaryGoal": primaryGoal,
"dataType": dataType.value,
"expectedFormat": expectedFormat.value,
"qualityRequirements": qualityRequirements,
"successCriteria": successCriteria,
"confidenceScore": confidenceScore
}
# Fallback to basic analysis if AI fails
return self._createBasicIntentAnalysis(userPrompt)
except Exception as e:
logger.error(f"Error analyzing user intent: {str(e)}")
return self._createDefaultIntentAnalysis(userPrompt)
def _extractPrimaryGoal(self, userPrompt: str) -> str:
"""Extracts the primary goal from user prompt"""
# Simple extraction - can be enhanced
return userPrompt.strip()
async def _analyzeIntentWithAI(self, userPrompt: str, context: Any) -> Dict[str, Any]:
"""Uses AI to analyze user intent - language-agnostic"""
try:
if not self.services or not hasattr(self.services, 'ai'):
return None
def _classifyDataType(self, userPrompt: str) -> DataType:
"""Classifies the type of data the user wants"""
promptLower = userPrompt.lower()
# Create AI analysis prompt
analysisPrompt = f"""
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
for dataType, patterns in self.dataTypePatterns.items():
for pattern in patterns:
if re.search(pattern, promptLower):
return dataType
USER REQUEST: {userPrompt}
return DataType.UNKNOWN
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
"""Determines the expected format of the output"""
promptLower = userPrompt.lower()
Analyze the user's intent and determine:
1. What type of data/content they want (numbers, text, documents, analysis, code, etc.)
2. What format they expect (raw data, formatted, structured, visual, etc.)
3. What quality requirements they have (accuracy, completeness, format)
4. What specific success criteria define completion
for formatType, patterns in self.formatPatterns.items():
for pattern in patterns:
if re.search(pattern, promptLower):
return formatType
CRITICAL: Respond with ONLY the JSON object below. Do not include any explanatory text, analysis, or other content before or after the JSON.
return ExpectedFormat.UNKNOWN
{{
"primaryGoal": "The main objective the user wants to achieve",
"dataType": "numbers|text|documents|analysis|code|unknown",
"expectedFormat": "raw_data|formatted|structured|visual|unknown",
"qualityRequirements": {{
"accuracyThreshold": 0.0-1.0,
"completenessThreshold": 0.0-1.0,
"formatRequirement": "any|formatted|raw|structured"
}},
"successCriteria": ["specific criterion 1", "specific criterion 2"],
"confidenceScore": 0.0-1.0
}}
"""
def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
"""Assesses quality requirements from prompt and context"""
promptLower = userPrompt.lower()
# Call AI service for analysis
from modules.datamodels.datamodelAi import AiCallOptions, OperationType
request_options = AiCallOptions()
request_options.operationType = OperationType.GENERAL
# Check for accuracy requirements
accuracyThreshold = 0.8
if any(word in promptLower for word in ['exact', 'precise', 'accurate', 'correct']):
accuracyThreshold = 0.95
elif any(word in promptLower for word in ['approximate', 'rough', 'estimate']):
accuracyThreshold = 0.7
response = await self.services.ai.callAi(
prompt=analysisPrompt,
documents=None,
options=request_options
)
# Check for completeness requirements
completenessThreshold = 0.8
if any(word in promptLower for word in ['complete', 'full', 'comprehensive', 'all']):
completenessThreshold = 0.95
elif any(word in promptLower for word in ['summary', 'brief', 'overview']):
completenessThreshold = 0.6
# If first attempt fails, try with more explicit prompt
if response and not self._isValidJsonResponse(response):
logger.debug("First AI intent analysis attempt failed, retrying with explicit JSON-only prompt")
explicitPrompt = f"""
{analysisPrompt}
# Check for format requirements
formatRequirement = "any"
if any(word in promptLower for word in ['formatted', 'structured', 'organized']):
formatRequirement = "formatted"
elif any(word in promptLower for word in ['raw', 'plain', 'simple']):
formatRequirement = "raw"
IMPORTANT: You must respond with ONLY valid JSON. No explanations, no analysis, no text before or after. Just the JSON object.
"""
response = await self.services.ai.callAi(
prompt=explicitPrompt,
documents=None,
options=request_options
)
if not response or not response.strip():
logger.warning("AI intent analysis returned empty response")
return None
# Clean and extract JSON from response
result = response.strip()
logger.debug(f"AI intent analysis response length: {len(result)}")
# Try to find JSON in the response with multiple strategies
import re
# Strategy 1: Look for JSON in markdown code blocks
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', result, re.DOTALL)
if json_match:
result = json_match.group(1)
logger.debug(f"Extracted JSON from markdown code block: {result[:200]}...")
else:
# Strategy 2: Look for JSON object with proper structure
json_match = re.search(r'\{[^{}]*"primaryGoal"[^{}]*\}', result, re.DOTALL)
if not json_match:
# Strategy 3: Look for any JSON object
json_match = re.search(r'\{.*\}', result, re.DOTALL)
if not json_match:
logger.warning(f"All AI intent analysis attempts failed - no JSON found in response: {result[:200]}...")
logger.debug(f"Full AI response: {result}")
return None
result = json_match.group(0)
logger.debug(f"Extracted JSON directly: {result[:200]}...")
try:
aiResult = json.loads(result)
logger.info("AI intent analysis JSON parsed successfully")
return aiResult
except json.JSONDecodeError as json_error:
logger.warning(f"All AI intent analysis attempts failed - invalid JSON: {str(json_error)}")
logger.debug(f"JSON content: {result}")
return None
return None
except Exception as e:
logger.error(f"AI intent analysis failed: {str(e)}")
return None
def _createBasicIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
"""Creates basic intent analysis without AI"""
return {
"accuracyThreshold": accuracyThreshold,
"completenessThreshold": completenessThreshold,
"formatRequirement": formatRequirement
"primaryGoal": userPrompt.strip(),
"dataType": "unknown",
"expectedFormat": "unknown",
"qualityRequirements": {
"accuracyThreshold": 0.8,
"completenessThreshold": 0.8,
"formatRequirement": "any"
},
"successCriteria": ["Delivers what the user requested"],
"confidenceScore": 0.5
}
def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
"""Extracts success criteria from prompt and context"""
criteria = []
promptLower = userPrompt.lower()
# Extract explicit criteria
if 'first' in promptLower and 'numbers' in promptLower:
criteria.append("Contains the first N numbers as requested")
if 'prime' in promptLower:
criteria.append("Contains actual prime numbers, not code to generate them")
if 'document' in promptLower:
criteria.append("Creates a properly formatted document")
if 'format' in promptLower:
criteria.append("Content is properly formatted as requested")
# Add context-based criteria
if hasattr(context, 'task_step') and context.task_step:
taskObjective = context.task_step.objective.lower()
if 'word' in taskObjective:
criteria.append("Creates a Word document")
if 'excel' in taskObjective:
criteria.append("Creates an Excel spreadsheet")
return criteria if criteria else ["Delivers what the user requested"]
def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
successCriteria: List[str]) -> float:
"""Calculates confidence score for the intent analysis"""
score = 0.0
# Data type confidence
if dataType != DataType.UNKNOWN:
score += 0.3
# Format confidence
if expectedFormat != ExpectedFormat.UNKNOWN:
score += 0.2
# Success criteria confidence
if len(successCriteria) > 0:
score += 0.3
# Additional confidence for specific patterns
if len(successCriteria) > 1:
score += 0.2
return min(score, 1.0)
def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
"""Creates a default intent analysis when analysis fails"""
return {
@ -237,3 +165,16 @@ class IntentAnalyzer:
"successCriteria": ["Delivers what the user requested"],
"confidenceScore": 0.1
}
def _isValidJsonResponse(self, response: str) -> bool:
"""Checks if response contains valid JSON structure"""
try:
import re
# Look for JSON with expected structure
json_match = re.search(r'\{[^{}]*"primaryGoal"[^{}]*\}', response, re.DOTALL)
if json_match:
json.loads(json_match.group(0))
return True
return False
except:
return False

View file

@ -31,8 +31,8 @@ class ReactMode(BaseMode):
def __init__(self, services, workflow):
super().__init__(services, workflow)
# Initialize adaptive components
self.intentAnalyzer = IntentAnalyzer()
self.contentValidator = ContentValidator()
self.intentAnalyzer = IntentAnalyzer(services)
self.contentValidator = ContentValidator(services)
self.learningEngine = LearningEngine()
self.progressTracker = ProgressTracker()
self.currentIntent = None
@ -49,13 +49,14 @@ class ReactMode(BaseMode):
"""Execute task using React mode - iterative plan-act-observe-refine loop"""
logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
# NEW: Analyze user intent with both original prompt and task objective
# Get original user prompt from services (clean and reliable)
# NEW: Analyze intents separately for proper validation vs task completion
# Workflow-level intent from cleaned original user prompt
original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective
combined_context = f"Original request: {original_prompt}\n\nCurrent task: {taskStep.objective}"
self.currentIntent = self.intentAnalyzer.analyzeUserIntent(combined_context, context)
logger.info(f"Intent analysis (original + task): {self.currentIntent}")
self.workflowIntent = await self.intentAnalyzer.analyzeUserIntent(original_prompt, context)
# Task-level intent from current task objective (used only for task-scoped checks)
self.taskIntent = await self.intentAnalyzer.analyzeUserIntent(taskStep.objective, context)
logger.info(f"Intent analysis — workflow: {self.workflowIntent}")
logger.info(f"Intent analysis — task: {self.taskIntent}")
# NEW: Reset progress tracking for new task
self.progressTracker.reset()
@ -99,18 +100,18 @@ class ReactMode(BaseMode):
# Attach deterministic label for clarity
observation['resultLabel'] = result.resultLabel
# NEW: Add content validation
if self.currentIntent and result.documents:
validationResult = self.contentValidator.validateContent(result.documents, self.currentIntent)
# NEW: Add content validation (against original cleaned user prompt / workflow intent)
if getattr(self, 'workflowIntent', None) and result.documents:
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent)
observation['contentValidation'] = validationResult
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")
# NEW: Learn from feedback
feedback = self._collectFeedback(result, validationResult, self.currentIntent)
self.learningEngine.learnFromFeedback(feedback, context, self.currentIntent)
feedback = self._collectFeedback(result, validationResult, self.workflowIntent)
self.learningEngine.learnFromFeedback(feedback, context, self.workflowIntent)
# NEW: Update progress
self.progressTracker.updateProgress(result, validationResult, self.currentIntent)
self.progressTracker.updateProgress(result, validationResult, self.workflowIntent)
decision = await self._refineDecide(context, observation)
@ -204,6 +205,11 @@ class ReactMode(BaseMode):
selection = json.loads(response[jsonStart:jsonEnd])
if 'action' not in selection or not isinstance(selection['action'], str):
raise ValueError("Selection missing 'action' as string")
# Validate document references - prevent AI from inventing Message IDs
if 'requiredInputDocuments' in selection:
self._validateDocumentReferences(selection['requiredInputDocuments'], context)
# Enforce spec: Stage 1 must NOT include 'parameters'
if 'parameters' in selection:
# Remove to avoid accidental carryover
@ -213,6 +219,38 @@ class ReactMode(BaseMode):
selection['parameters'] = None
return selection
def _validateDocumentReferences(self, document_refs: List[str], context: TaskContext) -> None:
"""Validate that document references exist in the current workflow"""
if not document_refs:
return
# Get available documents from the current workflow
try:
available_docs = self.services.workflow.getAvailableDocuments(self.services.currentWorkflow)
if not available_docs or available_docs == "No documents available":
logger.warning("No documents available for validation")
return
# Extract all valid references from available documents
valid_refs = []
for line in available_docs.split('\n'):
if 'docList:' in line or 'docItem:' in line:
# Extract reference from line like " - docList:msg_xxx:label" or " - docItem:xxx:filename with spaces"
ref_match = re.search(r'(docList:[^\s]+|docItem:[^\s]+(?:\s+[^\s]+)*)', line)
if ref_match:
valid_refs.append(ref_match.group(1))
# Check if all provided references are valid
for ref in document_refs:
if ref not in valid_refs:
logger.error(f"Invalid document reference: {ref}")
logger.error(f"Available references: {valid_refs}")
raise ValueError(f"Document reference '{ref}' not found in available documents. Use only exact references from AVAILABLE_DOCUMENTS_INDEX.")
except Exception as e:
logger.error(f"Error validating document references: {str(e)}")
raise ValueError(f"Failed to validate document references: {str(e)}")
async def _actExecute(self, context: TaskContext, selection: Dict[str, Any], taskStep: TaskStep,
workflow: ChatWorkflow, stepIndex: int) -> ActionResult:
"""Act: request minimal parameters then execute selected action"""

View file

@ -42,13 +42,24 @@ def extractUserPrompt(context: Any) -> str:
Fallback to the task_step objective.
"""
try:
# Prefer services.currentUserPrompt when accessible through context
services = getattr(context, 'services', None)
if services and getattr(services, 'currentUserPrompt', None):
return services.currentUserPrompt
except Exception:
pass
# Determine raw user prompt from services or task_step
rawPrompt = None
if services and getattr(services, 'currentUserPrompt', None):
rawPrompt = services.currentUserPrompt
elif hasattr(context, 'task_step') and context.task_step:
rawPrompt = context.task_step.objective or 'No request specified'
else:
rawPrompt = 'No request specified'
# Prefer values computed at workflow start by WorkflowManager analyzer
normalized = getattr(services, 'currentUserPromptNormalized', None) if services else None
if normalized:
return normalized
return rawPrompt
except Exception:
# Robust fallback behavior
if hasattr(context, 'task_step') and context.task_step:
return context.task_step.objective or 'No request specified'
return 'No request specified'
@ -57,19 +68,11 @@ def extractWorkflowHistory(service: Any, context: Any) -> str:
"""Extract workflow history from context. Maps to {{KEY:WORKFLOW_HISTORY}}
Reverse-chronological, enriched with message summaries and document labels.
"""
# Prefer explicit workflow on context; else fall back to services.workflow
workflow = None
try:
if hasattr(context, 'workflow') and context.workflow:
workflow = context.workflow
elif hasattr(service, 'workflow') and service.workflow:
workflow = service.workflow
except Exception:
workflow = None
if workflow:
history = getPreviousRoundContext(service, workflow)
history = getPreviousRoundContext(service, service.currentWorkflow)
return history or "No previous workflow rounds available"
except Exception as e:
logger.error(f"Error getting workflow history: {str(e)}")
return "No previous workflow rounds available"
def extractAvailableMethods(service: Any) -> str:
@ -99,7 +102,15 @@ def extractAvailableMethods(service: Any) -> str:
def extractUserLanguage(service: Any) -> str:
    """Extract user language from service. Maps to {{KEY:USER_LANGUAGE}}

    Preference order: the detected language (service.currentUserLanguage),
    then the user's profile language, then the 'en' default.
    """
    fallback = 'en'
    try:
        if not service:
            return fallback
        detected = getattr(service, 'currentUserLanguage', None)
        if detected:
            return detected
        user = service.user
        if user:
            return user.language
        return fallback
    except Exception:
        # Best-effort extraction: any missing attribute falls back to English.
        return fallback
# Normalization now happens centrally in WorkflowManager._sendFirstMessage; no AI call here.
def _computeMessageSummary(msg) -> str:
@ -371,9 +382,10 @@ def extractLatestRefinementFeedback(context: Any) -> str:
def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
"""Summary of available documents (count only)."""
try:
documents = service.workflow.getAvailableDocuments(context.workflow)
documents = service.workflow.getAvailableDocuments(service.currentWorkflow)
if documents and documents != "No documents available":
doc_count = documents.count("docList:") + documents.count("docItem:")
# Count only actual documents, not list labels
doc_count = documents.count("docItem:")
return f"{doc_count} documents available from previous tasks"
return "No documents available"
except Exception as e:
@ -383,7 +395,7 @@ def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
def extractAvailableDocumentsIndex(service: Any, context: Any) -> str:
"""Index of available documents with detailed references for parameter generation."""
try:
return service.workflow.getAvailableDocuments(context.workflow)
return service.workflow.getAvailableDocuments(service.currentWorkflow)
except Exception as e:
logger.error(f"Error getting document index: {str(e)}")
return "No documents available"

View file

@ -32,7 +32,7 @@ def generateReactPlanSelectionPrompt(services, context: Any) -> PromptBundle:
PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
]
template = """Select exactly one action to advance the task.
template = """Select exactly one next action to advance the task incrementally.
OBJECTIVE:
{{KEY:USER_PROMPT}}
@ -52,7 +52,11 @@ AVAILABLE_DOCUMENTS_INDEX:
AVAILABLE_CONNECTIONS_INDEX:
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text):
REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text). The chosen action MUST:
- be the next logical incremental step toward fulfilling the objective
- not attempt to complete the entire objective in one step
- if producing files, target exactly one output format for this step
- reference ONLY existing document IDs/labels from AVAILABLE_DOCUMENTS_INDEX
{{
"action": "method.action_name",
"actionObjective": "...",
@ -64,7 +68,7 @@ REPLY: Return ONLY a JSON object with the following structure (no comments, no e
EXAMPLE how to assign references from AVAILABLE_DOCUMENTS_INDEX and AVAILABLE_CONNECTIONS_INDEX:
"requiredInputDocuments": ["docList:msg_47a7a578-e8f2-4ba8-ac66-0dbff40605e0:round8_task1_action1_results","docItem:5d8b7aee-b546-4487-b6a8-835c86f7b186:AI_Generated_Document_20251006-104256.docx"],
"requiredConnection": "connection:msft:p.motsch@valueon.ch:1ae8b8e5-128b-49b8-b1cb-7c632669eeae",
"requiredConnection": "connection:msft:p.motsch@valueon.ch",
RULES:
1. Use EXACT action names from AVAILABLE_METHODS
@ -72,7 +76,11 @@ RULES:
3. parametersContext must be short and sufficient for Stage 2
4. Return ONLY JSON - no markdown, no explanations
5. For requiredInputDocuments, use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
- DO NOT invent or modify Message IDs
- DO NOT create new references
- Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX
6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX
7. Plan incrementally: if the overall intent needs multiple output formats (e.g., CSV and HTML), choose one format in this step and leave the other(s) for subsequent steps
"""
return PromptBundle(prompt=template, placeholders=placeholders)

View file

@ -28,6 +28,8 @@ def generateTaskPlanningPrompt(services, context: Any) -> PromptBundle:
Break down user requests into logical, executable task steps.
**IMPORTANT**: If the user asks for ONE complete business objective, create ONLY ONE task that accomplishes the entire objective. Do NOT split it into multiple micro-tasks.
## 📋 Context
### User Request
@ -46,12 +48,20 @@ Break down user requests into logical, executable task steps.
- **ONE TOPIC PER TASK** - Each task should handle one complete business objective
- **HIGH-LEVEL FOCUS** - Plan strategic outcomes, not implementation steps
- **AVOID MICRO-TASKS** - Don't create separate tasks for each small action
- **CRITICAL**: If the user asks for ONE thing (like "analyse document list and produce summary"), create ONLY ONE task that does the complete job
### Task Grouping Examples
- **Research + Analysis + Report** → ONE task: "Web research report"
- **Data Collection + Processing + Visualization** → ONE task: "Collect and present data"
- **Document splitting** (analyze + extract + create files) → ONE task: "Split document into separate files"
- **Different topics** (email + flowers) → SEPARATE tasks: "Send formal email..." + "Order flowers from Fleurop for delivery to 123 Main St, include card message"
### Common Single-Task Scenarios
- **"Split document into sections"** → ONE task: "Split document into separate files"
- **"Extract data and create report"** → ONE task: "Extract data and create report"
- **"Analyze and summarize document"** → ONE task: "Analyze and summarize document"
- **"Convert file to different format"** → ONE task: "Convert file to different format"
### Retry Handling
- **If retry request**: Analyze previous rounds to understand what failed
- **Learn from mistakes**: Improve the plan based on previous failures

View file

@ -216,23 +216,23 @@ class WorkflowManager:
# Update the message with documents in database
self.services.workflow.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
# Analyze the user's input to extract intent and offload bulky context into documents
# Analyze the user's input to detect language, normalize request, extract intent, and offload bulky context into documents
try:
analyzerPrompt = (
"You are an input analyzer. Split the user's message into:\n"
"1) intent: the user's core request in one concise paragraph, normalized to the user's language.\n"
"2) contextItems: supportive data to attach as separate documents if significantly larger than the intent. "
"Include large literal data blocks, long lists/tables, code/JSON blocks, quoted transcripts, CSV fragments, or detailed specs. "
"Keep URLs in the intent unless they include large pasted content.\n\n"
"You are an input analyzer. From the user's message, perform ALL of the following in one pass:\n"
"1) detectedLanguage: detect ISO 639-1 language code (e.g., de, en).\n"
"2) normalizedRequest: full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details.\n"
"3) intent: concise single-paragraph core request in the detected language for high-level routing.\n"
"4) contextItems: supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content.\n\n"
"Rules:\n"
"- If total content length (intent + data) is less than 10% of the model's max tokens, do not extract; "
"return an empty contextItems and keep a compact, self-contained intent.\n"
"- If content exceeds that, move bulky parts into contextItems, keeping the intent short and clear.\n"
"- Preserve critical references (URLs, filenames) in the intent.\n"
"- Normalize the intent to the detected language. If mixed-language, use the primary detected language and normalize.\n\n"
"Output JSON only (no markdown):\n"
"- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained.\n"
"- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear.\n"
"- Preserve critical references (URLs, filenames) in intent.\n"
"- Normalize to the primary detected language if mixed-language.\n\n"
"Return ONLY JSON (no markdown) with this shape:\n"
"{\n"
" \"detectedLanguage\": \"en\",\n"
" \"detectedLanguage\": \"de|en|fr|it|...\",\n"
" \"normalizedRequest\": \"Full explicit instruction in detected language\",\n"
" \"intent\": \"Concise normalized request...\",\n"
" \"contextItems\": [\n"
" {\n"
@ -249,6 +249,7 @@ class WorkflowManager:
aiResponse = await self.services.ai.callAi(prompt=analyzerPrompt)
detectedLanguage = None
normalizedRequest = None
intentText = userInput.prompt
contextItems = []
@ -260,6 +261,7 @@ class WorkflowManager:
if jsonStart != -1 and jsonEnd > jsonStart:
parsed = json.loads(aiResponse[jsonStart:jsonEnd])
detectedLanguage = parsed.get('detectedLanguage') or None
normalizedRequest = parsed.get('normalizedRequest') or None
if parsed.get('intent'):
intentText = parsed.get('intent')
contextItems = parsed.get('contextItems') or []
@ -269,7 +271,18 @@ class WorkflowManager:
# Update services state
if detectedLanguage and isinstance(detectedLanguage, str):
self._setUserLanguage(detectedLanguage)
try:
setattr(self.services, 'currentUserLanguage', detectedLanguage)
except Exception:
pass
self.services.currentUserPrompt = intentText or userInput.prompt
try:
if normalizedRequest:
setattr(self.services, 'currentUserPromptNormalized', normalizedRequest)
if contextItems is not None:
setattr(self.services, 'currentUserContextItems', contextItems)
except Exception:
pass
# Telemetry (sizes and counts)
try:
@ -329,8 +342,6 @@ class WorkflowManager:
if not message.documents:
message.documents = []
message.documents.extend(created_docs)
# Ensure label is user_context for discoverability
message.documentsLabel = context_label
self.services.workflow.updateMessage(message.id, {
"documents": [d.to_dict() for d in message.documents],
"documentsLabel": context_label

View file

@ -41,6 +41,7 @@ markdown
## Web Scraping & HTTP
beautifulsoup4==4.12.2 # Required for HTML/XML parsing
requests==2.31.0
requests-oauthlib==1.3.1 # Required for Google OAuth2Session
chardet>=5.0.0 # Required for character-set detection of web content
aiohttp>=3.8.0 # Required for SharePoint operations (async HTTP)
selenium>=4.15.0 # Required for web automation and JavaScript-heavy pages

555
test_document_processing.py Normal file
View file

@ -0,0 +1,555 @@
"""
Test script for document processing and DOCX generation.
Calls the main AI service directly to process PDF documents and generate DOCX summaries.
"""
import asyncio
import sys
import os
import logging
import base64
from datetime import datetime
from pathlib import Path
# Add the gateway module to the path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))
from modules.datamodels.datamodelChat import ChatDocument
from modules.datamodels.datamodelAi import EnhancedAiCallOptions
from modules.services.serviceAi.mainServiceAi import AiService
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
async def process_documents_and_generate_summary() -> bool:
    """Process documents using the main AI service with intelligent chunk integration.

    End-to-end driver: locates test documents under a ``testdata`` folder,
    monkey-patches the DB interface so the AI service can resolve file ids to
    raw bytes, issues a single ``AiService.callAi`` with all documents, then
    persists every generated artifact plus a human-readable report under
    ``test-chat/unittestoutput``.

    Returns:
        bool: True when the end-to-end run and result persistence succeeded,
        False on any setup or processing failure.
    """
    logger.info("🚀 Starting intelligent chunk integration test...")

    # Find testdata directory
    testdata_path = Path("../wiki/poweron/testdata")
    if not testdata_path.exists():
        # Try relative to current directory
        testdata_path = Path("wiki/poweron/testdata")
        if not testdata_path.exists():
            # Try relative to parent directory
            # NOTE(review): this retries the same "../" path as the first
            # attempt — presumably a different location was intended; confirm.
            testdata_path = Path("../wiki/poweron/testdata")
            if not testdata_path.exists():
                logger.error(f"❌ Testdata path not found. Tried:")
                logger.error(f" - ../wiki/poweron/testdata")
                logger.error(f" - wiki/poweron/testdata")
                logger.error(f" - ../wiki/poweron/testdata")
                logger.info("Please ensure the testdata folder exists with PDF documents")
                return False

    # Find all supported document files
    supported_extensions = [
        # Document formats
        "*.pdf", "*.docx", "*.xlsx", "*.pptx", "*.ppt",
        # Image formats
        "*.jpg", "*.jpeg", "*.png", "*.gif", "*.webp", "*.bmp", "*.tiff",
        # Text and code files
        "*.txt", "*.md", "*.log", "*.rtf", "*.tex", "*.rst", "*.adoc", "*.org", "*.pod",
        "*.java", "*.js", "*.jsx", "*.ts", "*.tsx", "*.py", "*.rb", "*.go", "*.rs", "*.cpp", "*.c", "*.h", "*.hpp", "*.cc", "*.cxx",
        "*.cs", "*.php", "*.swift", "*.kt", "*.scala", "*.clj", "*.hs", "*.ml", "*.fs", "*.vb", "*.dart", "*.r", "*.m", "*.pl", "*.sh",
        "*.html", "*.htm", "*.css", "*.scss", "*.sass", "*.less", "*.vue", "*.svelte",
        "*.config", "*.ini", "*.cfg", "*.conf", "*.properties", "*.yaml", "*.yml", "*.toml", "*.json", "*.xml",
        "*.bat", "*.ps1", "*.psm1", "*.psd1", "*.vbs", "*.wsf", "*.cmd", "*.com",
        "*.csv", "*.tsv", "*.tab", "*.dat", "*.data",
        # NOTE(review): "*.m" and "*.r" repeat earlier entries; harmless duplicates.
        "*.man", "*.1", "*.2", "*.3", "*.4", "*.5", "*.6", "*.7", "*.8", "*.9", "*.n", "*.l", "*.m", "*.r", "*.t", "*.x", "*.y", "*.z",
        "*.diff", "*.patch", "*.gitignore", "*.dockerignore", "*.editorconfig", "*.gitattributes",
        "*.env", "*.env.local", "*.env.development", "*.env.production", "*.env.test",
        "*.lock", "*.lockb", "*.lockfile", "*.pkg-lock", "*.yarn-lock"
    ]

    document_files = []
    for ext in supported_extensions:
        document_files.extend(list(testdata_path.glob(ext)))

    logger.info(f"Found {len(document_files)} document files in testdata:")
    for doc_file in document_files:
        logger.info(f" - {doc_file.name}")

    if not document_files:
        logger.error("❌ No supported document files found in testdata folder")
        return False

    try:
        # Mock the database interface to provide our file data BEFORE creating AI service
        class TestDbInterface:
            # Maps file_id -> raw bytes so the AI service can "load" documents
            # without a real database behind it.
            def __init__(self, file_data_map):
                self.file_data_map = file_data_map

            def getFileData(self, file_id):
                logger.info(f"TestDbInterface.getFileData called with file_id: {file_id}")
                data = self.file_data_map.get(file_id)
                if data:
                    logger.info(f"✅ Found file data for {file_id}: {len(data)} bytes")
                else:
                    logger.warning(f"❌ No file data found for {file_id}")
                return data

        # Create file data mapping
        file_data_map = {}
        for i, doc_file in enumerate(document_files):
            with open(doc_file, 'rb') as f:
                file_data_map[f"test_doc_{i+1}"] = f.read()
            logger.info(f"📁 Loaded {doc_file.name} as test_doc_{i+1}: {len(file_data_map[f'test_doc_{i+1}'])} bytes")

        # Mock the database interface BEFORE creating AI service
        import modules.interfaces.interfaceDbComponentObjects as db_interface_module
        original_get_interface = db_interface_module.getInterface
        db_interface_module.getInterface = lambda: TestDbInterface(file_data_map)
        logger.info("🔧 Database interface mocked successfully")

        # Create a mock service center with utils
        class MockServiceCenter:
            def __init__(self):
                self.utils = MockUtils()

        class MockUtils:
            # Minimal stand-in for the real service-center utils: console +
            # optional file-based debug logging, and a config lookup stub.
            def debugLogToFile(self, message, label):
                logger.debug(f"[{label}] {message}")
                print(f"DEBUG [{label}]: {message}")  # Also print to console for visibility
                # Only write to debug file if debug logging is enabled (matching real implementation)
                debug_enabled = self.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
                if debug_enabled:
                    try:
                        import os
                        from datetime import datetime, UTC
                        debug_dir = self.configGet("APP_DEBUG_CHAT_WORKFLOW_DIR", "./test-chat")
                        if not os.path.isabs(debug_dir):
                            # If relative path, make it relative to the gateway directory
                            gateway_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
                            debug_dir = os.path.join(gateway_dir, debug_dir)
                        os.makedirs(debug_dir, exist_ok=True)
                        debug_file = os.path.join(debug_dir, "debug_workflow.log")
                        timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
                        debug_entry = f"[{timestamp}] [{label}] {message}\n"
                        with open(debug_file, "a", encoding="utf-8") as f:
                            f.write(debug_entry)
                    except Exception:
                        pass  # Don't fail on debug logging errors

            def configGet(self, key, default):
                # Return debug settings
                if key == "APP_DEBUG_CHAT_WORKFLOW_ENABLED":
                    return True
                elif key == "APP_DEBUG_CHAT_WORKFLOW_DIR":
                    return "./test-chat"
                return default

        mock_service_center = MockServiceCenter()

        # Initialize the main AI service - let it handle everything
        logger.info("🔧 Initializing main AI service...")
        ai_service = await AiService.create(mock_service_center)

        # Create test documents - the AI service will handle file access internally
        documents = []
        logger.info(f"📁 Found {len(document_files)} document files")
        for i, doc_file in enumerate(document_files):
            logger.info(f"📄 Processing file {i+1}/{len(document_files)}: {doc_file.name}")

            # Determine MIME type based on file extension
            mime_type = "application/octet-stream"  # default
            if doc_file.suffix.lower() == '.pdf':
                mime_type = "application/pdf"
            elif doc_file.suffix.lower() in ['.jpg', '.jpeg']:
                mime_type = "image/jpeg"
            elif doc_file.suffix.lower() == '.png':
                mime_type = "image/png"
            elif doc_file.suffix.lower() == '.gif':
                mime_type = "image/gif"
            elif doc_file.suffix.lower() == '.docx':
                mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
            elif doc_file.suffix.lower() == '.xlsx':
                mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
            elif doc_file.suffix.lower() == '.pptx':
                mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
            elif doc_file.suffix.lower() == '.ppt':
                mime_type = "application/vnd.ms-powerpoint"
            elif doc_file.suffix.lower() == '.html':
                mime_type = "text/html"
            elif doc_file.suffix.lower() == '.csv':
                mime_type = "text/csv"
            elif doc_file.suffix.lower() == '.json':
                mime_type = "application/json"
            elif doc_file.suffix.lower() in ['.txt', '.md']:
                mime_type = "text/plain"

            chat_doc = ChatDocument(
                fileId=f"test_doc_{i+1}",
                messageId=f"test_message_{i+1}",
                fileName=doc_file.name,
                mimeType=mime_type,
                fileSize=doc_file.stat().st_size,
                roundNumber=1,
                taskNumber=1,
                actionNumber=1,
                actionId=f"test_action_{i+1}"
            )
            documents.append(chat_doc)
            logger.info(f"✅ Created ChatDocument: {chat_doc.fileName} ({chat_doc.mimeType}) - {chat_doc.fileSize} bytes")

        logger.info(f"📄 Created {len(documents)} document objects")

        # Create enhanced AI call options for intelligent chunked processing
        ai_options = EnhancedAiCallOptions(
            operationType="general",
            enableParallelProcessing=True,
            maxConcurrentChunks=5,  # Increased for better testing
            preserveChunkMetadata=True,
            chunkSeparator="\n\n---\n\n"
        )

        # Call the main AI service directly - let it handle everything including DOCX generation
        logger.info("🤖 Calling main AI service with intelligent merging...")

        # Run a single end-to-end test to avoid the loop issue
        logger.info("🧪 Running single end-to-end test...")
        userPrompt = "Analyze the document containing mails for customer use cases. Can you create one file for each email in plain text format?"
        # userPrompt = "Can you create one file for each section in the document"
        # userPrompt = "Analyze these documents and create a fitting image for the content"
        # userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted."
        # userPrompt = "Create a docx file containing a summary and the COMPLETE list from the pdf file, having one additional column with a 'x' marker for all items, which are yellow highlighted."
        # userPrompt = "Create a docx file containing the combined documents in french language."

        try:
            # Single AI call with DOCX generation
            ai_response = await ai_service.callAi(
                prompt=userPrompt,
                documents=documents,
                options=ai_options,
                outputFormat="txt",
                title="Kunden und Use Cases"
            )
            logger.info(f"✅ End-to-end test completed successfully")
            logger.info(f"📊 Response type: {type(ai_response)}")
            logger.info(f"📊 Response length: {len(str(ai_response))} characters")

            # Single test result
            test_results = [{
                "test_name": "End-to-End DOCX Generation",
                "success": True,
                "response_type": type(ai_response).__name__,
                "response_length": len(str(ai_response)),
                "response": ai_response
            }]
        except Exception as e:
            logger.error(f"❌ End-to-end test failed: {str(e)}")
            test_results = [{
                "test_name": "End-to-End DOCX Generation",
                "success": False,
                "error": str(e),
                "response": None
            }]

        logger.info(f"🎯 Completed 1 end-to-end test")

        # Process all test results and save outputs
        logger.info("📊 Processing test results...")
        successful_tests = [r for r in test_results if r['success']]
        failed_tests = [r for r in test_results if not r['success']]

        logger.info(f"✅ Successful tests: {len(successful_tests)}")
        logger.info(f"❌ Failed tests: {len(failed_tests)}")

        # Display test results summary
        logger.info("=" * 80)
        logger.info("END-TO-END TEST RESULTS SUMMARY")
        logger.info("=" * 80)
        for i, result in enumerate(test_results, 1):
            status = "✅ PASS" if result['success'] else "❌ FAIL"
            logger.info(f"Test {i}: {result['test_name']} - {status}")
            if result['success']:
                logger.info(f" Response Type: {result['response_type']}")
                logger.info(f" Response Length: {result['response_length']} characters")
            else:
                logger.info(f" Error: {result['error']}")
        logger.info("=" * 80)

        # Create output directory if it doesn't exist
        output_dir = Path("test-chat/unittestoutput")
        output_dir.mkdir(parents=True, exist_ok=True)

        # Save all test results and generated files
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        logger.info("💾 Saving test results and generated files...")

        try:
            for i, result in enumerate(successful_tests, 1):
                test_name = result['test_name'].replace(' ', '_').lower()
                response = result['response']
                logger.info(f"💾 Saving Test {i}: {result['test_name']}")

                # Handle different response types
                if isinstance(response, dict):
                    # Document generation response
                    if 'documents' in response and response['documents']:
                        logger.info(f"📄 Found {len(response['documents'])} documents in response")
                        for j, doc in enumerate(response['documents']):
                            doc_name = doc.get('documentName', f'{test_name}_document_{j+1}')
                            doc_data = doc.get('documentData', '')
                            doc_mime = doc.get('mimeType', 'application/octet-stream')
                            logger.info(f"📄 Document {j+1}: {doc_name}")
                            logger.info(f"📄 MIME Type: {doc_mime}")
                            logger.info(f"📄 Data length: {len(doc_data)} characters")

                            # Determine file extension with better MIME type detection
                            file_ext = '.bin'  # Default fallback
                            if doc_mime:
                                if 'docx' in doc_mime.lower() or 'wordprocessingml' in doc_mime.lower():
                                    file_ext = '.docx'
                                elif 'pdf' in doc_mime.lower():
                                    file_ext = '.pdf'
                                elif 'txt' in doc_mime.lower() or 'plain' in doc_mime.lower():
                                    file_ext = '.txt'
                                elif 'html' in doc_mime.lower():
                                    file_ext = '.html'
                                elif 'json' in doc_mime.lower():
                                    file_ext = '.json'
                                elif 'csv' in doc_mime.lower():
                                    file_ext = '.csv'
                                elif 'xlsx' in doc_mime.lower() or 'spreadsheetml' in doc_mime.lower():
                                    file_ext = '.xlsx'
                                elif 'pptx' in doc_mime.lower() or 'presentationml' in doc_mime.lower():
                                    file_ext = '.pptx'
                                elif 'markdown' in doc_mime.lower() or 'md' in doc_mime.lower():
                                    file_ext = '.md'
                                elif 'png' in doc_mime.lower() or 'image' in doc_mime.lower():
                                    file_ext = '.png'
                                elif 'jpg' in doc_mime.lower() or 'jpeg' in doc_mime.lower():
                                    file_ext = '.jpg'
                                else:
                                    logger.warning(f"⚠️ Unknown MIME type: {doc_mime}, using .bin")

                            # Also check filename for hints
                            if doc_name and '.' in doc_name:
                                name_ext = '.' + doc_name.split('.')[-1].lower()
                                if name_ext in ['.docx', '.pdf', '.txt', '.html', '.json', '.csv', '.xlsx', '.pptx', '.md', '.png', '.jpg', '.jpeg']:
                                    file_ext = name_ext
                                    logger.info(f"📄 Using extension from filename: {file_ext}")

                            logger.info(f"📄 Final file extension: {file_ext}")

                            # Save document
                            output_path = output_dir / f"{test_name}_{timestamp}{file_ext}"

                            # Handle different content types
                            if file_ext in ['.md', '.txt', '.html', '.json', '.csv']:
                                # Text-based formats - save directly as text
                                with open(output_path, 'w', encoding='utf-8') as f:
                                    f.write(doc_data)
                                logger.info(f"✅ Document saved as text: {output_path} ({len(doc_data)} characters)")
                            elif file_ext in ['.png', '.jpg', '.jpeg']:
                                # Image formats - decode from base64
                                try:
                                    doc_bytes = base64.b64decode(doc_data)
                                    with open(output_path, 'wb') as f:
                                        f.write(doc_bytes)
                                    logger.info(f"✅ Image saved: {output_path} ({len(doc_bytes)} bytes)")
                                except Exception as e:
                                    logger.warning(f"⚠️ Failed to decode image as base64: {e}")
                                    # Save as text if base64 decoding fails
                                    with open(output_path, 'w', encoding='utf-8') as f:
                                        f.write(doc_data)
                                    logger.info(f"✅ Image saved as text (fallback): {output_path}")
                            else:
                                # Other binary formats - decode from base64
                                try:
                                    doc_bytes = base64.b64decode(doc_data)
                                    with open(output_path, 'wb') as f:
                                        f.write(doc_bytes)
                                    logger.info(f"✅ Document saved as binary: {output_path} ({len(doc_bytes)} bytes)")
                                except Exception as e:
                                    logger.warning(f"⚠️ Failed to decode document as base64: {e}")
                                    # Save as text if base64 decoding fails
                                    with open(output_path, 'w', encoding='utf-8') as f:
                                        f.write(doc_data)
                                    logger.info(f"✅ Document saved as text (fallback): {output_path}")

                    # Also save raw content as text
                    content = response.get('content', '')
                    if content:
                        text_path = output_dir / f"{test_name}_content_{timestamp}.txt"
                        with open(text_path, 'w', encoding='utf-8') as f:
                            # Handle both string and dictionary content
                            if isinstance(content, dict):
                                import json
                                f.write(json.dumps(content, indent=2, ensure_ascii=False))
                            else:
                                f.write(str(content))
                        logger.info(f"✅ Content saved: {text_path}")

                elif isinstance(response, str):
                    # Text response
                    text_path = output_dir / f"{test_name}_response_{timestamp}.txt"
                    with open(text_path, 'w', encoding='utf-8') as f:
                        f.write(response)
                    logger.info(f"✅ Text response saved: {text_path}")
                else:
                    logger.warning(f"⚠️ Unknown response type for {result['test_name']}: {type(response)}")

            # Save failed test details
            if failed_tests:
                error_path = output_dir / f"failed_tests_{timestamp}.txt"
                with open(error_path, 'w', encoding='utf-8') as f:
                    f.write("# Failed Test Details\n\n")
                    for i, result in enumerate(failed_tests, 1):
                        f.write(f"## Test {i}: {result['test_name']}\n")
                        f.write(f"**Error:** {result['error']}\n\n")
                logger.info(f"✅ Failed test details saved: {error_path}")
        except Exception as e:
            logger.error(f"❌ Error saving test results: {str(e)}")
            return False

        # Save comprehensive test report
        report_path = output_dir / f"end_to_end_test_report_{timestamp}.txt"
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(f"# End-to-End AI Service Test Report\n")
            f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")
            f.write(f"## Test Configuration\n")
            f.write(f"- Documents processed: {len(documents)}\n")
            f.write(f"- Processing method: Intelligent Token-Aware Merging\n")
            f.write(f"- Parallel processing: {ai_options.enableParallelProcessing}\n")
            f.write(f"- Max concurrent chunks: {ai_options.maxConcurrentChunks}\n")
            f.write(f"- Chunk metadata preserved: {ai_options.preserveChunkMetadata}\n")
            f.write(f"- Chunk separator: '{ai_options.chunkSeparator}'\n\n")
            f.write(f"## Document Inventory\n")
            for i, doc in enumerate(documents, 1):
                f.write(f"{i}. **{doc.fileName}**\n")
                f.write(f" - MIME Type: {doc.mimeType}\n")
                f.write(f" - File Size: {doc.fileSize:,} bytes\n")
                f.write(f" - File ID: {doc.fileId}\n\n")
            f.write(f"## Test Results Summary\n")
            f.write(f"- Total Tests: {len(test_results)}\n")
            f.write(f"- Successful: {len(successful_tests)}\n")
            f.write(f"- Failed: {len(failed_tests)}\n")
            f.write(f"- Success Rate: {len(successful_tests)/len(test_results)*100:.1f}%\n\n")
            f.write(f"## Detailed Test Results\n")
            for i, result in enumerate(test_results, 1):
                f.write(f"### Test {i}: {result['test_name']}\n")
                f.write(f"**Status:** {'✅ PASS' if result['success'] else '❌ FAIL'}\n")
                if result['success']:
                    f.write(f"**Response Type:** {result['response_type']}\n")
                    f.write(f"**Response Length:** {result['response_length']} characters\n")
                    # Show response preview
                    response_preview = str(result['response'])[:500]
                    f.write(f"**Response Preview:**\n```\n{response_preview}...\n```\n\n")
                else:
                    f.write(f"**Error:** {result['error']}\n\n")
            f.write(f"## Technical Implementation Details\n")
            f.write(f"This test validates the complete AI service pipeline:\n\n")
            f.write(f"### Tested Components:\n")
            f.write(f"- **Document Extraction**: PDF, DOCX, images, etc.\n")
            f.write(f"- **Intelligent Chunking**: Token-aware merging\n")
            f.write(f"- **Model Selection**: Automatic AI model choice\n")
            f.write(f"- **Parallel Processing**: Concurrent chunk processing\n")
            f.write(f"- **Document Generation**: DOCX, PDF, text output\n")
            f.write(f"- **Error Handling**: Graceful failure management\n\n")
            f.write(f"### Performance Metrics:\n")
            f.write(f"- **Chunk Optimization**: Intelligent merging reduces AI calls\n")
            f.write(f"- **Processing Speed**: Parallel execution\n")
            f.write(f"- **Memory Efficiency**: Token-aware chunking\n")
            f.write(f"- **Output Quality**: Multiple format support\n\n")
            f.write(f"## Generated Files\n")
            for i, result in enumerate(successful_tests, 1):
                test_name = result['test_name'].replace(' ', '_').lower()
                f.write(f"- **Test {i}**: {result['test_name']} → `{test_name}_*_{timestamp}.*`\n")
            if failed_tests:
                f.write(f"- **Failed Tests**: `failed_tests_{timestamp}.txt`\n")
            f.write(f"- **This Report**: `end_to_end_test_report_{timestamp}.txt`\n\n")
            f.write(f"The end-to-end test successfully validates the complete AI service\n")
            f.write(f"pipeline from document input to formatted output generation.\n")

        logger.info(f"✅ Comprehensive test report saved: {report_path}")

        # Show debug file locations
        debug_files = []
        try:
            debug_dir = Path("test-chat")
            if debug_dir.exists():
                debug_files.extend(list(debug_dir.glob("*.log")))
                debug_files.extend(list(debug_dir.glob("ai/*.txt")))
            if debug_files:
                logger.info("📁 Debug files created:")
                for debug_file in debug_files:
                    logger.info(f" - {debug_file}")
            else:
                logger.info("📁 No debug files found in test-chat directory")
        except Exception as e:
            logger.warning(f"Could not list debug files: {e}")

        # Restore original database interface
        db_interface_module.getInterface = original_get_interface

        return True

    except Exception as e:
        logger.error(f"❌ Error during document processing: {str(e)}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")
        # Restore original database interface in case of error
        try:
            db_interface_module.getInterface = original_get_interface
        except:
            pass
        return False
async def main():
    """Drive the intelligent chunk integration test and log a summary banner."""
    banner = "=" * 60
    logger.info("🎯 Starting Intelligent Chunk Integration Test")
    logger.info(banner)

    if await process_documents_and_generate_summary():
        # Success: report each validated aspect of the run.
        for line in (
            "🎉 Intelligent chunk integration test completed successfully!",
            "✅ Main AI service handled all processing internally",
            "✅ Intelligent token-aware merging activated",
            "✅ DOCX document generated directly by AI service",
            "✅ Detailed chunk integration analysis saved",
            "✅ Performance optimization achieved",
        ):
            logger.info(line)
    else:
        logger.error("❌ Test failed!")
        logger.error("Please check the error messages above for details")

    logger.info(banner)
if __name__ == "__main__":
    # Run the async test driver on a fresh event loop.
    asyncio.run(main())

View file

@ -0,0 +1,422 @@
#!/usr/bin/env python3
"""
Tool for encrypting all *_SECRET variables in all environment files.
This tool automatically processes all three environment files (dev, int, prod)
and encrypts any unencrypted *_SECRET variables using the appropriate encryption
keys for each environment.
Usage:
# Encrypt all secrets in all environment files
python tool_security_encrypt_all_env_files.py
# Dry run - show what would be changed without making changes
python tool_security_encrypt_all_env_files.py --dry-run
# Skip backup creation
python tool_security_encrypt_all_env_files.py --no-backup
# Process only specific environment files
python tool_security_encrypt_all_env_files.py --files env_dev.env env_prod.env
"""
import sys
import os
import argparse
import shutil
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any
# Add the modules directory to the Python path
current_dir = Path(__file__).parent
modules_dir = current_dir / 'modules'
if modules_dir.exists():
sys.path.insert(0, str(modules_dir))
else:
print(f"Error: Modules directory not found: {modules_dir}")
print(f"Make sure you're running this script from the gateway directory")
sys.exit(1)
# Import encryption functions
try:
from modules.shared.configuration import encrypt_value
except ImportError as e:
print(f"Error: Could not import encryption functions from shared.configuration: {e}")
print(f"Make sure you're running this script from the gateway directory")
print(f"Modules directory: {modules_dir}")
sys.exit(1)
def get_env_type_from_file(file_path: Path) -> str:
    """
    Read the APP_ENV_TYPE from the environment file.

    Only an exact ``APP_ENV_TYPE`` key matches; keys that merely share the
    prefix (e.g. ``APP_ENV_TYPE_OVERRIDE``) are ignored. The previous
    ``startswith`` check wrongly returned the value of any such longer key.

    Args:
        file_path: Path to the environment file

    Returns:
        str: The environment type (dev, int, prod) or 'dev' as default
    """
    if not file_path.exists():
        return 'dev'

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                # Skip blanks, comments, and lines without a key-value pair.
                if not line or line.startswith('#') or '=' not in line:
                    continue
                key, value = line.split('=', 1)
                if key.strip() == 'APP_ENV_TYPE':
                    return value.strip().lower()
    except Exception as e:
        print(f"Warning: Could not read APP_ENV_TYPE from {file_path}: {e}")

    return 'dev'
def is_any_encrypted_value(value: str) -> bool:
    """
    Check if a value has any encryption prefix (DEV_ENC:, INT_ENC:, PROD_ENC:, etc.).

    Generalized to match any ``<ENV>_ENC:`` prefix (uppercase letters followed
    by ``_ENC:``) instead of a hard-coded list, so values encrypted for a
    future environment (e.g. ``QA_ENC:``) are not encrypted a second time.

    Args:
        value: The value to check

    Returns:
        bool: True if the value has any encryption prefix, False otherwise
    """
    import re  # local import keeps this helper self-contained

    if not value or not isinstance(value, str):
        return False
    # re.match anchors at the start of the string, so this is a prefix test.
    return re.match(r'[A-Z]+_ENC:', value) is not None
def find_secret_keys_in_file(file_path: Path) -> list:
    """
    Find all *_SECRET keys in an environment file that are not encrypted.

    Handles both single-line values and multiline JSON values: a value that
    opens with '{' is collected line by line until its braces balance.

    Removed from the original: an unused ``file_env_type`` lookup (which also
    re-read the file) and an unreachable ``elif value == '{'`` branch — the
    preceding ``if`` already captures that case, since ``'{'`` is truthy,
    unencrypted, and starts with '{'.

    Args:
        file_path: Path to the environment file

    Returns:
        list: List of tuples (line_number, key, value, full_line); line_number
        is 1-based and, for multiline JSON, refers to the key's own line.
    """
    secret_keys = []

    if not file_path.exists():
        return secret_keys

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        i = 0
        while i < len(lines):
            line = lines[i].strip()

            # Skip empty lines and comments
            if not line or line.startswith('#'):
                i += 1
                continue

            # Check if line contains a key-value pair
            if '=' in line:
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip()

                # Secret key that is not already encrypted with ANY prefix
                if key.endswith('_SECRET') and value and not is_any_encrypted_value(value):
                    if value.startswith('{'):
                        # Multiline JSON value: collect lines until braces balance
                        json_lines = [value]
                        start_line = i + 1
                        i += 1
                        brace_count = value.count('{') - value.count('}')
                        while i < len(lines) and brace_count > 0:
                            json_lines.append(lines[i].rstrip('\n'))
                            brace_count += lines[i].count('{') - lines[i].count('}')
                            i += 1
                        # Join all lines and create the full JSON value
                        full_json_value = '\n'.join(json_lines)
                        secret_keys.append((start_line, key, full_json_value, line))
                        i -= 1  # Adjust for the loop increment
                    else:
                        # Single line value
                        secret_keys.append((i + 1, key, value, line))
            i += 1
    except Exception as e:
        print(f"Error reading {file_path}: {e}")

    return secret_keys
def backup_file(file_path: Path) -> Path:
    """
    Create a timestamped backup copy of the file before modification.

    The backup name appends '.<timestamp>.backup' to the FULL original file
    name (e.g. 'env_dev.env' -> 'env_dev.env.20250101_120000.backup').
    The previous ``with_suffix`` approach replaced the original '.env'
    suffix, losing the extension and making restores ambiguous.

    Args:
        file_path: Path to the file to backup

    Returns:
        Path: Path to the backup file
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    # with_name keeps the original name (including its suffix) intact.
    backup_path = file_path.with_name(f"{file_path.name}.{timestamp}.backup")
    shutil.copy2(file_path, backup_path)
    return backup_path
def encrypt_all_secrets_in_file(file_path: Path, dry_run: bool = False, create_backup: bool = True) -> Dict[str, Any]:
    """
    Encrypt all non-encrypted secrets in a file.

    The environment type is read from the file's own APP_ENV_TYPE entry, so
    each file is encrypted with its own environment-specific key.

    Args:
        file_path: Path to the environment file
        dry_run: If True, only show what would be changed
        create_backup: If True, create a backup before modifying

    Returns:
        dict: Results of the encryption process with keys 'file', 'env_type',
        'secrets_found', 'secrets_encrypted', 'errors', 'backup_created'.
    """
    # Get the environment type from the file itself
    file_env_type = get_env_type_from_file(file_path)

    results = {
        'file': str(file_path),
        'env_type': file_env_type,
        'secrets_found': 0,
        'secrets_encrypted': 0,
        'errors': [],
        'backup_created': None
    }

    # Find all secret keys
    secret_keys = find_secret_keys_in_file(file_path)
    results['secrets_found'] = len(secret_keys)

    if not secret_keys:
        print(f" ✅ No unencrypted secrets found - all values already have encryption prefixes")
        return results

    print(f" Found {len(secret_keys)} non-encrypted secrets")

    if dry_run:
        # Report only; never touch the file in dry-run mode.
        print(" [DRY RUN] Would encrypt the following secrets:")
        for line_num, key, value, full_line in secret_keys:
            print(f" Line {line_num}: {key} = {value[:50]}{'...' if len(value) > 50 else ''}")
        return results

    # Create backup if requested
    if create_backup:
        try:
            backup_path = backup_file(file_path)
            results['backup_created'] = str(backup_path)
            print(f" 📋 Backup created: {backup_path.name}")
        except Exception as e:
            # Backup failure is recorded but does not abort encryption.
            results['errors'].append(f"Failed to create backup: {e}")
            print(f" ⚠️ Warning: Could not create backup: {e}")

    # Read the file content
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        results['errors'].append(f"Failed to read file: {e}")
        return results

    # Process each secret key
    for line_num, key, value, full_line in secret_keys:
        try:
            print(f" 🔐 Encrypting {key}...")

            # Encrypt the value using the environment type from the file
            encrypted_value = encrypt_value(value, file_env_type)

            # Replace the line in the file content (line_num is 1-based)
            new_line = f"{key} = {encrypted_value}\n"
            lines[line_num - 1] = new_line

            # If this was a multiline JSON, we need to remove the remaining lines
            if value.startswith('{') and '\n' in value:
                # Count how many lines the original JSON spanned
                json_lines = value.split('\n')
                lines_to_remove = len(json_lines) - 1  # -1 because we already replaced the first line

                # Remove the remaining lines
                # NOTE(review): continuation lines are blanked in place rather
                # than deleted, so line numbers of later secrets stay valid.
                for i in range(line_num, line_num + lines_to_remove):
                    if i < len(lines):
                        lines[i] = ""

            results['secrets_encrypted'] += 1
            print(f" ✓ Encrypted successfully")

        except Exception as e:
            error_msg = f"Failed to encrypt {key}: {e}"
            results['errors'].append(error_msg)
            print(f" ✗ {error_msg}")

    # Write the modified content back to the file
    if results['secrets_encrypted'] > 0:
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.writelines(lines)
            print(f" 💾 File updated successfully")
        except Exception as e:
            results['errors'].append(f"Failed to write file: {e}")
            print(f" ✗ Failed to write file: {e}")

    return results
def process_all_env_files(env_files: List[str] = None, dry_run: bool = False, create_backup: bool = True) -> Dict[str, Any]:
    """
    Process all environment files and encrypt unencrypted secrets.

    Args:
        env_files: List of specific files to process (if None, processes all three default files)
        dry_run: If True, only show what would be changed
        create_backup: If True, create backups before modifying

    Returns:
        dict: Summary of all processing results with keys 'total_files',
        'total_secrets_found', 'total_secrets_encrypted', 'total_errors',
        and 'files' (the per-file result dicts).
    """
    # Default environment files if none specified
    if env_files is None:
        env_files = ['env_dev.env', 'env_int.env', 'env_prod.env']

    # Convert to Path objects and check if they exist; missing files are
    # reported and skipped rather than treated as fatal.
    env_paths = []
    for env_file in env_files:
        env_path = Path(env_file)
        if not env_path.exists():
            print(f"⚠️ Warning: Environment file not found: {env_file}")
            continue
        env_paths.append(env_path)

    if not env_paths:
        print("❌ No valid environment files found to process")
        return {'total_files': 0, 'total_secrets_found': 0, 'total_secrets_encrypted': 0, 'total_errors': 0, 'files': []}

    print("🔐 PowerOn Batch Secret Encryption Tool")
    print("=" * 60)
    print("⚠️ IMPORTANT: The tool will read APP_ENV_TYPE from each file itself")
    print("⚠️ Each file will be processed with its own environment-specific encryption")
    print()

    if dry_run:
        print("🔍 DRY RUN MODE - No changes will be made")
        print()

    # Process each file and accumulate totals for the summary
    all_results = []
    total_secrets_found = 0
    total_secrets_encrypted = 0
    total_errors = 0

    for env_path in env_paths:
        print(f"\n📁 Processing {env_path.name}:")
        results = encrypt_all_secrets_in_file(env_path, dry_run, create_backup)
        all_results.append(results)
        total_secrets_found += results['secrets_found']
        total_secrets_encrypted += results['secrets_encrypted']
        total_errors += len(results['errors'])

    # Summary
    print("\n" + "=" * 60)
    print("📊 SUMMARY")
    print("=" * 60)
    print(f"Files processed: {len(env_paths)}")
    print(f"Total secrets found: {total_secrets_found}")
    if not dry_run:
        print(f"Total secrets encrypted: {total_secrets_encrypted}")
        print(f"Total errors: {total_errors}")
        if total_errors == 0 and total_secrets_encrypted > 0:
            print("\n🎉 All secrets encrypted successfully!")
        elif total_errors > 0:
            print(f"\n⚠️ Completed with {total_errors} errors")
        else:
            print("\n✅ No secrets needed encryption")
    else:
        print(f"Secrets that would be encrypted: {total_secrets_found}")

    # Show backup information
    backups_created = [r['backup_created'] for r in all_results if r['backup_created']]
    if backups_created:
        print(f"\n📋 Backups created: {len(backups_created)}")
        for backup in backups_created:
            print(f" - {Path(backup).name}")

    # Show errors if any
    all_errors = []
    for results in all_results:
        all_errors.extend(results['errors'])
    if all_errors:
        print(f"\n❌ Errors encountered:")
        for error in all_errors:
            print(f" - {error}")

    return {
        'total_files': len(env_paths),
        'total_secrets_found': total_secrets_found,
        'total_secrets_encrypted': total_secrets_encrypted,
        'total_errors': total_errors,
        'files': all_results
    }
def main():
    """Command-line entry point: parse arguments and run the batch encryption."""
    parser = argparse.ArgumentParser(description='Encrypt all *_SECRET variables in all environment files')
    parser.add_argument('--files', '-f', nargs='+',
                        help='Specific environment files to process (default: all three env files)')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be changed without making changes')
    parser.add_argument('--no-backup', action='store_true',
                        help='Skip creating backup files')
    options = parser.parse_args()

    try:
        summary = process_all_env_files(
            env_files=options.files,
            dry_run=options.dry_run,
            create_backup=not options.no_backup,
        )
        # Non-zero exit code signals that at least one secret failed.
        return 1 if summary['total_errors'] > 0 else 0
    except Exception as exc:
        print(f"Error: {exc}")
        return 1
if __name__ == '__main__':
    # Propagate the CLI exit code (0 = success, 1 = errors) to the shell.
    sys.exit(main())