Merge branch 'int' into feat/chatbot

commit 57118a633e
93 changed files with 13774 additions and 3691 deletions
@@ -29,4 +29,9 @@ Web_Search_MIN_RESULTS = 1
 # Web Crawl configuration
 Web_Crawl_TIMEOUT = 30
 Web_Crawl_MAX_RETRIES = 3
 Web_Crawl_RETRY_DELAY = 2
+
+# Web Research configuration
+Web_Research_MAX_DEPTH = 2
+Web_Research_MAX_LINKS_PER_DOMAIN = 4
+Web_Research_CRAWL_TIMEOUT_MINUTES = 10
@@ -66,14 +66,14 @@ Connector_AiAnthropic_MAX_TOKENS = 2000
 
 # Perplexity AI configuration
 Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
+Connector_AiPerplexity_API_SECRET = DEV_ENC:Z0FBQUFBQm82Mzk2Q1MwZ0dNcUVBcUtuRDJIcTZkMXVvYnpjM3JEMzJiT1NKSHljX282ZDIyZTJYc09VSTdVNXAtOWU2UXp5S193NTk5dHJsWlFjRjhWektFOG1DVGY4ZUhHTXMzS0RPN1lNcF9nSlVWbW5BZ1hkZDVTejl6bVZNRFVvX29xamJidWRFMmtjQmkyRUQ2RUh6UTN1aWNPSUJBPT0=
 Connector_AiPerplexity_MODEL_NAME = sonar
 Connector_AiPerplexity_TEMPERATURE = 0.2
 Connector_AiPerplexity_MAX_TOKENS = 2000
 
 # Agent Mail configuration
 Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEQk4xYnpmbnItUEU3dHU4eHB5dzVYay1WT012RTRLUWJDTlBILVY5dC1FX3VMNjZmLThrbDRFNWFSNGprY3RRTlpYNGlubVBpNnY3MjNJcGtzVk9PMzRacl9LUlM2RU5vTVVZWHJvaUhWSHVfc1pNR0pfQmI5SEprOG5KdlB1QnQ=
+Service_MSFT_CLIENT_SECRET = DEV_ENC:Z0FBQUFBQm83T29rV1pQelMtc1p1MXR4NTFpa19CTEhHQ0xfNmdPUmZqcWp5UHBMS0hYTGl4c1pPdmhTNTJVWUl5WnlnUUZhV0VTRzVCb0d5YjR1NnZPZk5CZ0dGazNGdUJVbjkxeVdrYlNiVjJUYzF2aVFtQnVxTHFqTTJqZlF0RTFGNmE1OGN1TEk=
 Service_MSFT_TENANT_ID = common
 
 # Google Service configuration
@@ -88,3 +88,7 @@ Connector_GoogleSpeech_API_KEY_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpETk5FWWM3Q0JKMzhI
 
 # Feature SyncDelta JIRA configuration
 Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = DEV_ENC:Z0FBQUFBQm8xSUpEbm0yRUJ6VUJKbUwyRW5kMnRaNW4wM2YxMkJUTXVXZUdmdVRCaUZIVHU2TTV2RWZLRmUtZkcwZE4yRUNlNDQ0aUJWYjNfdVg5YjV5c2JwMHhoUUYxZWdkeS11bXR0eGxRLWRVaVU3cUVQZWJlNDRtY1lWUDdqeDVFSlpXS0VFX21WajlRS3lHQjc0bS11akkybWV3QUFlR2hNWUNYLUdiRjZuN2dQODdDSExXWG1Dd2ZGclI2aUhlSWhETVZuY3hYdnhkb2c2LU1JTFBvWFpTNmZtMkNVOTZTejJwbDI2eGE0OS1xUlIwQnlCSmFxRFNCeVJNVzlOMDhTR1VUamx4RDRyV3p6Tk9qVHBrWWdySUM3TVRaYjd3N0JHMFhpdzFhZTNDLTFkRVQ2RVE4U19COXRhRWtNc0NVOHRqUS1CRDFpZ19xQmtFLU9YSDU3TXBZQXpVcld3PT0=
+
+# Debug Configuration
+APP_DEBUG_CHAT_WORKFLOW_ENABLED = True
+APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
@@ -66,14 +66,14 @@ Connector_AiAnthropic_MAX_TOKENS = 2000
 
 # Perplexity AI configuration
 Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
+Connector_AiPerplexity_API_SECRET = INT_ENC:Z0FBQUFBQm82Mzk2UWZJdUFhSW8yc3RKc0tKRXphd0xWMkZOVlFpSGZ4SGhFWnk0cTF5VjlKQVZjdS1QSWdkS0pUSWw4OFU5MjUxdTVQel9aeWVIZTZ5TXRuVmFkZG0zWEdTOGdHMHpsTzI0TGlWYURKU1Q0VVpKTlhxUk5FTmN6SUJScDZ3ZldIaUJZcWpaQVRiSEpyQm9tRTNDWk9KTnZBPT0=
 Connector_AiPerplexity_MODEL_NAME = sonar
 Connector_AiPerplexity_TEMPERATURE = 0.2
 Connector_AiPerplexity_MAX_TOKENS = 2000
 
 # Agent Mail configuration
 Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm8xSVRjNzB2M3ZjaE1SVE9ON2FKam9yVURxcHl1Ym5VNVUtS0MyWUpNVXVlaWpWS2U3VVd3em9vQl9lcnVYay03bS04YjNBbDZZNTB4eUtjT3ppQjJjY3dOT0FNLW9LeDhIUU5iaTNqNURUWE5La3kzaHNGcU9yNVI0YjhWZTZRRFktcTk=
+Service_MSFT_CLIENT_SECRET = INT_ENC:Z0FBQUFBQm83T29rMDZvcV9qTG5xb1FzUkdqS1llbzRxSEJXbmpONFFtcUtfZXdtZjQybmJSMjBjMEpnRVhiOGRuczZvVFBFdVVTQV80SG9PSnRQTEpLdVViNm5wc2E5aGRLWjZ4TGF1QjVkNmdRSzBpNWNkYXVublFYclVEdEM5TVBBZWVVMW5RVWk=
 Service_MSFT_TENANT_ID = common
 
 # Google Service configuration
@@ -88,3 +88,7 @@ Connector_GoogleSpeech_API_KEY_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkNmVXZ1pWcHcydTF2
 
 # Feature SyncDelta JIRA configuration
 Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = INT_ENC:Z0FBQUFBQm8xSVRkTUNsWm4wX0p6eXFDZmJ4dFdHNEs1MV9MUzdrb3RzeC1jVWVYZ0REWHRyZkFiaGZLcUQtTXFBZzZkNzRmQ0gxbEhGbUNlVVFfR1JEQTc0aldkZkgyWnBOcjdlUlZxR0tDTEdKRExULXAyUEtsVmNTMkRKU1BJNnFiM0hlMXo4YndMcHlRMExtZDQ3Zm9vNFhMcEZCcHpBPT0=
+
+# Debug Configuration
+APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
+APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
@@ -66,14 +66,14 @@ Connector_AiAnthropic_MAX_TOKENS = 2000
 
 # Perplexity AI configuration
 Connector_AiPerplexity_API_URL = https://api.perplexity.ai/chat/completions
-Connector_AiPerplexity_API_SECRET = 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
+Connector_AiPerplexity_API_SECRET = PROD_ENC:Z0FBQUFBQm02Mzk2Q1FGRkJEUkI4LXlQbHYzT2RkdVJEcmM4WGdZTWpJTEhoeUF1NW5LUVpJdDBYN3k1WFN4a2FQSWJSQmd0U0xJbzZDTmFFN05FcXl0Z3V1OEpsZjYydV94TXVjVjVXRTRYSWdLMkd5XzZIbFV6emRCZHpuOUpQeThadE5xcDNDVGV1RHJrUEN0c1BBYXctZFNWcFRuVXhRPT0=
 Connector_AiPerplexity_MODEL_NAME = sonar
 Connector_AiPerplexity_TEMPERATURE = 0.2
 Connector_AiPerplexity_MAX_TOKENS = 2000
 
 # Agent Mail configuration
 Service_MSFT_CLIENT_ID = c7e7112d-61dc-4f3a-8cd3-08cc4cd7504c
-Service_MSFT_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pVEhHdlZHU3FNMmhuRGVwaGc3YzIxSjlZNzBCQjlOV2pSYVNXb0t1ZnVwQzZsQzY4cHMtVlZtNF85OEVaV1BMTzdXMmpzaGZpaG1DalJ0bkNPMHA5ZUcwZjNDdGk1TFdxYTJSZnVrVmhhZ2VRUEZxbjJOOGFhWk9EYlY3dmRVTnI=
+Service_MSFT_CLIENT_SECRET = PROD_ENC:Z0FBQUFBQm83T29rSzdYLTRydXN5V3lQLXhmRjMyQ1FOaGpuek45QllaX1REN2s5aWNIUl81NGlrYlJTeFV0RlRZd0xPcm5uMDM4QlpibHJQbm5XZTlWeWxfcWNVdFpCUHI2amh0MVBnZ21IN2ptSkhWLTVfaHEwNmI5SEtiS05pQmt5eV8yMnhLMEc=
 Service_MSFT_TENANT_ID = common
 
 # Google Service configuration
@@ -88,3 +88,7 @@ Connector_GoogleSpeech_API_KEY_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pNjlJdmFMeERXUUQ
 
 # Feature SyncDelta JIRA configuration
 Feature_SyncDelta_JIRA_DELTA_TOKEN_SECRET = PROD_ENC:Z0FBQUFBQm8xSU5pTDhnTVNzRUhScU8wYnZsZk52bHFkSWxLc18xQmtCeC1HbnNwTzVBbXRNTmQzRjZYaGE2MVlCNGtnWDk1T2I5VXVKNHpKU1VRbXEyN2tRWUJnU2ltZE5qZ3lmNEF6Z1hMTTEwZkk2NUNBYjhmVTJEcWpRUW9HNEVpSGFWdjBWQXQ3eUtHUTFJS3U5QWpaeno0RFNhMUxnPT0=
+
+# Debug Configuration
+APP_DEBUG_CHAT_WORKFLOW_ENABLED = FALSE
+APP_DEBUG_CHAT_WORKFLOW_DIR = ./test-chat
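
Note: a minimal sketch of how the new Web Research and Debug settings could be read at runtime. It assumes APP_CONFIG exposes dict-style access with defaults, which matches the APP_CONFIG.get(...) usage added in the chat module further down; the helper name loadWebResearchSettings is illustrative only, not part of this change.

    from modules.shared.configuration import APP_CONFIG

    def loadWebResearchSettings() -> dict:
        # Defaults mirror the values added to the env files above.
        return {
            "max_depth": int(APP_CONFIG.get("Web_Research_MAX_DEPTH", 2)),
            "max_links_per_domain": int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", 4)),
            "crawl_timeout_minutes": int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", 10)),
            "debug_chat_workflow": APP_CONFIG.get("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False),
        }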
@@ -1,5 +1,6 @@
 import logging
 import httpx
+import os
 from typing import Dict, Any, List, Union
 from fastapi import HTTPException
 from modules.shared.configuration import APP_CONFIG
@@ -147,6 +148,11 @@ class AiAnthropic:
             # Direct content as string (in older API versions)
             content = anthropicResponse["content"]
 
+        # Debug logging for empty responses
+        if not content or content.strip() == "":
+            logger.warning(f"Anthropic API returned empty content. Full response: {anthropicResponse}")
+            content = "[Anthropic API returned empty response]"
+
         # Return in OpenAI format
         return {
             "id": anthropicResponse.get("id", ""),
@@ -182,14 +188,27 @@ class AiAnthropic:
             The analysis response as text
         """
         try:
+            # Debug logging
+            logger.info(f"callAiImage called with imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
+
             # Distinguish between file path and binary data
             if isinstance(imageData, str):
-                # It's a file path - import filehandling only when needed
-                from modules import agentserviceFilemanager as fileHandler
-                base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
-                mimeType = mimeType or autoMimeType
+                # Check if it's base64 encoded data or a file path
+                if len(imageData) > 100 and not os.path.exists(imageData):
+                    # It's likely base64 encoded data
+                    logger.info("Treating imageData as base64 encoded string")
+                    base64Data = imageData
+                    if not mimeType:
+                        mimeType = "image/png"
+                else:
+                    # It's a file path - import filehandling only when needed
+                    logger.info(f"Treating imageData as file path: {imageData}")
+                    from modules import agentserviceFilemanager as fileHandler
+                    base64Data, autoMimeType = fileHandler.encodeFileToBase64(imageData)
+                    mimeType = mimeType or autoMimeType
             else:
                 # It's binary data
+                logger.info("Treating imageData as binary data")
                 import base64
                 base64Data = base64.b64encode(imageData).decode('utf-8')
                 # MIME type must be specified for binary data
@@ -216,8 +235,16 @@ class AiAnthropic:
             # Use the existing callAiBasic function with the Vision model
             response = await self.callAiBasic(messages)
 
-            # Extract and return content
-            return response["choices"][0]["message"]["content"]
+            # Extract and return content with proper error handling
+            try:
+                content = response["choices"][0]["message"]["content"]
+                if content is None or content.strip() == "":
+                    return "[AI returned empty response for image analysis]"
+                return content
+            except (KeyError, IndexError, TypeError) as e:
+                logger.error(f"Error extracting content from AI response: {str(e)}")
+                logger.error(f"Response structure: {response}")
+                return f"[Error extracting AI response: {str(e)}]"
 
         except Exception as e:
             logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
@@ -188,4 +188,83 @@ class AiOpenai:
 
         except Exception as e:
             logger.error(f"Error during image analysis: {str(e)}", exc_info=True)
             return f"[Error during image analysis: {str(e)}]"
+
+    async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid") -> Dict[str, Any]:
+        """
+        Generate an image using DALL-E 3.
+
+        Args:
+            prompt: The text prompt for image generation
+            size: Image size (1024x1024, 1792x1024, or 1024x1792)
+            quality: Image quality (standard or hd)
+            style: Image style (vivid or natural)
+
+        Returns:
+            Dictionary with success status and image data
+        """
+        try:
+            logger.debug(f"Starting image generation with prompt: '{prompt[:100]}...'")
+
+            # DALL-E 3 API endpoint
+            dalle_url = "https://api.openai.com/v1/images/generations"
+
+            payload = {
+                "model": "dall-e-3",
+                "prompt": prompt,
+                "size": size,
+                "quality": quality,
+                "style": style,
+                "n": 1,
+                "response_format": "b64_json"  # Get base64 data directly instead of URLs
+            }
+
+            # Create a separate client for DALL-E API calls
+            dalle_client = httpx.AsyncClient(
+                timeout=120.0,
+                headers={
+                    "Authorization": f"Bearer {self.apiKey}",
+                    "Content-Type": "application/json"
+                }
+            )
+
+            response = await dalle_client.post(
+                dalle_url,
+                json=payload
+            )
+
+            await dalle_client.aclose()
+
+            if response.status_code != 200:
+                logger.error(f"DALL-E API error: {response.status_code} - {response.text}")
+                return {
+                    "success": False,
+                    "error": f"DALL-E API error: {response.status_code} - {response.text}"
+                }
+
+            responseJson = response.json()
+
+            if "data" in responseJson and len(responseJson["data"]) > 0:
+                image_data = responseJson["data"][0]["b64_json"]
+
+                logger.info(f"Successfully generated image: {len(image_data)} characters")
+                return {
+                    "success": True,
+                    "image_data": image_data,
+                    "size": size,
+                    "quality": quality,
+                    "style": style
+                }
+            else:
+                logger.error("No image data in DALL-E response")
+                return {
+                    "success": False,
+                    "error": "No image data in DALL-E response"
+                }
+
+        except Exception as e:
+            logger.error(f"Error during image generation: {str(e)}", exc_info=True)
+            return {
+                "success": False,
+                "error": f"Error during image generation: {str(e)}"
+            }
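
Note: a hedged usage sketch for the new generateImage method. It assumes an AiOpenai instance is constructed elsewhere with a valid apiKey (the constructor is not part of this diff) and simply decodes the returned b64_json payload to disk.

    import base64

    async def demo_generate_image(openai_connector) -> None:
        # openai_connector is assumed to be an initialized AiOpenai instance.
        result = await openai_connector.generateImage(
            prompt="A watercolor map of a small harbour town",
            size="1024x1024",
            quality="standard",
            style="natural",
        )
        if result["success"]:
            # image_data is base64 (response_format="b64_json"), so decode before writing.
            with open("generated.png", "wb") as fh:
                fh.write(base64.b64decode(result["image_data"]))
        else:
            print(f"Image generation failed: {result['error']}")

    # Run with e.g. asyncio.run(demo_generate_image(connector)); connector setup is not shown in this diff.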
@@ -271,6 +271,7 @@ class ConnectorWeb:
         include_domains: list[str] | None = None,
         exclude_domains: list[str] | None = None,
         language: str | None = None,
+        country: str | None = None,
         include_answer: bool | None = None,
         include_raw_content: bool | None = None,
     ) -> list[WebSearchResult]:
@@ -290,17 +291,20 @@ class ConnectorWeb:
             kwargs["time_range"] = time_range
         if topic is not None:
             kwargs["topic"] = topic
-        if include_domains is not None:
+        if include_domains is not None and len(include_domains) > 0:
             kwargs["include_domains"] = include_domains
         if exclude_domains is not None:
             kwargs["exclude_domains"] = exclude_domains
         if language is not None:
             kwargs["language"] = language
+        if country is not None:
+            kwargs["country"] = country
         if include_answer is not None:
             kwargs["include_answer"] = include_answer
         if include_raw_content is not None:
             kwargs["include_raw_content"] = include_raw_content
 
+        logger.debug(f"Tavily.search kwargs: {kwargs}")
         response = await self.client.search(**kwargs)
 
         return [
@@ -135,3 +135,29 @@ class AiCallResponse(BaseModel):
     costEstimate: Optional[float] = Field(default=None, description="Estimated cost of the call")
 
 
+class EnhancedAiCallOptions(AiCallOptions):
+    """Enhanced options for improved document processing with chunk mapping."""
+
+    # Parallel processing
+    enableParallelProcessing: bool = Field(
+        default=True,
+        description="Enable parallel processing of chunks"
+    )
+    maxConcurrentChunks: int = Field(
+        default=5,
+        ge=1,
+        le=20,
+        description="Maximum number of chunks to process concurrently"
+    )
+
+    # Chunk mapping
+    preserveChunkMetadata: bool = Field(
+        default=True,
+        description="Preserve chunk metadata during processing"
+    )
+    chunkSeparator: str = Field(
+        default="\n\n---\n\n",
+        description="Separator between chunks in merged output"
+    )
+
+
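
Note: a hedged instantiation sketch for the new options model (imports omitted). It reuses the operationType keyword that the base class is constructed with elsewhere in this diff; no other AiCallOptions fields are assumed.

    # OperationType.ANALYSE_CONTENT is taken from the fallback mappings added below.
    options = EnhancedAiCallOptions(
        operationType=OperationType.ANALYSE_CONTENT,
        enableParallelProcessing=True,
        maxConcurrentChunks=8,            # must stay within ge=1 / le=20
        chunkSeparator="\n\n===\n\n",
    )
    print(options.preserveChunkMetadata)  # defaults to True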
modules/datamodels/datamodelDocument.py (new file, 130 lines)
@@ -0,0 +1,130 @@
+from typing import Any, Dict, List, Optional, Literal, Union
+from pydantic import BaseModel, Field
+from datetime import datetime
+
+
+class DocumentMetadata(BaseModel):
+    """Metadata for the entire document."""
+    title: str = Field(description="Document title")
+    author: Optional[str] = Field(default=None, description="Document author")
+    created_at: datetime = Field(default_factory=datetime.now, description="Creation timestamp")
+    source_documents: List[str] = Field(default_factory=list, description="Source document IDs")
+    extraction_method: str = Field(default="ai_extraction", description="Method used for extraction")
+    version: str = Field(default="1.0", description="Document version")
+
+
+class TableData(BaseModel):
+    """Structured table data."""
+    headers: List[str] = Field(description="Table column headers")
+    rows: List[List[str]] = Field(description="Table data rows")
+    caption: Optional[str] = Field(default=None, description="Table caption")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Table metadata")
+
+
+class ListItem(BaseModel):
+    """Individual list item with optional sub-items."""
+    text: str = Field(description="List item text")
+    subitems: Optional[List['ListItem']] = Field(default=None, description="Nested sub-items")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Item metadata")
+
+
+class BulletList(BaseModel):
+    """Bulleted or numbered list."""
+    items: List[ListItem] = Field(description="List items")
+    list_type: Literal["bullet", "numbered", "checklist"] = Field(default="bullet", description="List type")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="List metadata")
+
+
+class Paragraph(BaseModel):
+    """Text paragraph with optional formatting."""
+    text: str = Field(description="Paragraph text")
+    formatting: Optional[Dict[str, Any]] = Field(default=None, description="Text formatting (bold, italic, etc.)")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Paragraph metadata")
+
+
+class Heading(BaseModel):
+    """Document heading."""
+    text: str = Field(description="Heading text")
+    level: int = Field(ge=1, le=6, description="Heading level (1-6)")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Heading metadata")
+
+
+class CodeBlock(BaseModel):
+    """Code block with syntax highlighting."""
+    code: str = Field(description="Code content")
+    language: Optional[str] = Field(default=None, description="Programming language")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Code block metadata")
+
+
+class Image(BaseModel):
+    """Image with metadata."""
+    data: str = Field(description="Base64 encoded image data")
+    alt_text: Optional[str] = Field(default=None, description="Alternative text")
+    caption: Optional[str] = Field(default=None, description="Image caption")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Image metadata")
+
+
+class DocumentSection(BaseModel):
+    """A section of the document containing one or more content elements."""
+    id: str = Field(description="Unique section identifier")
+    title: Optional[str] = Field(default=None, description="Section title")
+    content_type: Literal["table", "list", "paragraph", "heading", "code", "image", "mixed"] = Field(description="Primary content type")
+    elements: List[Union[TableData, BulletList, Paragraph, Heading, CodeBlock, Image]] = Field(description="Content elements in this section")
+    order: int = Field(description="Section order in document")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Section metadata")
+
+
+class StructuredDocument(BaseModel):
+    """Complete structured document in JSON format."""
+    metadata: DocumentMetadata = Field(description="Document metadata")
+    sections: List[DocumentSection] = Field(description="Document sections")
+    summary: Optional[str] = Field(default=None, description="Document summary")
+    tags: List[str] = Field(default_factory=list, description="Document tags")
+
+    def get_sections_by_type(self, content_type: str) -> List[DocumentSection]:
+        """Get all sections of a specific content type."""
+        return [section for section in self.sections if section.content_type == content_type]
+
+    def get_all_tables(self) -> List[TableData]:
+        """Get all table data from the document."""
+        tables = []
+        for section in self.sections:
+            for element in section.elements:
+                if isinstance(element, TableData):
+                    tables.append(element)
+        return tables
+
+    def get_all_lists(self) -> List[BulletList]:
+        """Get all lists from the document."""
+        lists = []
+        for section in self.sections:
+            for element in section.elements:
+                if isinstance(element, BulletList):
+                    lists.append(element)
+        return lists
+
+
+class JsonChunkResult(BaseModel):
+    """Result from processing a single chunk with JSON output."""
+    chunk_id: str = Field(description="Chunk identifier")
+    document_section: DocumentSection = Field(description="Structured content from this chunk")
+    processing_time: float = Field(description="Processing time in seconds")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Chunk processing metadata")
+
+
+class JsonMergeResult(BaseModel):
+    """Result from merging multiple JSON chunks."""
+    merged_document: StructuredDocument = Field(description="Merged structured document")
+    merge_strategy: str = Field(description="Strategy used for merging")
+    chunks_processed: int = Field(description="Number of chunks processed")
+    merge_time: float = Field(description="Time taken to merge chunks")
+    metadata: Dict[str, Any] = Field(default_factory=dict, description="Merge process metadata")
+
+
+# Update forward references (compatible with Pydantic v1 and v2)
+try:
+    # Pydantic v2
+    ListItem.model_rebuild()
+except AttributeError:
+    # Pydantic v1
+    ListItem.update_forward_refs()
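
Note: a minimal usage sketch of the new document models, using only classes defined in the file above.

    from modules.datamodels.datamodelDocument import (
        DocumentMetadata, DocumentSection, StructuredDocument, TableData,
    )

    doc = StructuredDocument(
        metadata=DocumentMetadata(title="Quarterly report"),
        sections=[
            DocumentSection(
                id="s1",
                title="Key figures",
                content_type="table",
                elements=[TableData(headers=["Metric", "Value"], rows=[["Revenue", "1.2M"]])],
                order=1,
            )
        ],
    )
    # Convenience accessors defined on StructuredDocument:
    assert len(doc.get_all_tables()) == 1
    assert doc.get_sections_by_type("table")[0].id == "s1"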
@@ -18,6 +18,16 @@ class ContentExtracted(BaseModel):
     summary: Optional[Dict[str, Any]] = Field(default=None, description="Optional extraction summary")
 
 
+class ChunkResult(BaseModel):
+    """Preserves the relationship between a chunk and its AI result."""
+    originalChunk: ContentPart
+    aiResult: str
+    chunkIndex: int
+    documentId: str
+    processingTime: float = 0.0
+    metadata: Dict[str, Any] = Field(default_factory=dict)
+
+
 class MergeStrategy(BaseModel):
     """Strategy configuration for merging content parts and AI results."""
 
@@ -1,4 +1,5 @@
 import logging
+import asyncio
 from typing import Dict, Any, List, Union, Tuple, Optional
 from dataclasses import dataclass
 
@@ -260,6 +261,7 @@ class AiObjects:
         if not requiredTags:
             requiredTags = OPERATION_TAG_MAPPING.get(options.operationType, [ModelTags.TEXT, ModelTags.CHAT])
 
+
         # Override priority based on processing mode if not explicitly set
         effectivePriority = options.priority
         if options.priority == Priority.BALANCED:
@@ -268,6 +270,7 @@ class AiObjects:
         logger.info(f"Model selection - Operation: {options.operationType}, Required tags: {requiredTags}, Priority: {effectivePriority}")
 
         for name, info in aiModels.items():
+            logger.info(f"Checking model: {name}, tags: {info.get('tags', [])}, function: {info.get('function', 'unknown')}")
             # Check context length
             if info["contextLength"] > 0 and totalSize > info["contextLength"] * 0.8:
                 continue
@@ -279,8 +282,11 @@ class AiObjects:
 
             # Check required tags/capabilities
             modelTags = info.get("tags", [])
-            if requiredTags and not any(tag in modelTags for tag in requiredTags):
+            if requiredTags and not all(tag in modelTags for tag in requiredTags):
+                logger.info(f" -> Skipping {name}: missing required tags. Has: {modelTags}, needs: {requiredTags}")
                 continue
+            else:
+                logger.info(f" -> {name} passed tag check")
 
             # Check processing mode requirements
             if options.processingMode == ProcessingMode.DETAILED and ModelTags.FAST in modelTags:
@@ -288,16 +294,24 @@ class AiObjects:
                 continue
 
             candidates[name] = info
+            logger.info(f" -> {name} added to candidates")
+
+        logger.info(f"Final candidates: {list(candidates.keys())}")
 
         if not candidates:
+            logger.info("No candidates found, using fallback")
             # Fallback based on operation type
             if options.operationType == OperationType.IMAGE_ANALYSIS:
+                logger.info("Using fallback: openai_callAiImage")
                 return "openai_callAiImage"
             elif options.operationType == OperationType.IMAGE_GENERATION:
+                logger.info("Using fallback: openai_generateImage")
                 return "openai_generateImage"
             elif options.operationType == OperationType.WEB_RESEARCH:
+                logger.info("Using fallback: perplexity_callAiWithWebSearch")
                 return "perplexity_callAiWithWebSearch"
             else:
+                logger.info("Using fallback: openai_callAiBasic_gpt35")
                 return "openai_callAiBasic_gpt35"
 
         # Special handling for planning operations - use Claude for consistency
@@ -313,17 +327,60 @@ class AiObjects:
 
         # Select based on priority for other operations
         if effectivePriority == Priority.SPEED:
-            return max(candidates, key=lambda k: candidates[k]["speedRating"])
+            selected = max(candidates, key=lambda k: candidates[k]["speedRating"])
+            logger.info(f"Selected by SPEED: {selected}")
+            return selected
         elif effectivePriority == Priority.QUALITY:
-            return max(candidates, key=lambda k: candidates[k]["qualityRating"])
+            selected = max(candidates, key=lambda k: candidates[k]["qualityRating"])
+            logger.info(f"Selected by QUALITY: {selected}")
+            return selected
         elif effectivePriority == Priority.COST:
-            return min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])
+            selected = min(candidates, key=lambda k: candidates[k]["costPer1kTokens"])
+            logger.info(f"Selected by COST: {selected}")
+            return selected
         else:  # BALANCED
             def balancedScore(name: str) -> float:
                 info = candidates[name]
                 return info["qualityRating"] * 0.4 + info["speedRating"] * 0.3 + (10 - info["costPer1kTokens"] * 1000) * 0.3
 
-            return max(candidates, key=balancedScore)
+            selected = max(candidates, key=balancedScore)
+            logger.info(f"Selected by BALANCED: {selected}")
+            return selected
+
+    def _getFallbackModels(self, operationType: str) -> List[str]:
+        """Get ordered list of fallback models for a given operation type."""
+        fallbackMappings = {
+            OperationType.GENERAL: [
+                "openai_callAiBasic_gpt35",   # Fast and reliable
+                "openai_callAiBasic",         # High quality
+                "anthropic_callAiBasic",      # Alternative high quality
+                "perplexity_callAiBasic"      # Cost effective
+            ],
+            OperationType.IMAGE_ANALYSIS: [
+                "openai_callAiImage",         # Primary image analysis
+                "anthropic_callAiImage"       # Alternative image analysis
+            ],
+            OperationType.IMAGE_GENERATION: [
+                "openai_generateImage"        # Only image generation model
+            ],
+            OperationType.WEB_RESEARCH: [
+                "perplexity_callAiWithWebSearch",  # Primary web research
+                "perplexity_callAiBasic",          # Alternative with web search
+                "openai_callAiBasic"               # Fallback to general model
+            ],
+            OperationType.GENERATE_PLAN: [
+                "anthropic_callAiBasic",      # Best for planning
+                "openai_callAiBasic",         # High quality alternative
+                "openai_callAiBasic_gpt35"    # Fast fallback
+            ],
+            OperationType.ANALYSE_CONTENT: [
+                "anthropic_callAiBasic",      # Best for analysis
+                "openai_callAiBasic",         # High quality alternative
+                "openai_callAiBasic_gpt35"    # Fast fallback
+            ]
+        }
+
+        return fallbackMappings.get(operationType, fallbackMappings[OperationType.GENERAL])
+
     def _connectorFor(self, modelName: str):
         """Get the appropriate connector for the model."""
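
Note: a quick worked example of the BALANCED score above, with illustrative numbers only. A candidate with qualityRating 8, speedRating 6 and costPer1kTokens 0.002 scores 8 * 0.4 + 6 * 0.3 + (10 - 0.002 * 1000) * 0.3 = 3.2 + 1.8 + 2.4 = 7.4, so quality carries the largest weight while cheap models pick up the remaining cost term.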
@@ -340,7 +397,7 @@ class AiObjects:
             raise ValueError(f"Unknown connector type: {connectorType}")
 
     async def call(self, request: AiCallRequest) -> AiCallResponse:
-        """Call AI model for text generation."""
+        """Call AI model for text generation with fallback mechanism."""
         prompt = request.prompt
         context = request.context or ""
         options = request.options
@@ -357,9 +414,6 @@ class AiObjects:
         if options.compressContext and len(context.encode("utf-8")) > 70000:
             context = maybeTruncate(context, 70000)
 
-        # Select model for text generation
-        modelName = self._selectModel(prompt, context, options)
-
         # Derive generation parameters
         temperature = getattr(options, "temperature", None)
         if temperature is None:
@@ -376,58 +430,112 @@ class AiObjects:
             messages.append({"role": "system", "content": f"Context from documents:\n{context}"})
         messages.append({"role": "user", "content": prompt})
 
-        connector = self._connectorFor(modelName)
-        functionName = aiModels[modelName]["function"]
-
-        # Call the appropriate function
-        if functionName == "callAiBasic":
-            if aiModels[modelName]["connector"] == "openai":
-                content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
-            elif aiModels[modelName]["connector"] == "perplexity":
-                content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
-            else:
-                response = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
-                content = response["choices"][0]["message"]["content"]
-        elif functionName == "callAiWithWebSearch":
-            # Perplexity web search function
-            query = prompt
-            if context:
-                query = f"Context: {context}\n\nQuery: {prompt}"
-            content = await connector.callAiWithWebSearch(query)
-        elif functionName == "researchTopic":
-            # Perplexity research function
-            content = await connector.researchTopic(prompt)
-        elif functionName == "answerQuestion":
-            # Perplexity question answering function
-            content = await connector.answerQuestion(prompt, context)
-        elif functionName == "getCurrentNews":
-            # Perplexity news function
-            content = await connector.getCurrentNews(prompt)
-        else:
-            raise ValueError(f"Function {functionName} not supported for text generation")
-
-        # Estimate cost/tokens
-        totalSize = len((prompt + context).encode("utf-8"))
-        cost = self._estimateCost(aiModels[modelName], totalSize)
-        usedTokens = int(totalSize / 4)
-
-        return AiCallResponse(content=content, modelName=modelName, usedTokens=usedTokens, costEstimate=cost)
+        # Get fallback models for this operation type
+        fallbackModels = self._getFallbackModels(options.operationType)
+
+        # Try primary model first, then fallbacks
+        lastError = None
+        for attempt, modelName in enumerate(fallbackModels):
+            try:
+                logger.info(f"Attempting AI call with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
+
+                connector = self._connectorFor(modelName)
+                functionName = aiModels[modelName]["function"]
+
+                # Call the appropriate function
+                if functionName == "callAiBasic":
+                    if aiModels[modelName]["connector"] == "openai":
+                        content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
+                    elif aiModels[modelName]["connector"] == "perplexity":
+                        content = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
+                    else:
+                        response = await connector.callAiBasic(messages, temperature=temperature, maxTokens=maxTokens)
+                        content = response["choices"][0]["message"]["content"]
+                elif functionName == "callAiWithWebSearch":
+                    # Perplexity web search function
+                    query = prompt
+                    if context:
+                        query = f"Context: {context}\n\nQuery: {prompt}"
+                    content = await connector.callAiWithWebSearch(query)
+                elif functionName == "researchTopic":
+                    # Perplexity research function
+                    content = await connector.researchTopic(prompt)
+                elif functionName == "answerQuestion":
+                    # Perplexity question answering function
+                    content = await connector.answerQuestion(prompt, context)
+                elif functionName == "getCurrentNews":
+                    # Perplexity news function
+                    content = await connector.getCurrentNews(prompt)
+                else:
+                    raise ValueError(f"Function {functionName} not supported for text generation")
+
+                # Success! Estimate cost/tokens and return
+                totalSize = len((prompt + context).encode("utf-8"))
+                cost = self._estimateCost(aiModels[modelName], totalSize)
+                usedTokens = int(totalSize / 4)
+
+                logger.info(f"✅ AI call successful with model: {modelName}")
+                return AiCallResponse(content=content, modelName=modelName, usedTokens=usedTokens, costEstimate=cost)
+
+            except Exception as e:
+                lastError = e
+                logger.warning(f"❌ AI call failed with model {modelName}: {str(e)}")
+
+                # If this is not the last model, try the next one
+                if attempt < len(fallbackModels) - 1:
+                    logger.info(f"🔄 Trying next fallback model...")
+                    continue
+                else:
+                    # All models failed
+                    logger.error(f"💥 All {len(fallbackModels)} models failed for operation {options.operationType}")
+                    break
+
+        # All fallback attempts failed
+        errorMsg = f"All AI models failed for operation {options.operationType}. Last error: {str(lastError)}"
+        logger.error(errorMsg)
+        raise Exception(errorMsg)
 
     async def callImage(self, prompt: str, imageData: Union[str, bytes], mimeType: str = None, options: AiCallOptions = None) -> str:
-        """Call AI model for image analysis."""
+        """Call AI model for image analysis with fallback mechanism."""
         if options is None:
             options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
 
-        # Select model for image analysis
-        modelName = self._selectModel(prompt, "", options)
-
-        connector = self._connectorFor(modelName)
-        functionName = aiModels[modelName]["function"]
-
-        if functionName == "callAiImage":
-            return await connector.callAiImage(prompt, imageData, mimeType)
-        else:
-            raise ValueError(f"Function {functionName} not supported for image analysis")
+        # Get fallback models for image analysis
+        fallbackModels = self._getFallbackModels(OperationType.IMAGE_ANALYSIS)
+
+        # Try primary model first, then fallbacks
+        lastError = None
+        for attempt, modelName in enumerate(fallbackModels):
+            try:
+                logger.info(f"Attempting image analysis with model: {modelName} (attempt {attempt + 1}/{len(fallbackModels)})")
+
+                connector = self._connectorFor(modelName)
+                functionName = aiModels[modelName]["function"]
+
+                if functionName == "callAiImage":
+                    content = await connector.callAiImage(prompt, imageData, mimeType)
+                    logger.info(f"✅ Image analysis successful with model: {modelName}")
+                    return content
+                else:
+                    raise ValueError(f"Function {functionName} not supported for image analysis")
+
+            except Exception as e:
+                lastError = e
+                logger.warning(f"❌ Image analysis failed with model {modelName}: {str(e)}")
+
+                # If this is not the last model, try the next one
+                if attempt < len(fallbackModels) - 1:
+                    logger.info(f"🔄 Trying next fallback model for image analysis...")
+                    continue
+                else:
+                    # All models failed
+                    logger.error(f"💥 All {len(fallbackModels)} models failed for image analysis")
+                    break
+
+        # All fallback attempts failed
+        errorMsg = f"All AI models failed for image analysis. Last error: {str(lastError)}"
+        logger.error(errorMsg)
+        raise Exception(errorMsg)
 
     async def generateImage(self, prompt: str, size: str = "1024x1024", quality: str = "standard", style: str = "vivid", options: AiCallOptions = None) -> Dict[str, Any]:
         """Generate an image using AI."""
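
Note: a hedged call sketch for the new fallback path. It assumes AiCallRequest accepts the prompt/context/options fields read above and that an AiObjects instance (here aiObjects) exists; neither construction is shown in this diff.

    # Hypothetical wiring: AiCallRequest/aiObjects construction is assumed, not part of this change.
    async def demo_call(aiObjects) -> None:
        request = AiCallRequest(
            prompt="Summarise the attached notes",
            context="Notes: ...",
            options=AiCallOptions(operationType=OperationType.GENERAL),
        )
        # call() now walks the GENERAL fallback list (gpt35 -> openai -> anthropic -> perplexity).
        response = await aiObjects.call(request)
        print(response.modelName, response.costEstimate)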
@@ -694,7 +802,22 @@ class AiObjects:
             logger.warning(f"Failed to extract links from content: {e}")
             return []
 
-    async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10) -> Dict[str, str]:
+    def _normalizeUrl(self, url: str) -> str:
+        """Normalize URL to handle variations that should be considered duplicates."""
+        if not url:
+            return url
+
+        # Remove trailing slashes and fragments
+        url = url.rstrip('/')
+        if '#' in url:
+            url = url.split('#')[0]
+
+        # Handle common URL variations
+        url = url.replace('http://', 'https://')  # Normalize protocol
+
+        return url
+
+    async def crawlRecursively(self, urls: List[str], max_depth: int, extract_depth: str = "advanced", max_per_domain: int = 10, global_processed_urls: Optional[set] = None) -> Dict[str, str]:
         """
         Recursively crawl URLs up to specified depth.
 
@@ -703,76 +826,100 @@ class AiObjects:
             max_depth: Maximum depth to crawl (1=main pages only, 2=main+sub-pages, etc.)
             extract_depth: Tavily extract depth setting
             max_per_domain: Maximum URLs per domain per level
+            global_processed_urls: Optional global set to track processed URLs across sessions
 
         Returns:
             Dictionary mapping URL -> content for all crawled pages
         """
         logger.info(f"Starting recursive crawl: {len(urls)} starting URLs, max_depth={max_depth}")
 
-        # URL index to track all processed URLs
+        # URL index to track all processed URLs (local + global)
         processed_urls = set()
+        if global_processed_urls is not None:
+            # Use global index if provided, otherwise create local one
+            processed_urls = global_processed_urls
+            logger.info(f"Using global URL index with {len(processed_urls)} already processed URLs")
+        else:
+            logger.info("Using local URL index for this crawl session")
+
         all_content = {}
 
         # Current level URLs to process
         current_level_urls = urls.copy()
 
-        for depth in range(1, max_depth + 1):
-            logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
-            logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
-
-            # URLs found at this level (for next iteration)
-            next_level_urls = []
-
-            for url in current_level_urls:
-                if url in processed_urls:
-                    logger.debug(f"URL {url} already processed, skipping")
-                    continue
-
-                try:
-                    logger.info(f"Processing URL at depth {depth}: {url}")
-
-                    # Read page content
-                    content = await self.readPage(url, extract_depth)
-                    if content:
-                        all_content[url] = content
-                        processed_urls.add(url)
-                        logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
-
-                        # Get URLs from this page for next level
-                        page_urls = await self.getUrlsFromPage(url, extract_depth)
-                        logger.info(f"Found {len(page_urls)} URLs on {url}")
-
-                        # Filter URLs and add to next level
-                        filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
-                        logger.info(f"Filtered to {len(filtered_urls)} valid URLs")
-
-                        # Add new URLs to next level (avoiding already processed ones)
-                        new_urls_count = 0
-                        for new_url in filtered_urls:
-                            if new_url not in processed_urls:
-                                next_level_urls.append(new_url)
-                                new_urls_count += 1
-
-                        logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
-                    else:
-                        logger.warning(f"✗ No content extracted from {url}")
-                        processed_urls.add(url)  # Mark as processed to avoid retry
-
-                except Exception as e:
-                    logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
-                    processed_urls.add(url)  # Mark as processed to avoid retry
-
-            # Prepare for next iteration
-            current_level_urls = next_level_urls
-            logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")
-
-            # Stop if no more URLs to process
-            if not current_level_urls:
-                logger.info(f"No more URLs found at depth {depth}, stopping recursion")
-                break
-
-        logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
-        return all_content
+        try:
+            for depth in range(1, max_depth + 1):
+                logger.info(f"=== DEPTH LEVEL {depth}/{max_depth} ===")
+                logger.info(f"Processing {len(current_level_urls)} URLs at depth {depth}")
+
+                # URLs found at this level (for next iteration)
+                next_level_urls = []
+
+                for url in current_level_urls:
+                    # Normalize URL for duplicate checking
+                    normalized_url = self._normalizeUrl(url)
+                    if normalized_url in processed_urls:
+                        logger.debug(f"URL {url} (normalized: {normalized_url}) already processed, skipping")
+                        continue
+
+                    try:
+                        logger.info(f"Processing URL at depth {depth}: {url}")
+                        logger.debug(f"Total processed URLs so far: {len(processed_urls)}")
+
+                        # Read page content
+                        content = await self.readPage(url, extract_depth)
+                        if content:
+                            all_content[url] = content
+                            processed_urls.add(normalized_url)
+                            logger.info(f"✓ Successfully processed {url}: {len(content)} chars")
+
+                            # Get URLs from this page for next level
+                            page_urls = await self.getUrlsFromPage(url, extract_depth)
+                            logger.info(f"Found {len(page_urls)} URLs on {url}")
+
+                            # Filter URLs and add to next level
+                            filtered_urls = self.filterUrlsOnlyPages(page_urls, max_per_domain)
+                            logger.info(f"Filtered to {len(filtered_urls)} valid URLs")
+
+                            # Add new URLs to next level (avoiding already processed ones)
+                            new_urls_count = 0
+                            for new_url in filtered_urls:
+                                normalized_new_url = self._normalizeUrl(new_url)
+                                if normalized_new_url not in processed_urls:
+                                    next_level_urls.append(new_url)
+                                    new_urls_count += 1
+                                else:
+                                    logger.debug(f"URL {new_url} (normalized: {normalized_new_url}) already processed, skipping")
+
+                            logger.info(f"Added {new_urls_count} new URLs to next level from {url}")
+                        else:
+                            logger.warning(f"✗ No content extracted from {url}")
+                            processed_urls.add(normalized_url)  # Mark as processed to avoid retry
+
+                    except Exception as e:
+                        logger.warning(f"✗ Failed to process URL {url} at depth {depth}: {e}")
+                        processed_urls.add(normalized_url)  # Mark as processed to avoid retry
+
+                # Prepare for next iteration
+                current_level_urls = next_level_urls
+                logger.info(f"Depth {depth} completed. Found {len(next_level_urls)} URLs for next level")
+
+                # Stop if no more URLs to process
+                if not current_level_urls:
+                    logger.info(f"No more URLs found at depth {depth}, stopping recursion")
+                    break
+
+            logger.info(f"Recursive crawl completed: {len(all_content)} total pages crawled")
+            logger.info(f"Total URLs processed (including skipped): {len(processed_urls)}")
+            logger.info(f"Unique URLs found: {len(all_content)}")
+            return all_content
+
+        except asyncio.TimeoutError:
+            logger.warning(f"Crawling timed out, returning partial results: {len(all_content)} pages crawled so far")
+            return all_content
+        except Exception as e:
+            logger.error(f"Crawling failed with error: {e}, returning partial results: {len(all_content)} pages crawled so far")
+            return all_content
 
     async def webQuery(self, query: str, context: str = "", options: AiCallOptions = None) -> str:
         """Use Perplexity AI to provide the best answers for web-related queries."""
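
Note: a usage sketch for the new global_processed_urls parameter, assuming an AiObjects-style instance (here aiWeb) exposing crawlRecursively as defined above. Sharing one set lets a second crawl session skip pages the first one already normalized and stored.

    async def crawl_two_sessions(aiWeb) -> None:
        shared_index: set = set()
        # First session populates the shared URL index.
        first = await aiWeb.crawlRecursively(
            ["https://example.com"], max_depth=2, global_processed_urls=shared_index
        )
        # Second session skips everything already recorded in shared_index.
        second = await aiWeb.crawlRecursively(
            ["https://example.com/docs"], max_depth=2, global_processed_urls=shared_index
        )
        print(len(first), len(second), len(shared_index))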
@@ -571,8 +571,10 @@ class ChatObjects:
             actionName=createdMessage.get("actionName")
         )
 
-        # Debug: Store message and documents for debugging TODO REMOVE
-        self._storeDebugMessageAndDocuments(chat_message)
+        # Debug: Store message and documents for debugging - only if debug enabled
+        debug_enabled = APP_CONFIG.get("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+        if debug_enabled:
+            self._storeDebugMessageAndDocuments(chat_message)
 
         return chat_message
 
@@ -1052,8 +1054,11 @@ class ChatObjects:
 
     def _storeDebugMessageAndDocuments(self, message: ChatMessage) -> None:
         """
-        Store message and documents for debugging purposes in fileshare.
-        Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/documents
+        Store message and documents (metadata and file bytes) for debugging purposes.
+        Structure: gateway/test-chat/messages/m_round_task_action_timestamp/documentlist_label/
+            - message.json, message_text.txt
+            - document_###_metadata.json
+            - document_###_<original_filename> (actual file bytes)
 
         Args:
             message: ChatMessage object to store
@@ -1156,6 +1161,26 @@ class ChatObjects:
                     json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)
 
                 logger.info(f"Debug: Stored document metadata for {doc.fileName}")
 
+                # Also store the actual file bytes next to metadata for debugging
+                try:
+                    # Lazy import to avoid circular deps at module load
+                    from modules.interfaces import interfaceDbComponentObjects as comp
+                    componentInterface = comp.getInterface(self.currentUser)
+                    file_bytes = componentInterface.getFileData(doc.fileId)
+                    if file_bytes:
+                        # Build a safe filename preserving original name
+                        safe_name = doc.fileName or f"document_{i+1:03d}"
+                        # Avoid path traversal
+                        safe_name = os.path.basename(safe_name)
+                        doc_file_path = os.path.join(label_folder, f"document_{i+1:03d}_" + safe_name)
+                        with open(doc_file_path, "wb") as df:
+                            df.write(file_bytes)
+                        logger.info(f"Debug: Stored document file bytes: {doc_file_path} ({len(file_bytes)} bytes)")
+                    else:
+                        logger.warning(f"Debug: No file bytes returned for fileId {doc.fileId}")
+                except Exception as e:
+                    logger.error(f"Debug: Failed to store document file for {doc.fileName} (fileId {doc.fileId}): {e}")
+
             logger.info(f"Debug: Stored message and documents in {message_path}")
 
@@ -95,8 +95,8 @@ async def update_prompt(
             detail=f"Prompt with ID {promptId} not found"
         )
 
-    # Convert Prompt to dict for interface
-    update_data = promptData.dict()
+    # Convert Prompt to dict for interface, excluding the id field
+    update_data = promptData.dict(exclude={'id'})
 
     # Update prompt
     updatedPrompt = managementInterface.updatePrompt(promptId, update_data)
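A minimal sketch of the behaviour relied on here, with Prompt fields assumed purely for illustration: Pydantic's dict(exclude=...) drops the named keys, so the id from the path parameter stays authoritative.

    prompt_data = Prompt(id="p-123", name="summarize", text="Summarize {{KEY:notes}}")
    prompt_data.dict(exclude={'id'})
    # -> {'name': 'summarize', 'text': 'Summarize {{KEY:notes}}'}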
@@ -14,7 +14,7 @@ from pydantic import BaseModel
 
 # Import auth modules
 from modules.security.auth import getCurrentUser, limiter, SECRET_KEY, ALGORITHM
-from modules.security.jwtService import createAccessToken, createRefreshToken, setAccessTokenCookie, setRefreshTokenCookie
+from modules.security.jwtService import createAccessToken, createRefreshToken, setAccessTokenCookie, setRefreshTokenCookie, clearAccessTokenCookie, clearRefreshTokenCookie
 from modules.interfaces.interfaceDbAppObjects import getInterface, getRootInterface
 from modules.datamodels.datamodelUam import User, UserInDB, AuthAuthority, UserPrivilege
 from modules.datamodels.datamodelSecurity import Token
@@ -263,8 +263,7 @@ async def read_user_me(
 @limiter.limit("60/minute")
 async def refresh_token(
     request: Request,
-    response: Response,
-    currentUser: User = Depends(getCurrentUser)
+    response: Response
 ) -> Dict[str, Any]:
     """Refresh access token using refresh token from cookie"""
     try:
@@ -283,12 +282,27 @@ async def refresh_token(
         except jwt.JWTError:
             raise HTTPException(status_code=401, detail="Invalid refresh token")
 
+        # Get user information from refresh token payload
+        user_id = payload.get("userId")
+        if not user_id:
+            raise HTTPException(status_code=401, detail="Invalid refresh token - missing user ID")
+
+        # Get user from database using the user ID from refresh token
+        try:
+            app_interface = getRootInterface()
+            current_user = app_interface.getUser(user_id)
+            if not current_user:
+                raise HTTPException(status_code=401, detail="User not found")
+        except Exception as e:
+            logger.error(f"Failed to get user from database: {str(e)}")
+            raise HTTPException(status_code=500, detail="Failed to validate user")
+
         # Create new token data
         token_data = {
-            "sub": currentUser.username,
-            "mandateId": str(currentUser.mandateId),
-            "userId": str(currentUser.id),
-            "authenticationAuthority": currentUser.authenticationAuthority
+            "sub": current_user.username,
+            "mandateId": str(current_user.mandateId),
+            "userId": str(current_user.id),
+            "authenticationAuthority": current_user.authenticationAuthority
         }
 
         # Create new access token + set cookie
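The payload used above is presumably produced by decoding the refresh-token cookie just before this hunk; a minimal sketch of that step, assuming a python-jose style jwt.decode and the refresh_token cookie name used elsewhere in this diff:

    refresh_token_value = request.cookies.get("refresh_token")
    payload = jwt.decode(refresh_token_value, SECRET_KEY, algorithms=[ALGORITHM])
    user_id = payload.get("userId")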
@@ -365,15 +379,18 @@ async def logout(request: Request, response: Response, currentUser: User = Depen
             # Don't fail if audit logging fails
             pass
 
-        # Clear httpOnly cookies
-        response.delete_cookie(key="auth_token", httponly=True, samesite="strict")
-        response.delete_cookie(key="refresh_token", httponly=True, samesite="strict")
-
-        return JSONResponse({
+        # Create the JSON response first
+        json_response = JSONResponse({
             "message": "Successfully logged out - cookies cleared",
             "revokedTokens": revoked
         })
 
+        # Clear httpOnly cookies on the response we're actually returning
+        clearAccessTokenCookie(json_response)
+        clearRefreshTokenCookie(json_response)
+
+        return json_response
+
     except Exception as e:
         logger.error(f"Error during logout: {str(e)}")
         raise HTTPException(
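The rework matters because FastAPI applies cookies set on the injected response parameter only when the handler returns plain data; once the handler returns its own JSONResponse, deletions have to be applied to that object. A tiny sketch of the failing pattern that the old code exhibited (values assumed):

    # cookies set on the injected `response` are ignored once the handler returns its own Response
    response.delete_cookie("auth_token")
    return JSONResponse({"message": "ok"})   # this object never carries the deletion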
@@ -17,6 +17,11 @@ ALGORITHM = APP_CONFIG.get("Auth_ALGORITHM")
 ACCESS_TOKEN_EXPIRE_MINUTES = int(APP_CONFIG.get("APP_TOKEN_EXPIRY"))
 REFRESH_TOKEN_EXPIRE_DAYS = int(APP_CONFIG.get("APP_REFRESH_TOKEN_EXPIRY", "7"))
 
+# Cookie security settings - use secure cookies based on whether API uses HTTPS
+# Cookies must have secure=True on HTTPS sites, secure=False on HTTP sites
+APP_API_URL = APP_CONFIG.get("APP_API_URL", "http://localhost:8000")
+USE_SECURE_COOKIES = APP_API_URL.startswith("https://") if APP_API_URL else False
+
 
 def createAccessToken(data: dict, expiresDelta: Optional[timedelta] = None) -> Tuple[str, "datetime"]:
     """Create a JWT access token and return (token, expiresAt)."""
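As an illustration of how the flag resolves (URLs assumed, not taken from the diff):

    APP_API_URL = "https://api.example.com"   # -> USE_SECURE_COOKIES = True
    APP_API_URL = "http://localhost:8000"     # -> USE_SECURE_COOKIES = False, so local development over plain HTTP still receives the cookies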
@@ -52,8 +57,9 @@ def setAccessTokenCookie(response: Response, token: str, expiresDelta: Optional[
         key="auth_token",
         value=token,
         httponly=True,
-        secure=True,
+        secure=USE_SECURE_COOKIES,  # Only secure in production (HTTPS)
         samesite="strict",
+        path="/",
         max_age=maxAge
     )
@@ -64,9 +70,46 @@ def setRefreshTokenCookie(response: Response, token: str) -> None:
         key="refresh_token",
         value=token,
         httponly=True,
-        secure=True,
+        secure=USE_SECURE_COOKIES,  # Only secure in production (HTTPS)
         samesite="strict",
+        path="/",
         max_age=REFRESH_TOKEN_EXPIRE_DAYS * 24 * 60 * 60
     )
 
+
+def clearAccessTokenCookie(response: Response) -> None:
+    """
+    Clear access token cookie by setting it to expire immediately.
+    Uses both raw header manipulation and FastAPI's delete_cookie for maximum browser compatibility.
+    """
+    # Build secure flag based on environment
+    secure_flag = "; Secure" if USE_SECURE_COOKIES else ""
+
+    # Primary method: Raw Set-Cookie header for guaranteed deletion
+    response.headers.append(
+        "Set-Cookie",
+        f"auth_token=deleted; Path=/; Max-Age=0; Expires=Thu, 01 Jan 1970 00:00:00 GMT; HttpOnly{secure_flag}; SameSite=Strict"
+    )
+
+    # Fallback: Also use FastAPI's built-in method
+    response.delete_cookie(key="auth_token", path="/")
+
+
+def clearRefreshTokenCookie(response: Response) -> None:
+    """
+    Clear refresh token cookie by setting it to expire immediately.
+    Uses both raw header manipulation and FastAPI's delete_cookie for maximum browser compatibility.
+    """
+    # Build secure flag based on environment
+    secure_flag = "; Secure" if USE_SECURE_COOKIES else ""
+
+    # Primary method: Raw Set-Cookie header for guaranteed deletion
+    response.headers.append(
+        "Set-Cookie",
+        f"refresh_token=deleted; Path=/; Max-Age=0; Expires=Thu, 01 Jan 1970 00:00:00 GMT; HttpOnly{secure_flag}; SameSite=Strict"
+    )
+
+    # Fallback: Also use FastAPI's built-in method
+    response.delete_cookie(key="refresh_token", path="/")
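A quick way to sanity-check the deletion behaviour, sketched under the assumption that the response headers expose Starlette's Headers.getlist (both deletion paths above write a Set-Cookie header):

    from fastapi.responses import JSONResponse

    resp = JSONResponse({"message": "ok"})
    clearAccessTokenCookie(resp)
    assert any("auth_token=deleted" in h for h in resp.headers.getlist("set-cookie"))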
File diff suppressed because it is too large

modules/services/serviceAi/subCoreAi.py (596 lines, new file)

@@ -0,0 +1,596 @@
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
|
from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, ModelCapabilities, OperationType, Priority
|
||||||
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SubCoreAi:
|
||||||
|
"""Core AI operations including image analysis, text generation, and planning calls."""
|
||||||
|
|
||||||
|
def __init__(self, services, aiObjects):
|
||||||
|
"""Initialize core AI operations.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
services: Service center instance for accessing other services
|
||||||
|
aiObjects: Initialized AiObjects instance
|
||||||
|
"""
|
||||||
|
self.services = services
|
||||||
|
self.aiObjects = aiObjects
|
||||||
|
|
||||||
|
# AI Processing Call
|
||||||
|
async def callAi(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
documents: Optional[List[ChatDocument]] = None,
|
||||||
|
placeholders: Optional[List[PromptPlaceholder]] = None,
|
||||||
|
options: Optional[AiCallOptions] = None,
|
||||||
|
outputFormat: Optional[str] = None,
|
||||||
|
title: Optional[str] = None,
|
||||||
|
documentProcessor=None,
|
||||||
|
documentGenerator=None
|
||||||
|
) -> Union[str, Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Unified AI call interface that automatically routes to appropriate handler.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: The main prompt for the AI call
|
||||||
|
documents: Optional list of documents to process
|
||||||
|
placeholders: Optional list of placeholder replacements for planning calls
|
||||||
|
options: AI call configuration options
|
||||||
|
outputFormat: Optional output format (html, pdf, docx, txt, md, json, csv, xlsx) for document generation
|
||||||
|
title: Optional title for generated documents
|
||||||
|
documentProcessor: Document processing service instance
|
||||||
|
documentGenerator: Document generation service instance
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
AI response as string, or dict with documents if outputFormat is specified
|
||||||
|
|
||||||
|
Raises:
|
||||||
|
Exception: If all available models fail
|
||||||
|
"""
|
||||||
|
if options is None:
|
||||||
|
options = AiCallOptions()
|
||||||
|
|
||||||
|
# Normalize placeholders from List[PromptPlaceholder]
|
||||||
|
placeholders_dict: Dict[str, str] = {}
|
||||||
|
placeholders_meta: Dict[str, bool] = {}
|
||||||
|
if placeholders:
|
||||||
|
placeholders_dict = {p.label: p.content for p in placeholders}
|
||||||
|
placeholders_meta = {p.label: bool(getattr(p, 'summaryAllowed', False)) for p in placeholders}
|
||||||
|
|
||||||
|
# Auto-determine call type based on documents and operation type
|
||||||
|
call_type = self._determineCallType(documents, options.operationType)
|
||||||
|
options.callType = call_type
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Build the full prompt that will be sent to AI
|
||||||
|
if placeholders:
|
||||||
|
full_prompt = prompt
|
||||||
|
for p in placeholders:
|
||||||
|
placeholder = f"{{{{KEY:{p.label}}}}}"
|
||||||
|
full_prompt = full_prompt.replace(placeholder, p.content)
|
||||||
|
else:
|
||||||
|
full_prompt = prompt
|
||||||
|
|
||||||
|
self._writeAiResponseDebug(
|
||||||
|
label='ai_prompt_debug',
|
||||||
|
content=full_prompt,
|
||||||
|
partIndex=1,
|
||||||
|
modelName=None,
|
||||||
|
continuation=False
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Handle document generation with specific output format
|
||||||
|
if outputFormat and documentGenerator:
|
||||||
|
result = await documentGenerator.callAiWithDocumentGeneration(prompt, documents, options, outputFormat, title)
|
||||||
|
# Log AI response for debugging
|
||||||
|
try:
|
||||||
|
if isinstance(result, dict) and 'content' in result:
|
||||||
|
self._writeAiResponseDebug(
|
||||||
|
label='ai_document_generation',
|
||||||
|
content=result['content'],
|
||||||
|
partIndex=1,
|
||||||
|
modelName=None, # Document generation doesn't return model info
|
||||||
|
continuation=False
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return result
|
||||||
|
|
||||||
|
if call_type == "planning":
|
||||||
|
result = await self._callAiPlanning(prompt, placeholders_dict, placeholders_meta, options)
|
||||||
|
# Log AI response for debugging
|
||||||
|
try:
|
||||||
|
self._writeAiResponseDebug(
|
||||||
|
label='ai_planning',
|
||||||
|
content=result or "",
|
||||||
|
partIndex=1,
|
||||||
|
modelName=None, # Planning doesn't return model info
|
||||||
|
continuation=False
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return result
|
||||||
|
else:
|
||||||
|
# Set processDocumentsIndividually from the legacy parameter if not set in options
|
||||||
|
if options.processDocumentsIndividually is None and documents:
|
||||||
|
options.processDocumentsIndividually = False # Default to batch processing
|
||||||
|
|
||||||
|
# For text calls, we need to build the full prompt with placeholders here
|
||||||
|
# since _callAiText doesn't handle placeholders directly
|
||||||
|
if placeholders_dict:
|
||||||
|
full_prompt = self._buildPromptWithPlaceholders(prompt, placeholders_dict)
|
||||||
|
else:
|
||||||
|
full_prompt = prompt
|
||||||
|
|
||||||
|
if documentProcessor and documents:
|
||||||
|
result = await documentProcessor.callAiText(full_prompt, documents, options)
|
||||||
|
else:
|
||||||
|
# Fallback to direct AI call if no document processor available
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=full_prompt,
|
||||||
|
context="",
|
||||||
|
options=options
|
||||||
|
)
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
result = response.content
|
||||||
|
|
||||||
|
# Log AI response for debugging (additional logging for text calls)
|
||||||
|
try:
|
||||||
|
self._writeAiResponseDebug(
|
||||||
|
label='ai_text_main',
|
||||||
|
content=result or "",
|
||||||
|
partIndex=1,
|
||||||
|
modelName=None, # Text calls already log internally
|
||||||
|
continuation=False
|
||||||
|
)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return result
|
||||||
|
|
||||||
|
# AI Image Analysis
|
||||||
|
async def readImage(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
imageData: Union[str, bytes],
|
||||||
|
mimeType: str = None,
|
||||||
|
options: Optional[AiCallOptions] = None,
|
||||||
|
) -> str:
|
||||||
|
"""Call AI for image analysis using interface.callImage()."""
|
||||||
|
try:
|
||||||
|
# Check if imageData is valid
|
||||||
|
if not imageData:
|
||||||
|
error_msg = "No image data provided"
|
||||||
|
self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
|
||||||
|
logger.error(f"Error in AI image analysis: {error_msg}")
|
||||||
|
return f"Error: {error_msg}"
|
||||||
|
|
||||||
|
self.services.utils.debugLogToFile(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}", "AI_SERVICE")
|
||||||
|
logger.info(f"readImage called with prompt, imageData type: {type(imageData)}, length: {len(imageData) if imageData else 0}, mimeType: {mimeType}")
|
||||||
|
|
||||||
|
# Always use IMAGE_ANALYSIS operation type for image processing
|
||||||
|
if options is None:
|
||||||
|
options = AiCallOptions(operationType=OperationType.IMAGE_ANALYSIS)
|
||||||
|
else:
|
||||||
|
# Override the operation type to ensure image analysis
|
||||||
|
options.operationType = OperationType.IMAGE_ANALYSIS
|
||||||
|
|
||||||
|
self.services.utils.debugLogToFile(f"Calling aiObjects.callImage with operationType: {options.operationType}", "AI_SERVICE")
|
||||||
|
logger.info(f"Calling aiObjects.callImage with operationType: {options.operationType}")
|
||||||
|
result = await self.aiObjects.callImage(prompt, imageData, mimeType, options)
|
||||||
|
|
||||||
|
# Debug the result
|
||||||
|
self.services.utils.debugLogToFile(f"Raw AI result type: {type(result)}, value: {repr(result)}", "AI_SERVICE")
|
||||||
|
|
||||||
|
# Check if result is valid
|
||||||
|
if not result or (isinstance(result, str) and not result.strip()):
|
||||||
|
error_msg = f"No response from AI image analysis (result: {repr(result)})"
|
||||||
|
self.services.utils.debugLogToFile(f"Error in AI image analysis: {error_msg}", "AI_SERVICE")
|
||||||
|
logger.error(f"Error in AI image analysis: {error_msg}")
|
||||||
|
return f"Error: {error_msg}"
|
||||||
|
|
||||||
|
self.services.utils.debugLogToFile(f"callImage returned: {result[:200]}..." if len(result) > 200 else result, "AI_SERVICE")
|
||||||
|
logger.info(f"callImage returned: {result[:200]}..." if len(result) > 200 else result)
|
||||||
|
return result
|
||||||
|
except Exception as e:
|
||||||
|
self.services.utils.debugLogToFile(f"Error in AI image analysis: {str(e)}", "AI_SERVICE")
|
||||||
|
logger.error(f"Error in AI image analysis: {str(e)}")
|
||||||
|
return f"Error: {str(e)}"
|
||||||
|
|
||||||
|
# AI Image Generation
|
||||||
|
async def generateImage(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
size: str = "1024x1024",
|
||||||
|
quality: str = "standard",
|
||||||
|
style: str = "vivid",
|
||||||
|
options: Optional[AiCallOptions] = None,
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Generate an image using AI using interface.generateImage()."""
|
||||||
|
try:
|
||||||
|
return await self.aiObjects.generateImage(prompt, size, quality, style, options)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in AI image generation: {str(e)}")
|
||||||
|
return {"success": False, "error": str(e)}
|
||||||
|
|
||||||
|
def _determineCallType(self, documents: Optional[List[ChatDocument]], operation_type: str) -> str:
|
||||||
|
"""
|
||||||
|
Determine call type based on documents and operation type.
|
||||||
|
|
||||||
|
Criteria: no documents AND operationType is "generate_plan" -> planning
|
||||||
|
All other cases -> text
|
||||||
|
"""
|
||||||
|
has_documents = documents is not None and len(documents) > 0
|
||||||
|
is_planning_operation = operation_type == OperationType.GENERATE_PLAN
|
||||||
|
|
||||||
|
if not has_documents and is_planning_operation:
|
||||||
|
return "planning"
|
||||||
|
else:
|
||||||
|
return "text"
|
||||||
|
|
||||||
|
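A minimal illustration of the routing rule above; core is an assumed SubCoreAi instance and some_chat_document a placeholder ChatDocument, neither taken from the diff:

    core._determineCallType(documents=None, operation_type=OperationType.GENERATE_PLAN)                  # -> "planning"
    core._determineCallType(documents=[some_chat_document], operation_type=OperationType.GENERATE_PLAN)  # -> "text"
    core._determineCallType(documents=None, operation_type=OperationType.GENERAL)                        # -> "text"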
async def _callAiPlanning(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
placeholders: Optional[Dict[str, str]],
|
||||||
|
placeholdersMeta: Optional[Dict[str, bool]],
|
||||||
|
options: AiCallOptions
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Handle planning calls with placeholder system and selective summarization.
|
||||||
|
"""
|
||||||
|
# Build full prompt with placeholders; if too large, summarize summaryAllowed placeholders proportionally
|
||||||
|
effective_placeholders = placeholders or {}
|
||||||
|
full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
|
||||||
|
|
||||||
|
if options.compressPrompt and placeholdersMeta:
|
||||||
|
# Determine model capacity
|
||||||
|
try:
|
||||||
|
caps = self._getModelCapabilitiesForContent(full_prompt, None, options)
|
||||||
|
max_bytes = caps.get("maxContextBytes", len(full_prompt.encode("utf-8")))
|
||||||
|
except Exception:
|
||||||
|
max_bytes = len(full_prompt.encode("utf-8"))
|
||||||
|
|
||||||
|
current_bytes = len(full_prompt.encode("utf-8"))
|
||||||
|
if current_bytes > max_bytes:
|
||||||
|
# Compute total bytes contributed by allowed placeholders (approximate by content length)
|
||||||
|
allowed_labels = [l for l, allow in placeholdersMeta.items() if allow]
|
||||||
|
allowed_sizes = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels}
|
||||||
|
total_allowed = sum(allowed_sizes.values())
|
||||||
|
|
||||||
|
overage = current_bytes - max_bytes
|
||||||
|
if total_allowed > 0 and overage > 0:
|
||||||
|
# Target total for allowed after reduction
|
||||||
|
target_allowed = max(total_allowed - overage, 0)
|
||||||
|
# Global ratio to apply across allowed placeholders
|
||||||
|
ratio = target_allowed / total_allowed if total_allowed > 0 else 1.0
|
||||||
|
ratio = max(0.0, min(1.0, ratio))
|
||||||
|
|
||||||
|
reduced: Dict[str, str] = {}
|
||||||
|
for label, content in effective_placeholders.items():
|
||||||
|
if label in allowed_labels and isinstance(content, str) and len(content) > 0:
|
||||||
|
old_len = len(content)
|
||||||
|
# Reduce by proportional ratio on characters (fallback if empty)
|
||||||
|
reduction_factor = ratio if old_len > 0 else 1.0
|
||||||
|
reduced[label] = self._reduceText(content, reduction_factor)
|
||||||
|
else:
|
||||||
|
reduced[label] = content
|
||||||
|
|
||||||
|
effective_placeholders = reduced
|
||||||
|
full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
|
||||||
|
|
||||||
|
# If still slightly over, perform a second-pass fine adjustment with updated ratio
|
||||||
|
current_bytes = len(full_prompt.encode("utf-8"))
|
||||||
|
if current_bytes > max_bytes and total_allowed > 0:
|
||||||
|
overage2 = current_bytes - max_bytes
|
||||||
|
# Recompute allowed sizes after first reduction
|
||||||
|
allowed_sizes2 = {l: len((effective_placeholders.get(l) or "").encode("utf-8")) for l in allowed_labels}
|
||||||
|
total_allowed2 = sum(allowed_sizes2.values())
|
||||||
|
if total_allowed2 > 0 and overage2 > 0:
|
||||||
|
target_allowed2 = max(total_allowed2 - overage2, 0)
|
||||||
|
ratio2 = target_allowed2 / total_allowed2
|
||||||
|
ratio2 = max(0.0, min(1.0, ratio2))
|
||||||
|
reduced2: Dict[str, str] = {}
|
||||||
|
for label, content in effective_placeholders.items():
|
||||||
|
if label in allowed_labels and isinstance(content, str) and len(content) > 0:
|
||||||
|
old_len = len(content)
|
||||||
|
reduction_factor = ratio2 if old_len > 0 else 1.0
|
||||||
|
reduced2[label] = self._reduceText(content, reduction_factor)
|
||||||
|
else:
|
||||||
|
reduced2[label] = content
|
||||||
|
effective_placeholders = reduced2
|
||||||
|
full_prompt = self._buildPromptWithPlaceholders(prompt, effective_placeholders)
|
||||||
|
|
||||||
|
|
||||||
|
# Make AI call using AiObjects (let it handle model selection)
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=full_prompt,
|
||||||
|
context="", # Context is already included in the prompt
|
||||||
|
options=options
|
||||||
|
)
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
try:
|
||||||
|
logger.debug(f"AI model selected (planning): {getattr(response, 'modelName', 'unknown')}")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
return response.content
|
||||||
|
|
||||||
|
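A worked example of the proportional reduction above, with numbers assumed for illustration: if the assembled prompt is 120,000 bytes, the selected model allows max_bytes of 100,000, and the summary-allowed placeholders together contribute 60,000 bytes, then the overage is 20,000, the target for those placeholders is 40,000, and the ratio is 40000 / 60000, roughly 0.67, so each summary-allowed placeholder is truncated to about two thirds of its length before the prompt is rebuilt; the second pass repeats the same calculation on whatever overage remains.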
async def _callAiDirect(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
documents: Optional[List[ChatDocument]],
|
||||||
|
options: AiCallOptions,
|
||||||
|
documentProcessor=None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Call AI directly with prompt and documents for JSON output.
|
||||||
|
Used for multi-file generation - uses the existing generation pipeline.
|
||||||
|
"""
|
||||||
|
# Use the existing generation pipeline that already works
|
||||||
|
# This ensures proper document processing and content extraction
|
||||||
|
logger.info(f"Using existing generation pipeline for {len(documents) if documents else 0} documents")
|
||||||
|
|
||||||
|
if documentProcessor:
|
||||||
|
# Process documents with JSON merging using the existing pipeline
|
||||||
|
result = await documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
|
||||||
|
else:
|
||||||
|
# Fallback to simple AI call
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=prompt,
|
||||||
|
context="",
|
||||||
|
options=options
|
||||||
|
)
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
result = {"metadata": {"title": "AI Response"}, "sections": [{"id": "section_1", "content_type": "paragraph", "elements": [{"text": response.content}]}]}
|
||||||
|
|
||||||
|
# Convert single-file result to multi-file format if needed
|
||||||
|
if "sections" in result and "documents" not in result:
|
||||||
|
logger.info("Converting single-file result to multi-file format")
|
||||||
|
# This is a single-file result, convert it to multi-file format
|
||||||
|
return {
|
||||||
|
"metadata": result.get("metadata", {"title": "Converted Document"}),
|
||||||
|
"documents": [{
|
||||||
|
"id": "doc_1",
|
||||||
|
"title": result.get("metadata", {}).get("title", "Document"),
|
||||||
|
"filename": "document.txt",
|
||||||
|
"sections": result.get("sections", [])
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List[ChatDocument]], options: AiCallOptions) -> Dict[str, int]:
|
||||||
|
"""
|
||||||
|
Get model capabilities for content processing, including appropriate size limits for chunking.
|
||||||
|
"""
|
||||||
|
# Estimate total content size
|
||||||
|
prompt_size = len(prompt.encode('utf-8'))
|
||||||
|
document_size = 0
|
||||||
|
if documents:
|
||||||
|
# Rough estimate of document content size
|
||||||
|
for doc in documents:
|
||||||
|
document_size += doc.fileSize or 0
|
||||||
|
|
||||||
|
total_size = prompt_size + document_size
|
||||||
|
|
||||||
|
# Use AiObjects to select the best model for this content size
|
||||||
|
# We'll simulate the model selection by checking available models
|
||||||
|
from modules.interfaces.interfaceAiObjects import aiModels
|
||||||
|
|
||||||
|
# Find the best model for this content size and operation
|
||||||
|
best_model = None
|
||||||
|
best_context_length = 0
|
||||||
|
|
||||||
|
for model_name, model_info in aiModels.items():
|
||||||
|
context_length = model_info.get("contextLength", 0)
|
||||||
|
|
||||||
|
# Skip models with no context length or too small for content
|
||||||
|
if context_length == 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check if model supports the operation type
|
||||||
|
capabilities = model_info.get("capabilities", [])
|
||||||
|
if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
|
||||||
|
continue
|
||||||
|
elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
|
||||||
|
continue
|
||||||
|
elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
|
||||||
|
continue
|
||||||
|
elif "text_generation" not in capabilities:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Prefer models that can handle the content without chunking, but allow chunking if needed
|
||||||
|
if context_length >= total_size * 0.8: # 80% of content size
|
||||||
|
if context_length > best_context_length:
|
||||||
|
best_model = model_info
|
||||||
|
best_context_length = context_length
|
||||||
|
elif best_model is None: # Fallback to largest available model
|
||||||
|
if context_length > best_context_length:
|
||||||
|
best_model = model_info
|
||||||
|
best_context_length = context_length
|
||||||
|
|
||||||
|
# Fallback to a reasonable default if no model found
|
||||||
|
if best_model is None:
|
||||||
|
best_model = {
|
||||||
|
"contextLength": 128000, # GPT-4o default
|
||||||
|
"llmName": "gpt-4o"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate appropriate sizes
|
||||||
|
# Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
|
||||||
|
context_length_bytes = int(best_model["contextLength"] * 4)
|
||||||
|
max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
|
||||||
|
text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
|
||||||
|
image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
|
||||||
|
|
||||||
|
logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
|
||||||
|
logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
|
||||||
|
logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"maxContextBytes": max_context_bytes,
|
||||||
|
"textChunkSize": text_chunk_size,
|
||||||
|
"imageChunkSize": image_chunk_size
|
||||||
|
}
|
||||||
|
|
||||||
|
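Plugging in the 128,000-token GPT-4o fallback named in the code above makes the derived limits concrete:

    context_length_bytes = 128000 * 4          # 512,000 bytes (roughly 4 characters per token)
    max_context_bytes    = int(512000 * 0.9)   # 460,800 bytes
    text_chunk_size      = int(460800 * 0.7)   # 322,560 bytes
    image_chunk_size     = int(460800 * 0.8)   # 368,640 bytes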
def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]:
|
||||||
|
"""
|
||||||
|
Get models capable of handling the specific operation with capability filtering.
|
||||||
|
"""
|
||||||
|
# Use the actual AI objects model selection instead of hardcoded default
|
||||||
|
if hasattr(self, 'aiObjects') and self.aiObjects:
|
||||||
|
# Let AiObjects handle the model selection
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
# Fallback to default model if AiObjects not available
|
||||||
|
default_model = ModelCapabilities(
|
||||||
|
name="default",
|
||||||
|
maxTokens=4000,
|
||||||
|
capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"],
|
||||||
|
costPerToken=0.001,
|
||||||
|
processingTime=1.0,
|
||||||
|
isAvailable=True
|
||||||
|
)
|
||||||
|
return [default_model]
|
||||||
|
|
||||||
|
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
|
||||||
|
"""
|
||||||
|
Build full prompt by replacing placeholders with their content.
|
||||||
|
Uses the new {{KEY:placeholder}} format.
|
||||||
|
"""
|
||||||
|
if not placeholders:
|
||||||
|
return prompt
|
||||||
|
|
||||||
|
full_prompt = prompt
|
||||||
|
for placeholder, content in placeholders.items():
|
||||||
|
# Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
|
||||||
|
full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
|
||||||
|
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
|
||||||
|
|
||||||
|
return full_prompt
|
||||||
|
|
||||||
|
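A small usage sketch of the replacement rule above, with strings assumed for illustration:

    prompt = "Summarize {{KEY:notes}} for {{audience}}."
    placeholders = {"notes": "Q3 status update", "audience": "the board"}
    # both the legacy {{label}} form and the {{KEY:label}} form are substituted
    # -> "Summarize Q3 status update for the board."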
def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: str = None, continuation: bool = None) -> None:
|
||||||
|
"""Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled."""
|
||||||
|
try:
|
||||||
|
# Check if debug logging is enabled
|
||||||
|
debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
||||||
|
if not debug_enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
import os
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
# Base dir: gateway/test-chat/ai (go up 4 levels from this file)
|
||||||
|
# .../gateway/modules/services/serviceAi/subCoreAi.py -> up to gateway root
|
||||||
|
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
outDir = os.path.join(gatewayDir, 'test-chat', 'ai')
|
||||||
|
os.makedirs(outDir, exist_ok=True)
|
||||||
|
ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
|
||||||
|
suffix = []
|
||||||
|
if partIndex is not None:
|
||||||
|
suffix.append(f"part{partIndex}")
|
||||||
|
if continuation is not None:
|
||||||
|
suffix.append(f"cont_{str(continuation).lower()}")
|
||||||
|
if modelName:
|
||||||
|
safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName)
|
||||||
|
suffix.append(safeModel)
|
||||||
|
suffixStr = ('_' + '_'.join(suffix)) if suffix else ''
|
||||||
|
fname = f"{ts}_{label}{suffixStr}.txt"
|
||||||
|
fpath = os.path.join(outDir, fname)
|
||||||
|
with open(fpath, 'w', encoding='utf-8') as f:
|
||||||
|
f.write(content or '')
|
||||||
|
except Exception:
|
||||||
|
# Do not raise; best-effort debug write
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
|
||||||
|
"""
|
||||||
|
Check if text exceeds model token limit with safety margin.
|
||||||
|
"""
|
||||||
|
# Simple character-based estimation (4 chars per token)
|
||||||
|
estimated_tokens = len(text) // 4
|
||||||
|
max_tokens = int(model.maxTokens * (1 - safety_margin))
|
||||||
|
return estimated_tokens > max_tokens
|
||||||
|
|
||||||
|
def _reducePlanningPrompt(
|
||||||
|
self,
|
||||||
|
full_prompt: str,
|
||||||
|
placeholders: Optional[Dict[str, str]],
|
||||||
|
model: ModelCapabilities,
|
||||||
|
options: AiCallOptions
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Reduce planning prompt size by summarizing placeholders while preserving prompt structure.
|
||||||
|
"""
|
||||||
|
if not placeholders:
|
||||||
|
return self._reduceText(full_prompt, 0.7)
|
||||||
|
|
||||||
|
# Reduce placeholders while preserving prompt
|
||||||
|
reduced_placeholders = {}
|
||||||
|
for placeholder, content in placeholders.items():
|
||||||
|
if len(content) > 1000: # Only reduce long content
|
||||||
|
reduction_factor = 0.7
|
||||||
|
reduced_content = self._reduceText(content, reduction_factor)
|
||||||
|
reduced_placeholders[placeholder] = reduced_content
|
||||||
|
else:
|
||||||
|
reduced_placeholders[placeholder] = content
|
||||||
|
|
||||||
|
return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders)
|
||||||
|
|
||||||
|
def _reduceTextPrompt(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
context: str,
|
||||||
|
model: ModelCapabilities,
|
||||||
|
options: AiCallOptions
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Reduce text prompt size using typeGroup-aware chunking and merging.
|
||||||
|
"""
|
||||||
|
max_size = int(model.maxTokens * (1 - options.safetyMargin))
|
||||||
|
|
||||||
|
if options.compressPrompt:
|
||||||
|
# Reduce both prompt and context
|
||||||
|
target_size = max_size
|
||||||
|
current_size = len(prompt) + len(context)
|
||||||
|
reduction_factor = (target_size * 0.7) / current_size
|
||||||
|
|
||||||
|
if reduction_factor < 1.0:
|
||||||
|
prompt = self._reduceText(prompt, reduction_factor)
|
||||||
|
context = self._reduceText(context, reduction_factor)
|
||||||
|
else:
|
||||||
|
# Only reduce context, preserve prompt integrity
|
||||||
|
max_context_size = max_size - len(prompt)
|
||||||
|
if len(context) > max_context_size:
|
||||||
|
reduction_factor = max_context_size / len(context)
|
||||||
|
context = self._reduceText(context, reduction_factor)
|
||||||
|
|
||||||
|
return prompt + "\n\n" + context if context else prompt
|
||||||
|
|
||||||
|
def _extractTextFromContentParts(self, extracted_content) -> str:
|
||||||
|
"""
|
||||||
|
Extract text content from ExtractionService ContentPart objects.
|
||||||
|
"""
|
||||||
|
if not extracted_content or not hasattr(extracted_content, 'parts'):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
text_parts = []
|
||||||
|
for part in extracted_content.parts:
|
||||||
|
if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
|
||||||
|
if hasattr(part, 'data') and part.data:
|
||||||
|
text_parts.append(part.data)
|
||||||
|
|
||||||
|
return "\n\n".join(text_parts)
|
||||||
|
|
||||||
|
def _reduceText(self, text: str, reduction_factor: float) -> str:
|
||||||
|
"""
|
||||||
|
Reduce text size by the specified factor.
|
||||||
|
"""
|
||||||
|
if reduction_factor >= 1.0:
|
||||||
|
return text
|
||||||
|
|
||||||
|
target_length = int(len(text) * reduction_factor)
|
||||||
|
return text[:target_length] + "... [reduced]"
|
||||||
modules/services/serviceAi/subDocumentGeneration.py (804 lines, new file)

@@ -0,0 +1,804 @@
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SubDocumentGeneration:
|
||||||
|
"""Document generation operations including single-file and multi-file generation."""
|
||||||
|
|
||||||
|
def __init__(self, services, aiObjects, documentProcessor):
|
||||||
|
"""Initialize document generation service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
services: Service center instance for accessing other services
|
||||||
|
aiObjects: Initialized AiObjects instance
|
||||||
|
documentProcessor: Document processing service instance
|
||||||
|
"""
|
||||||
|
self.services = services
|
||||||
|
self.aiObjects = aiObjects
|
||||||
|
self.documentProcessor = documentProcessor
|
||||||
|
|
||||||
|
async def callAiWithDocumentGeneration(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
documents: Optional[List[ChatDocument]],
|
||||||
|
options: AiCallOptions,
|
||||||
|
outputFormat: str,
|
||||||
|
title: Optional[str]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Handle AI calls with document generation in specific output format.
|
||||||
|
Now supports both single-file and multi-file generation.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
prompt: The main prompt for the AI call
|
||||||
|
documents: Optional list of documents to process
|
||||||
|
options: AI call configuration options
|
||||||
|
outputFormat: Target output format (html, pdf, docx, txt, md, json, csv, xlsx)
|
||||||
|
title: Optional title for generated documents
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Dict with generated documents and metadata
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
# Use AI to analyze prompt intent
|
||||||
|
prompt_analysis = await self._analyzePromptIntent(prompt, self)
|
||||||
|
logger.info(f"Prompt analysis result: {prompt_analysis}")
|
||||||
|
|
||||||
|
if prompt_analysis.get("is_multi_file", False):
|
||||||
|
return await self._callAiWithMultiFileGeneration(
|
||||||
|
prompt, documents, options, outputFormat, title, prompt_analysis
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
return await self._callAiWithSingleFileGeneration(
|
||||||
|
prompt, documents, options, outputFormat, title
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in document generation: {str(e)}")
|
||||||
|
return {
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"content": "",
|
||||||
|
"rendered_content": "",
|
||||||
|
"mime_type": "text/plain",
|
||||||
|
"filename": f"error_{outputFormat}",
|
||||||
|
"format": outputFormat,
|
||||||
|
"title": title or "Error",
|
||||||
|
"documents": []
|
||||||
|
}
|
||||||
|
|
||||||
|
async def _callAiWithSingleFileGeneration(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
documents: Optional[List[ChatDocument]],
|
||||||
|
options: AiCallOptions,
|
||||||
|
outputFormat: str,
|
||||||
|
title: Optional[str],
|
||||||
|
generationPrompt: Optional[str] = None
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Handle single-file document generation (existing functionality)."""
|
||||||
|
try:
|
||||||
|
# Get format-specific extraction prompt from generation service
|
||||||
|
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||||
|
generation_service = GenerationService(self.services)
|
||||||
|
|
||||||
|
# Use default title if not provided
|
||||||
|
if not title:
|
||||||
|
title = "AI Generated Document"
|
||||||
|
|
||||||
|
# Get format-specific extraction prompt
|
||||||
|
extractionPrompt = await generation_service.getExtractionPrompt(
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
userPrompt=prompt,
|
||||||
|
title=title,
|
||||||
|
aiService=self
|
||||||
|
)
|
||||||
|
|
||||||
|
# Process documents with format-specific prompt using JSON mode
|
||||||
|
# This ensures structured JSON output instead of text
|
||||||
|
aiResponseJson = await self._callAiJson(extractionPrompt, documents, options)
|
||||||
|
|
||||||
|
# Validate JSON response
|
||||||
|
if not isinstance(aiResponseJson, dict) or "sections" not in aiResponseJson:
|
||||||
|
raise Exception("AI response is not valid JSON document structure")
|
||||||
|
|
||||||
|
# Emit raw extracted data as a chat message attachment before rendering
|
||||||
|
try:
|
||||||
|
await self._postRawDataChatMessage(aiResponseJson, label="raw_extraction_single")
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Failed to emit raw extraction chat message (single-file)")
|
||||||
|
|
||||||
|
# Generate filename from document metadata
|
||||||
|
parsedFilename = None
|
||||||
|
try:
|
||||||
|
if aiResponseJson.get("metadata", {}).get("title"):
|
||||||
|
title = aiResponseJson["metadata"]["title"]
|
||||||
|
# Clean title for filename
|
||||||
|
import re
|
||||||
|
parsed = re.sub(r"[^a-zA-Z0-9._-]", "-", title)
|
||||||
|
parsed = re.sub(r"-+", "-", parsed).strip('-')
|
||||||
|
if parsed:
|
||||||
|
parsedFilename = f"{parsed}.{outputFormat}"
|
||||||
|
except Exception:
|
||||||
|
parsedFilename = None
|
||||||
|
|
||||||
|
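As an illustration of the title clean-up above (title assumed): "Q3 Report: Final (v2)" becomes "Q3-Report--Final--v2-" after the first re.sub, "Q3-Report-Final-v2-" after collapsing repeated dashes, and "Q3-Report-Final-v2" after strip('-'), so the parsed filename would be "Q3-Report-Final-v2.pdf" for a pdf outputFormat.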
# Use AI generation to enhance the extracted JSON before rendering
|
||||||
|
enhancedContent = aiResponseJson # Default to original
|
||||||
|
if prompt:
|
||||||
|
try:
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||||
|
|
||||||
|
# Get generation prompt
|
||||||
|
generationPrompt = await generation_service.getGenerationPrompt(
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
userPrompt=prompt,
|
||||||
|
title=title,
|
||||||
|
aiService=self
|
||||||
|
)
|
||||||
|
|
||||||
|
# Prepare the AI call
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationType.GENERAL
|
||||||
|
|
||||||
|
# Create context with the extracted JSON content
|
||||||
|
import json
|
||||||
|
context = f"Extracted JSON content:\n{json.dumps(aiResponseJson, indent=2)}"
|
||||||
|
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=generationPrompt,
|
||||||
|
context=context,
|
||||||
|
options=request_options
|
||||||
|
)
|
||||||
|
|
||||||
|
# Call AI to enhance the content
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
|
||||||
|
if response and response.content:
|
||||||
|
# Parse the AI response as JSON
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
result = response.content.strip()
|
||||||
|
|
||||||
|
# Extract JSON from markdown if present
|
||||||
|
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(1).strip()
|
||||||
|
elif result.startswith('```json'):
|
||||||
|
result = re.sub(r'^```json\s*', '', result)
|
||||||
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
elif result.startswith('```'):
|
||||||
|
result = re.sub(r'^```\s*', '', result)
|
||||||
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
|
||||||
|
# Try to parse JSON
|
||||||
|
enhancedContent = json.loads(result)
|
||||||
|
logger.info(f"AI enhanced JSON content successfully")
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning(f"AI generation returned invalid JSON: {str(e)}, using original content")
|
||||||
|
enhancedContent = aiResponseJson
|
||||||
|
else:
|
||||||
|
logger.warning("AI generation returned empty response, using original content")
|
||||||
|
enhancedContent = aiResponseJson
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"AI generation failed: {str(e)}, using original content")
|
||||||
|
enhancedContent = aiResponseJson
|
||||||
|
|
||||||
|
# Render the enhanced JSON content
|
||||||
|
renderedContent, mimeType = await generation_service.renderReport(
|
||||||
|
extractedContent=enhancedContent,
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
title=title,
|
||||||
|
userPrompt=prompt,
|
||||||
|
aiService=self
|
||||||
|
)
|
||||||
|
|
||||||
|
# Generate meaningful filename (use AI-provided if valid, else fallback)
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
timestamp = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
if parsedFilename and parsedFilename.lower().endswith(f".{outputFormat.lower()}"):
|
||||||
|
filename = parsedFilename
|
||||||
|
else:
|
||||||
|
safeTitle = ''.join(c if c.isalnum() else '-' for c in (title or 'document')).strip('-')
|
||||||
|
filename = f"{safeTitle or 'document'}-{timestamp}.{outputFormat}"
|
||||||
|
|
||||||
|
# Return structured result with document information
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"content": aiResponseJson, # Structured JSON document
|
||||||
|
"rendered_content": renderedContent, # Formatted content
|
||||||
|
"mime_type": mimeType,
|
||||||
|
"filename": filename,
|
||||||
|
"format": outputFormat,
|
||||||
|
"title": title,
|
||||||
|
"documents": [{
|
||||||
|
"documentName": filename,
|
||||||
|
"documentData": renderedContent,
|
||||||
|
"mimeType": mimeType
|
||||||
|
}],
|
||||||
|
"is_multi_file": False
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in single-file document generation: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def _callAiWithMultiFileGeneration(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
documents: Optional[List[ChatDocument]],
|
||||||
|
options: AiCallOptions,
|
||||||
|
outputFormat: str,
|
||||||
|
title: Optional[str],
|
||||||
|
prompt_analysis: Dict[str, Any]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""Handle multi-file document generation using AI analysis."""
|
||||||
|
try:
|
||||||
|
# Get multi-file extraction prompt based on AI analysis
|
||||||
|
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||||
|
generation_service = GenerationService(self.services)
|
||||||
|
|
||||||
|
# Use default title if not provided
|
||||||
|
if not title:
|
||||||
|
title = "AI Generated Documents"
|
||||||
|
|
||||||
|
# Get adaptive extraction prompt
|
||||||
|
extraction_prompt = await generation_service.getAdaptiveExtractionPrompt(
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
userPrompt=prompt,
|
||||||
|
title=title,
|
||||||
|
promptAnalysis=prompt_analysis,
|
||||||
|
aiService=self
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"Adaptive extraction prompt length: {len(extraction_prompt)} characters")
|
||||||
|
logger.debug(f"Adaptive extraction prompt preview: {extraction_prompt[:500]}...")
|
||||||
|
|
||||||
|
# Process with adaptive JSON schema - use the existing pipeline but with adaptive prompt
|
||||||
|
logger.info(f"Using adaptive prompt with existing pipeline: {len(extraction_prompt)} chars")
|
||||||
|
logger.debug(f"Processing documents: {len(documents) if documents else 0} documents")
|
||||||
|
|
||||||
|
# Use the existing pipeline but replace the prompt with our adaptive one
|
||||||
|
# This ensures proper document processing while using the multi-file prompt
|
||||||
|
ai_response = await self.documentProcessor.processDocumentsPerChunkJsonWithPrompt(documents, extraction_prompt, options)
|
||||||
|
|
||||||
|
logger.info(f"AI response type: {type(ai_response)}")
|
||||||
|
logger.info(f"AI response keys: {list(ai_response.keys()) if isinstance(ai_response, dict) else 'Not a dict'}")
|
||||||
|
logger.debug(f"AI response preview: {str(ai_response)[:500]}...")
|
||||||
|
|
||||||
|
# Validate response structure
|
||||||
|
if not self._validateResponseStructure(ai_response, prompt_analysis):
|
||||||
|
# Fallback to single-file if multi-file fails
|
||||||
|
logger.warning(f"Multi-file processing failed - Invalid response structure. Expected multi-file but got: {list(ai_response.keys()) if isinstance(ai_response, dict) else type(ai_response)}")
|
||||||
|
logger.warning(f"Prompt analysis: {prompt_analysis}")
|
||||||
|
logger.warning("Falling back to single-file generation")
|
||||||
|
return await self._callAiWithSingleFileGeneration(
|
||||||
|
prompt, documents, options, outputFormat, title
|
||||||
|
)
|
||||||
|
|
||||||
|
# Emit raw extracted data as a chat message attachment before transformation/rendering
|
||||||
|
try:
|
||||||
|
await self._postRawDataChatMessage(ai_response, label="raw_extraction_multi")
|
||||||
|
except Exception:
|
||||||
|
logger.warning("Failed to emit raw extraction chat message (multi-file)")
|
||||||
|
|
||||||
|
# Process multiple documents
|
||||||
|
generated_documents = []
|
||||||
|
for i, doc_data in enumerate(ai_response.get("documents", [])):
|
||||||
|
# Transform AI-generated sections to renderer-compatible format
|
||||||
|
transformed_sections = []
|
||||||
|
for section in doc_data.get("sections", []):
|
||||||
|
# Convert AI format to renderer format
|
||||||
|
transformed_section = {
|
||||||
|
"id": section.get("id", f"section_{len(transformed_sections) + 1}"),
|
||||||
|
"content_type": section.get("content_type", "paragraph"),
|
||||||
|
"elements": section.get("elements", []),
|
||||||
|
"order": section.get("order", len(transformed_sections) + 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Extract text from elements for simple text-based sections
|
||||||
|
if section.get("content_type") in ["paragraph", "heading"]:
|
||||||
|
text_parts = []
|
||||||
|
for element in section.get("elements", []):
|
||||||
|
if "text" in element:
|
||||||
|
text_parts.append(element["text"])
|
||||||
|
# Add text to the first element or create a new one
|
||||||
|
if transformed_section["elements"]:
|
||||||
|
transformed_section["elements"][0]["text"] = "\n".join(text_parts)
|
||||||
|
else:
|
||||||
|
transformed_section["elements"] = [{"text": "\n".join(text_parts)}]
|
||||||
|
|
||||||
|
transformed_sections.append(transformed_section)
|
||||||
|
|
||||||
|
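For orientation, a hypothetical section returned by the AI such as {"id": "s1", "content_type": "paragraph", "elements": [{"text": "Intro"}, {"text": "Scope"}]} keeps its id, content_type, elements and order in the transformed structure, and because it is a paragraph the element texts are joined, leaving the first element as {"text": "Intro\nScope"}; the field values here are assumed for illustration only.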
# Create complete document structure for rendering
|
||||||
|
complete_document = {
|
||||||
|
"metadata": {
|
||||||
|
"title": doc_data["title"],
|
||||||
|
"source_document": "multi_file_generation",
|
||||||
|
"document_id": doc_data.get("id", f"doc_{i+1}"),
|
||||||
|
"filename": doc_data.get("filename", f"document_{i+1}"),
|
||||||
|
"split_strategy": prompt_analysis.get("strategy", "custom")
|
||||||
|
},
|
||||||
|
"sections": transformed_sections,
|
||||||
|
"summary": f"Generated document: {doc_data['title']}",
|
||||||
|
"tags": ["multi_file", "ai_generated"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Use AI generation to enhance the extracted JSON before rendering
|
||||||
|
enhancedContent = complete_document # Default to original
|
||||||
|
if prompt:
|
||||||
|
try:
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||||
|
|
||||||
|
# Get generation prompt
|
||||||
|
generationPrompt = await generation_service.getGenerationPrompt(
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
userPrompt=prompt,
|
||||||
|
title=doc_data["title"],
|
||||||
|
aiService=self
|
||||||
|
)
|
||||||
|
|
||||||
|
# Prepare the AI call
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationType.GENERAL
|
||||||
|
|
||||||
|
# Create context with the extracted JSON content
|
||||||
|
import json
|
||||||
|
context = f"Extracted JSON content:\n{json.dumps(complete_document, indent=2)}"
|
||||||
|
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=generationPrompt,
|
||||||
|
context=context,
|
||||||
|
options=request_options
|
||||||
|
)
|
||||||
|
|
||||||
|
# Call AI to enhance the content
|
||||||
|
response = await self.aiObjects.call(request)
|
||||||
|
|
||||||
|
if response and response.content:
|
||||||
|
# Parse the AI response as JSON
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
result = response.content.strip()
|
||||||
|
|
||||||
|
# Extract JSON from markdown if present
|
||||||
|
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(1).strip()
|
||||||
|
elif result.startswith('```json'):
|
||||||
|
result = re.sub(r'^```json\s*', '', result)
|
||||||
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
elif result.startswith('```'):
|
||||||
|
result = re.sub(r'^```\s*', '', result)
|
||||||
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
|
||||||
|
# Try to parse JSON
|
||||||
|
enhancedContent = json.loads(result)
|
||||||
|
logger.info(f"AI enhanced JSON content successfully")
|
||||||
|
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning(f"AI generation returned invalid JSON: {str(e)}, attempting to repair...")
|
||||||
|
# Try to repair common JSON issues
|
||||||
|
try:
|
||||||
|
repaired_result = self._repairJson(result)
|
||||||
|
enhancedContent = json.loads(repaired_result)
|
||||||
|
logger.info(f"Successfully repaired JSON content")
|
||||||
|
except (json.JSONDecodeError, Exception) as repair_error:
|
||||||
|
logger.warning(f"JSON repair failed: {str(repair_error)}, trying AI repair...")
|
||||||
|
# Try AI-powered JSON repair as last resort
|
||||||
|
try:
|
||||||
|
ai_repaired = await self._repairJsonWithAI(result)
|
||||||
|
enhancedContent = json.loads(ai_repaired)
|
||||||
|
logger.info(f"AI successfully repaired JSON content")
|
||||||
|
except Exception as ai_repair_error:
|
||||||
|
logger.warning(f"AI JSON repair also failed: {str(ai_repair_error)}, using original content")
|
||||||
|
enhancedContent = complete_document
|
||||||
|
else:
|
||||||
|
logger.warning("AI generation returned empty response, using original content")
|
||||||
|
enhancedContent = complete_document
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"AI generation failed: {str(e)}, using original content")
|
||||||
|
enhancedContent = complete_document
|
||||||
|
|
||||||
|
# Render the enhanced JSON content
|
||||||
|
rendered_content, mime_type = await generation_service.renderReport(
|
||||||
|
extractedContent=enhancedContent,
|
||||||
|
outputFormat=outputFormat,
|
||||||
|
title=doc_data["title"],
|
||||||
|
userPrompt=prompt,
|
||||||
|
aiService=self
|
||||||
|
)
|
||||||
|
|
||||||
|
# Generate proper filename with correct extension
|
||||||
|
base_filename = doc_data.get("filename", f"document_{i+1}")
|
||||||
|
# Remove any existing extension and add the correct one
|
||||||
|
if '.' in base_filename:
|
||||||
|
base_filename = base_filename.rsplit('.', 1)[0]
|
||||||
|
|
||||||
|
# Add proper extension based on output format
|
||||||
|
if outputFormat.lower() == "docx":
|
||||||
|
filename = f"{base_filename}.docx"
|
||||||
|
elif outputFormat.lower() == "pdf":
|
||||||
|
filename = f"{base_filename}.pdf"
|
||||||
|
elif outputFormat.lower() == "html":
|
||||||
|
filename = f"{base_filename}.html"
|
||||||
|
else:
|
||||||
|
filename = f"{base_filename}.{outputFormat}"
|
||||||
|
|
||||||
|
generated_documents.append({
|
||||||
|
"documentName": filename,
|
||||||
|
"documentData": rendered_content,
|
||||||
|
"mimeType": mime_type
|
||||||
|
})
|
||||||
|
|
||||||
|
# Save debug files for multi-file generation - only if debug enabled
|
||||||
|
debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
||||||
|
if debug_enabled:
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
debug_root = "./test-chat/ai"
|
||||||
|
debug_dir = os.path.join(debug_root, f"multifile_output_{ts}")
|
||||||
|
os.makedirs(debug_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Save metadata
|
||||||
|
with open(os.path.join(debug_dir, "metadata.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(f"title: {title}\n")
|
||||||
|
f.write(f"format: {outputFormat}\n")
|
||||||
|
f.write(f"documents_count: {len(generated_documents)}\n")
|
||||||
|
f.write(f"split_strategy: {prompt_analysis.get('strategy', 'custom')}\n")
|
||||||
|
f.write(f"prompt_analysis: {prompt_analysis}\n")
|
||||||
|
|
||||||
|
# Save each generated document
|
||||||
|
for i, doc in enumerate(generated_documents):
|
||||||
|
doc_filename = doc["documentName"]
|
||||||
|
doc_data = doc["documentData"]
|
||||||
|
doc_mime = doc["mimeType"]
|
||||||
|
|
||||||
|
# Determine file extension
|
||||||
|
if outputFormat.lower() == "docx":
|
||||||
|
file_ext = ".docx"
|
||||||
|
elif outputFormat.lower() == "pdf":
|
||||||
|
file_ext = ".pdf"
|
||||||
|
elif outputFormat.lower() == "html":
|
||||||
|
file_ext = ".html"
|
||||||
|
else:
|
||||||
|
file_ext = f".{outputFormat}"
|
||||||
|
|
||||||
|
# Save the rendered document
|
||||||
|
output_path = os.path.join(debug_dir, f"document_{i+1}_{doc_filename}")
|
||||||
|
|
||||||
|
if file_ext in ['.md', '.txt', '.html', '.json', '.csv']:
|
||||||
|
# Text-based formats
|
||||||
|
with open(output_path, 'w', encoding='utf-8') as f:
|
||||||
|
f.write(doc_data)
|
||||||
|
else:
|
||||||
|
# Binary formats - decode from base64 if needed
|
||||||
|
try:
|
||||||
|
import base64
|
||||||
|
doc_bytes = base64.b64decode(doc_data)
|
||||||
|
with open(output_path, 'wb') as f:
|
||||||
|
f.write(doc_bytes)
|
||||||
|
except Exception:
|
||||||
|
# If not base64, save as text
|
||||||
|
with open(output_path, 'w', encoding='utf-8') as f:
|
||||||
|
f.write(doc_data)
|
||||||
|
|
||||||
|
logger.info(f"💾 Debug: Saved multi-file document {i+1}: {output_path}")
|
||||||
|
|
||||||
|
logger.info(f"💾 Debug: Multi-file output saved to: {debug_dir}")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to save multi-file debug output: {e}")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"success": True,
|
||||||
|
"content": ai_response,
|
||||||
|
"rendered_content": None, # Not applicable for multi-file
|
||||||
|
"mime_type": None, # Not applicable for multi-file
|
||||||
|
"filename": None, # Not applicable for multi-file
|
||||||
|
"format": outputFormat,
|
||||||
|
"title": title,
|
||||||
|
"documents": generated_documents,
|
||||||
|
"is_multi_file": True,
|
||||||
|
"split_strategy": prompt_analysis.get("strategy", "custom")
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in multi-file document generation: {str(e)}")
|
||||||
|
# Fallback to single-file
|
||||||
|
return await self._callAiWithSingleFileGeneration(
|
||||||
|
prompt, documents, options, outputFormat, title
|
||||||
|
)
|
||||||
|
|
||||||
|
async def _callAiJson(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
documents: Optional[List[ChatDocument]],
|
||||||
|
options: AiCallOptions
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Handle AI calls with document processing for JSON output.
|
||||||
|
Returns structured JSON document instead of text.
|
||||||
|
"""
|
||||||
|
# Process documents with JSON merging
|
||||||
|
return await self.documentProcessor.processDocumentsPerChunkJson(documents, prompt, options)
|
||||||
|
|
||||||
|
async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||||
|
"""Use AI to analyze user prompt and determine processing requirements."""
|
||||||
|
if not ai_service:
|
||||||
|
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||||
|
|
||||||
|
try:
|
||||||
|
analysis_prompt = f"""
|
||||||
|
Analyze this user request and determine if it requires multiple file output or single file output.
|
||||||
|
|
||||||
|
User request: "{prompt}"
|
||||||
|
|
||||||
|
Respond with JSON only in this exact format:
|
||||||
|
{{
|
||||||
|
"is_multi_file": true/false,
|
||||||
|
"strategy": "single|per_entity|by_section|by_criteria|custom",
|
||||||
|
"criteria": "description of how to split content",
|
||||||
|
"file_naming_pattern": "suggested pattern for filenames",
|
||||||
|
"reasoning": "brief explanation of the analysis"
|
||||||
|
}}
|
||||||
|
|
||||||
|
Consider:
|
||||||
|
- Does the user want separate files for different entities (customers, products, etc.)?
|
||||||
|
- Does the user want to split content into multiple documents?
|
||||||
|
- What would be the most logical way to organize the content?
|
||||||
|
- What language is the request in? (analyze in the original language)
|
||||||
|
|
||||||
|
Return only the JSON response.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationType.GENERAL
|
||||||
|
|
||||||
|
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
||||||
|
response = await ai_service.aiObjects.call(request)
|
||||||
|
|
||||||
|
if response and response.content:
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Extract JSON from response
|
||||||
|
result = response.content.strip()
|
||||||
|
json_match = re.search(r'\{.*\}', result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(0)
|
||||||
|
|
||||||
|
analysis = json.loads(result)
|
||||||
|
return analysis
|
||||||
|
else:
|
||||||
|
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file")
|
||||||
|
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||||
|
|
||||||
|
def _validateResponseStructure(self, response: Dict[str, Any], prompt_analysis: Dict[str, Any]) -> bool:
|
||||||
|
"""Validate that AI response matches the expected structure."""
|
||||||
|
try:
|
||||||
|
if not isinstance(response, dict):
|
||||||
|
logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Check for multi-file structure
|
||||||
|
if prompt_analysis.get("is_multi_file", False):
|
||||||
|
has_documents = "documents" in response
|
||||||
|
is_documents_list = isinstance(response.get("documents"), list)
|
||||||
|
logger.info(f"Multi-file validation: has_documents={has_documents}, is_documents_list={is_documents_list}")
|
||||||
|
if has_documents and is_documents_list:
|
||||||
|
logger.info(f"Multi-file validation passed: {len(response['documents'])} documents found")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Multi-file validation failed: documents key present={has_documents}, documents is list={is_documents_list}")
|
||||||
|
logger.warning(f"Available keys: {list(response.keys())}")
|
||||||
|
return has_documents and is_documents_list
|
||||||
|
else:
|
||||||
|
has_sections = "sections" in response
|
||||||
|
is_sections_list = isinstance(response.get("sections"), list)
|
||||||
|
logger.info(f"Single-file validation: has_sections={has_sections}, is_sections_list={is_sections_list}")
|
||||||
|
return has_sections and is_sections_list
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Response validation failed with exception: {str(e)}")
|
||||||
|
return False
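# Illustration (assumption, not part of the diff): minimal payload shapes that would pass
# the validation above for the two modes; the inner keys are hypothetical examples.
multi_file_response = {"documents": [{"filename": "part_1", "sections": []}]}
single_file_response = {"sections": [{"title": "Overview", "content": "..."}]}
assert isinstance(multi_file_response["documents"], list)
assert isinstance(single_file_response["sections"], list)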
|
||||||
|
|
||||||
|
async def _postRawDataChatMessage(self, payload: Any, label: str = "raw_extraction") -> None:
|
||||||
|
"""
|
||||||
|
Create a ChatMessage with the extracted raw JSON attached as a file so the user
|
||||||
|
has access to the data even if downstream processing fails.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
services = self.services
|
||||||
|
workflow = services.currentWorkflow
|
||||||
|
|
||||||
|
# Serialize payload
|
||||||
|
import json as _json
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
content_text = _json.dumps(payload, ensure_ascii=False, indent=2)
|
||||||
|
content_bytes = content_text.encode('utf-8')
|
||||||
|
|
||||||
|
# Store as file via component storage
|
||||||
|
file_name = f"{label}_{ts}.json"
|
||||||
|
file_item = services.interfaceDbComponent.createFile(
|
||||||
|
name=file_name,
|
||||||
|
mimeType="application/json",
|
||||||
|
content=content_bytes
|
||||||
|
)
|
||||||
|
services.interfaceDbComponent.createFileData(file_item.id, content_bytes)
|
||||||
|
|
||||||
|
# Lookup file info for ChatDocument
|
||||||
|
file_info = services.workflow.getFileInfo(file_item.id)
|
||||||
|
doc = ChatDocument(
|
||||||
|
messageId="", # set after message creation
|
||||||
|
fileId=file_item.id,
|
||||||
|
fileName=file_info.get("fileName", file_name) if file_info else file_name,
|
||||||
|
fileSize=file_info.get("size", len(content_bytes)) if file_info else len(content_bytes),
|
||||||
|
mimeType=file_info.get("mimeType", "application/json") if file_info else "application/json"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Create message referencing the file
|
||||||
|
messageData = {
|
||||||
|
"workflowId": workflow.id,
|
||||||
|
"role": "assistant",
|
||||||
|
"message": "Raw extraction data saved",
|
||||||
|
"status": "data",
|
||||||
|
"sequenceNr": len(getattr(workflow, 'messages', []) or []) + 1,
|
||||||
|
"publishedAt": services.utils.getUtcTimestamp(),
|
||||||
|
"documentsLabel": label,
|
||||||
|
"documents": []
|
||||||
|
}
|
||||||
|
message = services.workflow.createMessage(messageData)
|
||||||
|
if not message:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Persist ChatDocument with messageId
|
||||||
|
doc.messageId = message.id
|
||||||
|
services.interfaceDbChat.createDocument(doc.to_dict())
|
||||||
|
|
||||||
|
# Update message to include document
|
||||||
|
try:
|
||||||
|
if not message.documents:
|
||||||
|
message.documents = []
|
||||||
|
message.documents.append(doc)
|
||||||
|
services.workflow.updateMessage(message.id, {"documents": [d.to_dict() for d in message.documents]})
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
except Exception:
|
||||||
|
# Non-fatal; ignore if storage or chat creation fails
|
||||||
|
return
|
||||||
|
|
||||||
|
def _repairJson(self, json_string: str) -> str:
|
||||||
|
"""Repair common JSON syntax errors efficiently for large JSON."""
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
import json
|
||||||
|
|
||||||
|
# Remove any leading/trailing whitespace
|
||||||
|
json_string = json_string.strip()
|
||||||
|
|
||||||
|
# For large JSON, skip substring extraction and go straight to targeted repairs
|
||||||
|
logger.info(f"Attempting JSON repair for {len(json_string)} characters...")
|
||||||
|
|
||||||
|
# Try to parse first to see what specific error we get
|
||||||
|
try:
|
||||||
|
json.loads(json_string)
|
||||||
|
return json_string # Already valid
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
error_msg = str(e)
|
||||||
|
logger.info(f"JSON error: {error_msg}")
|
||||||
|
|
||||||
|
# Apply targeted fixes based on the specific error
|
||||||
|
if "Expecting ',' delimiter" in error_msg:
|
||||||
|
# Fix missing commas between array elements
|
||||||
|
json_string = re.sub(r'\]\s*\[', '], [', json_string)
|
||||||
|
json_string = re.sub(r'\}\s*\{', '}, {', json_string)
|
||||||
|
# Fix missing commas between object properties
|
||||||
|
json_string = re.sub(r'("\s*:\s*[^,}]+)\s*(")', r'\1, \2', json_string)
|
||||||
|
|
||||||
|
if "Expecting value" in error_msg:
|
||||||
|
# Fix missing values (replace empty with null)
|
||||||
|
json_string = re.sub(r':\s*,', ': null,', json_string)
|
||||||
|
json_string = re.sub(r':\s*}', ': null}', json_string)
|
||||||
|
|
||||||
|
if "Expecting property name" in error_msg:
|
||||||
|
# Fix unquoted property names
|
||||||
|
json_string = re.sub(r'(\w+):', r'"\1":', json_string)
|
||||||
|
|
||||||
|
# Fix trailing commas before closing brackets/braces
|
||||||
|
json_string = re.sub(r',(\s*[}\]])', r'\1', json_string)
|
||||||
|
|
||||||
|
# Fix missing closing brackets/braces (only if reasonable)
|
||||||
|
open_braces = json_string.count('{')
|
||||||
|
close_braces = json_string.count('}')
|
||||||
|
open_brackets = json_string.count('[')
|
||||||
|
close_brackets = json_string.count(']')
|
||||||
|
|
||||||
|
# Only add missing brackets if the difference is small (avoid runaway)
|
||||||
|
if 0 < (open_braces - close_braces) <= 5:
|
||||||
|
missing_braces = open_braces - close_braces
|
||||||
|
json_string += '}' * missing_braces
|
||||||
|
|
||||||
|
if 0 < (open_brackets - close_brackets) <= 5:
|
||||||
|
missing_brackets = open_brackets - close_brackets
|
||||||
|
json_string += ']' * missing_brackets
|
||||||
|
|
||||||
|
# Try to parse again
|
||||||
|
try:
|
||||||
|
json.loads(json_string)
|
||||||
|
logger.info("JSON repair successful")
|
||||||
|
return json_string
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning("JSON repair failed - will try AI repair")
|
||||||
|
return json_string
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"JSON repair failed: {str(e)}")
|
||||||
|
return json_string
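# Illustration (not part of the diff): a minimal standalone sketch of the targeted regex
# fixes applied above (missing commas between objects, missing values, trailing commas);
# the real method additionally balances unclosed brackets and braces.
import json
import re

def repair_json_sketch(s: str) -> str:
    s = re.sub(r'\}\s*\{', '}, {', s)       # insert comma between adjacent objects
    s = re.sub(r':\s*,', ': null,', s)      # fill a missing value before a comma
    s = re.sub(r',(\s*[}\]])', r'\1', s)    # drop a trailing comma before } or ]
    return s

assert json.loads(repair_json_sketch('{"a": 1,}')) == {"a": 1}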
|
||||||
|
|
||||||
|
async def _repairJsonWithAI(self, malformed_json: str) -> str:
|
||||||
|
"""Use AI to repair malformed JSON efficiently for large files."""
|
||||||
|
try:
|
||||||
|
# Limit JSON size for AI processing (max 50KB to avoid token limits)
|
||||||
|
max_json_size = 50000
|
||||||
|
json_to_repair = malformed_json
|
||||||
|
|
||||||
|
if len(malformed_json) > max_json_size:
|
||||||
|
logger.warning(f"JSON too large ({len(malformed_json)} chars), truncating to {max_json_size} chars for AI repair")
|
||||||
|
# Try to find a good truncation point (end of a complete object/array)
|
||||||
|
truncate_at = max_json_size
|
||||||
|
for i in range(max_json_size, max(0, max_json_size - 1000), -1):
|
||||||
|
if malformed_json[i] in ['}', ']']:
|
||||||
|
truncate_at = i + 1
|
||||||
|
break
|
||||||
|
json_to_repair = malformed_json[:truncate_at] + "..."
|
||||||
|
|
||||||
|
repair_prompt = f"""
|
||||||
|
You are a JSON repair expert. Fix the following malformed JSON and return ONLY the corrected JSON, no explanations.
|
||||||
|
|
||||||
|
Malformed JSON:
|
||||||
|
{json_to_repair}
|
||||||
|
|
||||||
|
Return only the valid JSON:
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Use AI to repair the JSON
|
||||||
|
repaired_json = await self.services.ai.callAi(
|
||||||
|
prompt=repair_prompt,
|
||||||
|
documents=None,
|
||||||
|
options={
|
||||||
|
"process_type": "text",
|
||||||
|
"operation_type": "generate_content",
|
||||||
|
"priority": "speed",
|
||||||
|
"max_cost": 0.01
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
# Clean up the response (remove any markdown formatting)
|
||||||
|
repaired_json = repaired_json.strip()
|
||||||
|
if repaired_json.startswith('```json'):
|
||||||
|
repaired_json = repaired_json[7:]
|
||||||
|
if repaired_json.endswith('```'):
|
||||||
|
repaired_json = repaired_json[:-3]
|
||||||
|
repaired_json = repaired_json.strip()
|
||||||
|
|
||||||
|
# Validate the repaired JSON
|
||||||
|
import json
|
||||||
|
json.loads(repaired_json)
|
||||||
|
logger.info("AI JSON repair successful")
|
||||||
|
return repaired_json
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"AI JSON repair failed: {str(e)}")
|
||||||
|
return malformed_json
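# Illustration (not part of the diff): the truncation step above walks backwards from the
# size limit to the nearest '}' or ']' so the AI sees mostly complete JSON structures.
sample = '{"items": [{"id": 1}, {"id": 2}, {"id": 3}]}'
limit = 30
cut = limit
for i in range(min(limit, len(sample) - 1), max(0, limit - 1000), -1):
    if sample[i] in ('}', ']'):
        cut = i + 1
        break
truncated = sample[:cut] + "..."
assert truncated.endswith("}...")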
|
||||||
modules/services/serviceAi/subDocumentProcessing.py (new file, 1132 lines; diff suppressed because it is too large)

modules/services/serviceAi/subUtilities.py (new file, 316 lines)
@@ -0,0 +1,316 @@
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
|
from modules.datamodels.datamodelAi import ModelCapabilities, AiCallOptions
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SubUtilities:
|
||||||
|
"""Utility functions for text processing, debugging, and helper operations."""
|
||||||
|
|
||||||
|
def __init__(self, services):
|
||||||
|
"""Initialize utilities service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
services: Service center instance for accessing other services
|
||||||
|
"""
|
||||||
|
self.services = services
|
||||||
|
|
||||||
|
def _writeTraceLog(self, contextText: str, data: Any) -> None:
|
||||||
|
"""Write raw data to the central trace log file without truncation."""
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
# Only write if logger is in debug mode
|
||||||
|
if not logger.isEnabledFor(logging.DEBUG):
|
||||||
|
return
|
||||||
|
# Get log directory from configuration via service center if possible
|
||||||
|
logDir = None
|
||||||
|
try:
|
||||||
|
logDir = self.services.utils.configGet("APP_LOGGING_LOG_DIR", "./")
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
if not logDir:
|
||||||
|
logDir = "./"
|
||||||
|
if not os.path.isabs(logDir):
|
||||||
|
# Make it relative to gateway directory
|
||||||
|
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
logDir = os.path.join(gatewayDir, logDir)
|
||||||
|
os.makedirs(logDir, exist_ok=True)
|
||||||
|
traceFile = os.path.join(logDir, "log_trace.log")
|
||||||
|
timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||||
|
traceEntry = f"[{timestamp}] {contextText}\n" + ("=" * 80) + "\n"
|
||||||
|
if data is None:
|
||||||
|
traceEntry += "No data provided\n"
|
||||||
|
else:
|
||||||
|
# Prefer exact text; if dict/list, pretty print JSON
|
||||||
|
try:
|
||||||
|
if isinstance(data, (dict, list)):
|
||||||
|
traceEntry += f"JSON Data:\n{json.dumps(data, indent=2, ensure_ascii=False)}\n"
|
||||||
|
else:
|
||||||
|
text = str(data)
|
||||||
|
traceEntry += f"Text Data:\n{text}\n"
|
||||||
|
except Exception:
|
||||||
|
traceEntry += f"Data (fallback): {str(data)}\n"
|
||||||
|
traceEntry += ("=" * 80) + "\n\n"
|
||||||
|
with open(traceFile, "a", encoding="utf-8") as f:
|
||||||
|
f.write(traceEntry)
|
||||||
|
except Exception:
|
||||||
|
# Swallow to avoid recursive logging issues
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _writeAiResponseDebug(self, label: str, content: str, partIndex: int = 1, modelName: Optional[str] = None, continuation: Optional[bool] = None) -> None:
|
||||||
|
"""Persist raw AI response parts for debugging under test-chat/ai - only if debug enabled."""
|
||||||
|
try:
|
||||||
|
# Check if debug logging is enabled
|
||||||
|
debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
||||||
|
if not debug_enabled:
|
||||||
|
return
|
||||||
|
|
||||||
|
import os
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
# Base dir: gateway/test-chat/ai (go up 4 levels from this file)
|
||||||
|
# .../gateway/modules/services/serviceAi/subUtilities.py -> up to gateway root
|
||||||
|
gatewayDir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
outDir = os.path.join(gatewayDir, 'test-chat', 'ai')
|
||||||
|
os.makedirs(outDir, exist_ok=True)
|
||||||
|
ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]
|
||||||
|
suffix = []
|
||||||
|
if partIndex is not None:
|
||||||
|
suffix.append(f"part{partIndex}")
|
||||||
|
if continuation is not None:
|
||||||
|
suffix.append(f"cont_{str(continuation).lower()}")
|
||||||
|
if modelName:
|
||||||
|
safeModel = ''.join(c if c.isalnum() or c in ('-', '_') else '-' for c in modelName)
|
||||||
|
suffix.append(safeModel)
|
||||||
|
suffixStr = ('_' + '_'.join(suffix)) if suffix else ''
|
||||||
|
fname = f"{ts}_{label}{suffixStr}.txt"
|
||||||
|
fpath = os.path.join(outDir, fname)
|
||||||
|
with open(fpath, 'w', encoding='utf-8') as f:
|
||||||
|
f.write(content or '')
|
||||||
|
except Exception:
|
||||||
|
# Do not raise; best-effort debug write
|
||||||
|
pass
|
||||||
|
|
||||||
|
def _exceedsTokenLimit(self, text: str, model: ModelCapabilities, safety_margin: float) -> bool:
|
||||||
|
"""
|
||||||
|
Check if text exceeds model token limit with safety margin.
|
||||||
|
"""
|
||||||
|
# Simple character-based estimation (4 chars per token)
|
||||||
|
estimated_tokens = len(text) // 4
|
||||||
|
max_tokens = int(model.maxTokens * (1 - safety_margin))
|
||||||
|
return estimated_tokens > max_tokens
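# Illustration (not part of the diff): with the 4-characters-per-token heuristic above, a
# model with maxTokens=8000 and safety_margin=0.2 accepts at most 6400 estimated tokens,
# i.e. roughly 25,600 characters of text.
text_length = 30000
estimated_tokens = text_length // 4        # 7500
allowed_tokens = int(8000 * (1 - 0.2))     # 6400
assert estimated_tokens > allowed_tokens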
|
||||||
|
|
||||||
|
def _reduceText(self, text: str, reduction_factor: float) -> str:
|
||||||
|
"""
|
||||||
|
Reduce text size by the specified factor.
|
||||||
|
"""
|
||||||
|
if reduction_factor >= 1.0:
|
||||||
|
return text
|
||||||
|
|
||||||
|
target_length = int(len(text) * reduction_factor)
|
||||||
|
return text[:target_length] + "... [reduced]"
|
||||||
|
|
||||||
|
def _extractTextFromContentParts(self, extracted_content) -> str:
|
||||||
|
"""
|
||||||
|
Extract text content from ExtractionService ContentPart objects.
|
||||||
|
"""
|
||||||
|
if not extracted_content or not hasattr(extracted_content, 'parts'):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
text_parts = []
|
||||||
|
for part in extracted_content.parts:
|
||||||
|
if hasattr(part, 'typeGroup') and part.typeGroup in ['text', 'table', 'structure']:
|
||||||
|
if hasattr(part, 'data') and part.data:
|
||||||
|
text_parts.append(part.data)
|
||||||
|
|
||||||
|
return "\n\n".join(text_parts)
|
||||||
|
|
||||||
|
def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
|
||||||
|
"""
|
||||||
|
Build full prompt by replacing placeholders with their content.
|
||||||
|
Uses the new {{KEY:placeholder}} format.
|
||||||
|
"""
|
||||||
|
if not placeholders:
|
||||||
|
return prompt
|
||||||
|
|
||||||
|
full_prompt = prompt
|
||||||
|
for placeholder, content in placeholders.items():
|
||||||
|
# Replace both old format {{placeholder}} and new format {{KEY:placeholder}}
|
||||||
|
full_prompt = full_prompt.replace(f"{{{{{placeholder}}}}}", content)
|
||||||
|
full_prompt = full_prompt.replace(f"{{{{KEY:{placeholder}}}}}", content)
|
||||||
|
|
||||||
|
return full_prompt
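# Illustration (not part of the diff): both the old {{report}} and the new {{KEY:report}}
# syntax are substituted with the same content, mirroring the replace calls above.
template = "Summarize {{KEY:report}} and then critique {{report}}."
placeholders = {"report": "Q3 sales report"}
resolved = template
for key, value in placeholders.items():
    resolved = resolved.replace(f"{{{{{key}}}}}", value)
    resolved = resolved.replace(f"{{{{KEY:{key}}}}}", value)
assert resolved == "Summarize Q3 sales report and then critique Q3 sales report."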
|
||||||
|
|
||||||
|
def _reducePlanningPrompt(
|
||||||
|
self,
|
||||||
|
full_prompt: str,
|
||||||
|
placeholders: Optional[Dict[str, str]],
|
||||||
|
model: ModelCapabilities,
|
||||||
|
options: AiCallOptions
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Reduce planning prompt size by summarizing placeholders while preserving prompt structure.
|
||||||
|
"""
|
||||||
|
if not placeholders:
|
||||||
|
return self._reduceText(full_prompt, 0.7)
|
||||||
|
|
||||||
|
# Reduce placeholders while preserving prompt
|
||||||
|
reduced_placeholders = {}
|
||||||
|
for placeholder, content in placeholders.items():
|
||||||
|
if len(content) > 1000: # Only reduce long content
|
||||||
|
reduction_factor = 0.7
|
||||||
|
reduced_content = self._reduceText(content, reduction_factor)
|
||||||
|
reduced_placeholders[placeholder] = reduced_content
|
||||||
|
else:
|
||||||
|
reduced_placeholders[placeholder] = content
|
||||||
|
|
||||||
|
return self._buildPromptWithPlaceholders(full_prompt, reduced_placeholders)
|
||||||
|
|
||||||
|
def _reduceTextPrompt(
|
||||||
|
self,
|
||||||
|
prompt: str,
|
||||||
|
context: str,
|
||||||
|
model: ModelCapabilities,
|
||||||
|
options: AiCallOptions
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Reduce text prompt size using typeGroup-aware chunking and merging.
|
||||||
|
"""
|
||||||
|
max_size = int(model.maxTokens * (1 - options.safetyMargin))
|
||||||
|
|
||||||
|
if options.compressPrompt:
|
||||||
|
# Reduce both prompt and context
|
||||||
|
target_size = max_size
|
||||||
|
current_size = len(prompt) + len(context)
|
||||||
|
reduction_factor = (target_size * 0.7) / current_size
|
||||||
|
|
||||||
|
if reduction_factor < 1.0:
|
||||||
|
prompt = self._reduceText(prompt, reduction_factor)
|
||||||
|
context = self._reduceText(context, reduction_factor)
|
||||||
|
else:
|
||||||
|
# Only reduce context, preserve prompt integrity
|
||||||
|
max_context_size = max_size - len(prompt)
|
||||||
|
if len(context) > max_context_size:
|
||||||
|
reduction_factor = max_context_size / len(context)
|
||||||
|
context = self._reduceText(context, reduction_factor)
|
||||||
|
|
||||||
|
return prompt + "\n\n" + context if context else prompt
|
||||||
|
|
||||||
|
async def _compressContent(self, content: str, targetSize: int, contentType: str) -> str:
|
||||||
|
"""Compress content to target size."""
|
||||||
|
if len(content.encode("utf-8")) <= targetSize:
|
||||||
|
return content
|
||||||
|
|
||||||
|
try:
|
||||||
|
compressionPrompt = f"""
|
||||||
|
Komprimiere den folgenden {contentType} auf maximal {targetSize} Zeichen,
|
||||||
|
behalte aber alle wichtigen Informationen bei:
|
||||||
|
|
||||||
|
{content}
|
||||||
|
|
||||||
|
Gib nur den komprimierten Inhalt zurück, ohne zusätzliche Erklärungen.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Service must not call connectors directly; use simple truncation fallback here
|
||||||
|
data = content.encode("utf-8")
|
||||||
|
return data[:targetSize].decode("utf-8", errors="ignore") + "... [truncated]"
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"AI compression failed, using truncation: {str(e)}")
|
||||||
|
return content[:targetSize] + "... [truncated]"
|
||||||
|
|
||||||
|
def _getModelCapabilitiesForContent(self, prompt: str, documents: Optional[List], options: AiCallOptions) -> Dict[str, int]:
|
||||||
|
"""
|
||||||
|
Get model capabilities for content processing, including appropriate size limits for chunking.
|
||||||
|
"""
|
||||||
|
# Estimate total content size
|
||||||
|
prompt_size = len(prompt.encode('utf-8'))
|
||||||
|
document_size = 0
|
||||||
|
if documents:
|
||||||
|
# Rough estimate of document content size
|
||||||
|
for doc in documents:
|
||||||
|
document_size += getattr(doc, 'fileSize', 0) or 0
|
||||||
|
|
||||||
|
total_size = prompt_size + document_size
|
||||||
|
|
||||||
|
# Use AiObjects to select the best model for this content size
|
||||||
|
# We'll simulate the model selection by checking available models
|
||||||
|
from modules.interfaces.interfaceAiObjects import aiModels
|
||||||
|
|
||||||
|
# Find the best model for this content size and operation
|
||||||
|
best_model = None
|
||||||
|
best_context_length = 0
|
||||||
|
|
||||||
|
for model_name, model_info in aiModels.items():
|
||||||
|
context_length = model_info.get("contextLength", 0)
|
||||||
|
|
||||||
|
# Skip models with no context length or too small for content
|
||||||
|
if context_length == 0:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check if model supports the operation type
|
||||||
|
capabilities = model_info.get("capabilities", [])
|
||||||
|
from modules.datamodels.datamodelAi import OperationType
|
||||||
|
if options.operationType == OperationType.IMAGE_ANALYSIS and "image_analysis" not in capabilities:
|
||||||
|
continue
|
||||||
|
elif options.operationType == OperationType.IMAGE_GENERATION and "image_generation" not in capabilities:
|
||||||
|
continue
|
||||||
|
elif options.operationType == OperationType.WEB_RESEARCH and "web_search" not in capabilities:
|
||||||
|
continue
|
||||||
|
elif "text_generation" not in capabilities:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Prefer models that can handle the content without chunking, but allow chunking if needed
|
||||||
|
if context_length >= total_size * 0.8: # 80% of content size
|
||||||
|
if context_length > best_context_length:
|
||||||
|
best_model = model_info
|
||||||
|
best_context_length = context_length
|
||||||
|
elif best_model is None: # Fallback to largest available model
|
||||||
|
if context_length > best_context_length:
|
||||||
|
best_model = model_info
|
||||||
|
best_context_length = context_length
|
||||||
|
|
||||||
|
# Fallback to a reasonable default if no model found
|
||||||
|
if best_model is None:
|
||||||
|
best_model = {
|
||||||
|
"contextLength": 128000, # GPT-4o default
|
||||||
|
"llmName": "gpt-4o"
|
||||||
|
}
|
||||||
|
|
||||||
|
# Calculate appropriate sizes
|
||||||
|
# Convert tokens to bytes (rough estimate: 1 token ≈ 4 characters)
|
||||||
|
context_length_bytes = int(best_model["contextLength"] * 4)
|
||||||
|
max_context_bytes = int(context_length_bytes * 0.9) # 90% of context length
|
||||||
|
text_chunk_size = int(max_context_bytes * 0.7) # 70% of max context for text chunks
|
||||||
|
image_chunk_size = int(max_context_bytes * 0.8) # 80% of max context for image chunks
|
||||||
|
|
||||||
|
logger.debug(f"Selected model: {best_model.get('llmName', 'unknown')} with context length: {best_model['contextLength']}")
|
||||||
|
logger.debug(f"Content size: {total_size} bytes, Max context: {max_context_bytes} bytes")
|
||||||
|
logger.debug(f"Text chunk size: {text_chunk_size} bytes, Image chunk size: {image_chunk_size} bytes")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"maxContextBytes": max_context_bytes,
|
||||||
|
"textChunkSize": text_chunk_size,
|
||||||
|
"imageChunkSize": image_chunk_size
|
||||||
|
}
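# Illustration (not part of the diff): the byte budgets derived above for the default
# 128k-token fallback model work out to roughly these values.
context_length_bytes = 128000 * 4                      # ~4 characters per token -> 512,000
max_context_bytes = int(context_length_bytes * 0.9)    # 460,800
text_chunk_size = int(max_context_bytes * 0.7)         # 322,560
image_chunk_size = int(max_context_bytes * 0.8)        # 368,640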
|
||||||
|
|
||||||
|
def _getModelsForOperation(self, operation_type: str, options: AiCallOptions) -> List[ModelCapabilities]:
|
||||||
|
"""
|
||||||
|
Get models capable of handling the specific operation with capability filtering.
|
||||||
|
"""
|
||||||
|
# Use the actual AI objects model selection instead of hardcoded default
|
||||||
|
if hasattr(self, 'aiObjects') and self.aiObjects:
|
||||||
|
# Let AiObjects handle the model selection
|
||||||
|
return []
|
||||||
|
else:
|
||||||
|
# Fallback to default model if AiObjects not available
|
||||||
|
default_model = ModelCapabilities(
|
||||||
|
name="default",
|
||||||
|
maxTokens=4000,
|
||||||
|
capabilities=["text", "reasoning"] if operation_type == "planning" else ["text"],
|
||||||
|
costPerToken=0.001,
|
||||||
|
processingTime=1.0,
|
||||||
|
isAvailable=True
|
||||||
|
)
|
||||||
|
return [default_model]
|
||||||
384
modules/services/serviceAi/subWebResearch.py
Normal file
384
modules/services/serviceAi/subWebResearch.py
Normal file
|
|
@ -0,0 +1,384 @@
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||||
|
from modules.datamodels.datamodelWeb import (
|
||||||
|
WebResearchRequest,
|
||||||
|
WebResearchActionResult,
|
||||||
|
WebResearchDocumentData,
|
||||||
|
WebResearchActionDocument,
|
||||||
|
WebSearchResultItem,
|
||||||
|
)
|
||||||
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
|
from modules.shared.configuration import APP_CONFIG
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class SubWebResearch:
|
||||||
|
"""Web research operations including search, crawling, and analysis."""
|
||||||
|
|
||||||
|
def __init__(self, services, aiObjects):
|
||||||
|
"""Initialize web research service.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
services: Service center instance for accessing other services
|
||||||
|
aiObjects: Initialized AiObjects instance
|
||||||
|
"""
|
||||||
|
self.services = services
|
||||||
|
self.aiObjects = aiObjects
|
||||||
|
|
||||||
|
async def webResearch(self, request: WebResearchRequest) -> WebResearchActionResult:
|
||||||
|
"""Perform web research using interface functions."""
|
||||||
|
try:
|
||||||
|
logger.info(f"WEB RESEARCH STARTED")
|
||||||
|
logger.info(f"User Query: {request.user_prompt}")
|
||||||
|
logger.info(f"Max Results: {request.max_results}, Max Pages: {request.options.max_pages}")
|
||||||
|
|
||||||
|
# Global URL index to track all processed URLs across the entire research session
|
||||||
|
global_processed_urls = set()
|
||||||
|
|
||||||
|
# Step 1: Find relevant websites - either provided URLs or AI-determined main URLs
|
||||||
|
logger.info(f"=== STEP 1: INITIAL MAIN URLS LIST ===")
|
||||||
|
|
||||||
|
if request.urls:
|
||||||
|
# Use provided URLs as initial main URLs
|
||||||
|
websites = request.urls
|
||||||
|
logger.info(f"Using provided URLs ({len(websites)}):")
|
||||||
|
for i, url in enumerate(websites, 1):
|
||||||
|
logger.info(f" {i}. {url}")
|
||||||
|
else:
|
||||||
|
# Use AI to determine main URLs based on user's intention
|
||||||
|
logger.info(f"AI analyzing user intent: '{request.user_prompt}'")
|
||||||
|
|
||||||
|
# Use AI to generate optimized Tavily search query and search parameters
|
||||||
|
query_optimizer_prompt = f"""You are a search query optimizer.
|
||||||
|
|
||||||
|
USER QUERY: {request.user_prompt}
|
||||||
|
|
||||||
|
Your task: Create a search query and parameters for the USER QUERY given.
|
||||||
|
|
||||||
|
RULES:
|
||||||
|
1. The search query MUST be related to the user query above
|
||||||
|
2. Extract key terms from the user query
|
||||||
|
3. Determine appropriate country/language based on the query context
|
||||||
|
4. Keep search query short (2-6 words)
|
||||||
|
|
||||||
|
Return ONLY this JSON format:
|
||||||
|
{{
|
||||||
|
"user_prompt": "search query based on user query above",
|
||||||
|
"country": "Full English country name (ISO-3166; map codes via pycountry/i18n-iso-countries)",
|
||||||
|
"language": "language_code_or_null",
|
||||||
|
"topic": "general|news|academic_or_null",
|
||||||
|
"time_range": "d|w|m|y_or_null",
|
||||||
|
"selection_strategy": "single|multiple|specific_page",
|
||||||
|
"selection_criteria": "what URLs to prioritize",
|
||||||
|
"expected_url_patterns": ["pattern1", "pattern2"],
|
||||||
|
"estimated_result_count": number
|
||||||
|
}}"""
|
||||||
|
|
||||||
|
# Get AI response for query optimization
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions
|
||||||
|
ai_request = AiCallRequest(
|
||||||
|
prompt=query_optimizer_prompt,
|
||||||
|
options=AiCallOptions()
|
||||||
|
)
|
||||||
|
ai_response_obj = await self.aiObjects.call(ai_request)
|
||||||
|
ai_response = ai_response_obj.content
|
||||||
|
logger.debug(f"AI query optimizer response: {ai_response}")
|
||||||
|
|
||||||
|
# Parse AI response to extract search query
|
||||||
|
import json
|
||||||
|
try:
|
||||||
|
# Clean the response by removing markdown code blocks
|
||||||
|
cleaned_response = ai_response.strip()
|
||||||
|
if cleaned_response.startswith('```json'):
|
||||||
|
cleaned_response = cleaned_response[7:] # Remove ```json
|
||||||
|
if cleaned_response.endswith('```'):
|
||||||
|
cleaned_response = cleaned_response[:-3] # Remove ```
|
||||||
|
cleaned_response = cleaned_response.strip()
|
||||||
|
|
||||||
|
query_data = json.loads(cleaned_response)
|
||||||
|
search_query = query_data.get("user_prompt", request.user_prompt)
|
||||||
|
ai_country = query_data.get("country")
|
||||||
|
ai_language = query_data.get("language")
|
||||||
|
ai_topic = query_data.get("topic")
|
||||||
|
ai_time_range = query_data.get("time_range")
|
||||||
|
selection_strategy = query_data.get("selection_strategy", "multiple")
|
||||||
|
selection_criteria = query_data.get("selection_criteria", "relevant URLs")
|
||||||
|
expected_patterns = query_data.get("expected_url_patterns", [])
|
||||||
|
estimated_count = query_data.get("estimated_result_count", request.max_results)
|
||||||
|
|
||||||
|
logger.info(f"AI optimized search query: '{search_query}'")
|
||||||
|
logger.info(f"Selection strategy: {selection_strategy}")
|
||||||
|
logger.info(f"Selection criteria: {selection_criteria}")
|
||||||
|
logger.info(f"Expected URL patterns: {expected_patterns}")
|
||||||
|
logger.info(f"Estimated result count: {estimated_count}")
|
||||||
|
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
logger.warning("Failed to parse AI response as JSON, using original query")
|
||||||
|
search_query = request.user_prompt
|
||||||
|
ai_country = None
|
||||||
|
ai_language = None
|
||||||
|
ai_topic = None
|
||||||
|
ai_time_range = None
|
||||||
|
selection_strategy = "multiple"
|
||||||
|
|
||||||
|
# Perform the web search with AI-determined parameters
|
||||||
|
search_kwargs = {
|
||||||
|
"query": search_query,
|
||||||
|
"max_results": request.max_results,
|
||||||
|
"search_depth": request.options.search_depth,
|
||||||
|
"auto_parameters": False # Use explicit parameters
|
||||||
|
}
|
||||||
|
|
||||||
|
# Add parameters only if they have valid values
|
||||||
|
def _normalizeCountry(c: Optional[str]) -> Optional[str]:
|
||||||
|
if not c:
|
||||||
|
return None
|
||||||
|
s = str(c).strip()
|
||||||
|
if not s or s.lower() in ['null', 'none', 'undefined']:
|
||||||
|
return None
|
||||||
|
# Map common codes to full English names when easy to do without extra deps
|
||||||
|
mapping = {
|
||||||
|
'ch': 'Switzerland', 'che': 'Switzerland',
|
||||||
|
'de': 'Germany', 'ger': 'Germany', 'deu': 'Germany',
|
||||||
|
'at': 'Austria', 'aut': 'Austria',
|
||||||
|
'us': 'United States', 'usa': 'United States', 'united states': 'United States',
|
||||||
|
'uk': 'United Kingdom', 'gb': 'United Kingdom', 'gbr': 'United Kingdom'
|
||||||
|
}
|
||||||
|
key = s.lower()
|
||||||
|
if key in mapping:
|
||||||
|
return mapping[key]
|
||||||
|
# If looks like full name, capitalize first letter only (Tavily accepts English names)
|
||||||
|
return s
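# Illustration (assumption, not part of the diff): if the optional pycountry package is
# installed, ISO code to full-name mapping can cover every country instead of the small
# hand-written table above.
try:
    import pycountry
    full_name = pycountry.countries.lookup("ch").name   # -> "Switzerland"
except (ImportError, LookupError):
    full_name = None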
|
||||||
|
|
||||||
|
norm_ai_country = _normalizeCountry(ai_country)
|
||||||
|
norm_req_country = _normalizeCountry(request.options.country)
|
||||||
|
if norm_ai_country:
|
||||||
|
search_kwargs["country"] = norm_ai_country
|
||||||
|
elif norm_req_country:
|
||||||
|
search_kwargs["country"] = norm_req_country
|
||||||
|
|
||||||
|
if ai_language and ai_language not in ['null', '', 'none', 'undefined']:
|
||||||
|
search_kwargs["language"] = ai_language
|
||||||
|
elif request.options.language and request.options.language not in ['null', '', 'none', 'undefined']:
|
||||||
|
search_kwargs["language"] = request.options.language
|
||||||
|
|
||||||
|
if ai_topic and ai_topic in ['general', 'news', 'academic']:
|
||||||
|
search_kwargs["topic"] = ai_topic
|
||||||
|
elif request.options.topic and request.options.topic in ['general', 'news', 'academic']:
|
||||||
|
search_kwargs["topic"] = request.options.topic
|
||||||
|
|
||||||
|
if ai_time_range and ai_time_range in ['d', 'w', 'm', 'y']:
|
||||||
|
search_kwargs["time_range"] = ai_time_range
|
||||||
|
elif request.options.time_range and request.options.time_range in ['d', 'w', 'm', 'y']:
|
||||||
|
search_kwargs["time_range"] = request.options.time_range
|
||||||
|
|
||||||
|
# Constrain by expected domains if provided by AI
|
||||||
|
try:
|
||||||
|
include_domains = []
|
||||||
|
for p in expected_patterns or []:
|
||||||
|
if not isinstance(p, str):
|
||||||
|
continue
|
||||||
|
# Extract bare domain from pattern or URL
|
||||||
|
import re
|
||||||
|
m = re.search(r"(?:https?://)?([^/\s]+)", p.strip())
|
||||||
|
if m:
|
||||||
|
domain = m.group(1).lower()
|
||||||
|
# strip leading www.
|
||||||
|
if domain.startswith('www.'):
|
||||||
|
domain = domain[4:]
|
||||||
|
include_domains.append(domain)
|
||||||
|
# Deduplicate
|
||||||
|
if include_domains:
|
||||||
|
seen = set()
|
||||||
|
uniq = []
|
||||||
|
for d in include_domains:
|
||||||
|
if d not in seen:
|
||||||
|
seen.add(d)
|
||||||
|
uniq.append(d)
|
||||||
|
search_kwargs["include_domains"] = uniq
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Log the parameters being used
|
||||||
|
logger.info(f"Search parameters: country={search_kwargs.get('country', 'not_set')}, language={search_kwargs.get('language', 'not_set')}, topic={search_kwargs.get('topic', 'not_set')}, time_range={search_kwargs.get('time_range', 'not_set')}, include_domains={search_kwargs.get('include_domains', [])}")
|
||||||
|
|
||||||
|
search_results = await self.aiObjects.search_websites(**search_kwargs)
|
||||||
|
|
||||||
|
logger.debug(f"Web search returned {len(search_results)} results:")
|
||||||
|
for i, result in enumerate(search_results, 1):
|
||||||
|
logger.debug(f" {i}. {result.url} - {result.title}")
|
||||||
|
|
||||||
|
# Deduplicate while preserving order
|
||||||
|
seen = set()
|
||||||
|
search_urls = []
|
||||||
|
for r in search_results:
|
||||||
|
u = str(r.url)
|
||||||
|
if u not in seen:
|
||||||
|
seen.add(u)
|
||||||
|
search_urls.append(u)
|
||||||
|
|
||||||
|
logger.info(f"After initial deduplication: {len(search_urls)} unique URLs from {len(search_results)} search results")
|
||||||
|
|
||||||
|
if not search_urls:
|
||||||
|
logger.error("No relevant websites found")
|
||||||
|
return WebResearchActionResult(success=False, error="No relevant websites found")
|
||||||
|
|
||||||
|
# Now use AI to determine the main URLs based on user's intention
|
||||||
|
logger.info(f"AI selecting main URLs from {len(search_urls)} search results based on user intent")
|
||||||
|
|
||||||
|
# Create a prompt for AI to identify main URLs based on user's intention
|
||||||
|
ai_prompt = f"""
|
||||||
|
Select the most relevant URLs from these search results:
|
||||||
|
|
||||||
|
{chr(10).join([f"{i+1}. {url}" for i, url in enumerate(search_urls)])}
|
||||||
|
|
||||||
|
Return only the URLs that are most relevant for the user's query.
|
||||||
|
One URL per line.
|
||||||
|
"""
|
||||||
|
# Create AI call request
|
||||||
|
ai_request = AiCallRequest(
|
||||||
|
prompt=ai_prompt,
|
||||||
|
options=AiCallOptions()
|
||||||
|
)
|
||||||
|
ai_response_obj = await self.aiObjects.call(ai_request)
|
||||||
|
ai_response = ai_response_obj.content
|
||||||
|
logger.debug(f"AI response for main URL selection: {ai_response}")
|
||||||
|
|
||||||
|
# Parse AI response to extract URLs
|
||||||
|
websites = []
|
||||||
|
for line in ai_response.strip().split('\n'):
|
||||||
|
line = line.strip()
|
||||||
|
if line and ('http://' in line or 'https://' in line):
|
||||||
|
# Extract URL from the line
|
||||||
|
for word in line.split():
|
||||||
|
if word.startswith('http://') or word.startswith('https://'):
|
||||||
|
websites.append(word.rstrip('.,;'))
|
||||||
|
break
|
||||||
|
|
||||||
|
if not websites:
|
||||||
|
logger.warning("AI did not identify any main URLs, using first few search results")
|
||||||
|
websites = search_urls[:3] # Fallback to first 3 search results
|
||||||
|
|
||||||
|
# Deduplicate while preserving order
|
||||||
|
seen = set()
|
||||||
|
unique_websites = []
|
||||||
|
for url in websites:
|
||||||
|
if url not in seen:
|
||||||
|
seen.add(url)
|
||||||
|
unique_websites.append(url)
|
||||||
|
|
||||||
|
logger.info(f"After AI selection deduplication: {len(unique_websites)} unique URLs from {len(websites)} AI-selected URLs")
websites = unique_websites
|
||||||
|
|
||||||
|
logger.info(f"AI selected {len(websites)} main URLs (after deduplication):")
|
||||||
|
for i, url in enumerate(websites, 1):
|
||||||
|
logger.info(f" {i}. {url}")
|
||||||
|
|
||||||
|
# Step 2: Smart website selection using AI interface
|
||||||
|
logger.info(f"=== STEP 2: FILTERED URL LIST BY USER PROMPT'S INTENTION ===")
|
||||||
|
logger.info(f"AI analyzing {len(websites)} URLs for relevance to: '{request.user_prompt}'")
|
||||||
|
|
||||||
|
selectedWebsites, aiResponse = await self.aiObjects.selectRelevantWebsites(websites, request.user_prompt)
|
||||||
|
|
||||||
|
logger.debug(f"AI Response: {aiResponse}")
|
||||||
|
logger.debug(f"AI selected {len(selectedWebsites)} most relevant URLs:")
|
||||||
|
for i, url in enumerate(selectedWebsites, 1):
|
||||||
|
logger.debug(f" {i}. {url}")
|
||||||
|
|
||||||
|
# Show which were filtered out
|
||||||
|
filtered_out = [url for url in websites if url not in selectedWebsites]
|
||||||
|
if filtered_out:
|
||||||
|
logger.debug(f"Filtered out {len(filtered_out)} less relevant URLs:")
|
||||||
|
for i, url in enumerate(filtered_out, 1):
|
||||||
|
logger.debug(f" {i}. {url}")
|
||||||
|
|
||||||
|
# Step 3+4+5: Recursive crawling with configurable depth
|
||||||
|
# Get configuration parameters
|
||||||
|
max_depth = int(APP_CONFIG.get("Web_Research_MAX_DEPTH", "2"))
|
||||||
|
max_links_per_domain = int(APP_CONFIG.get("Web_Research_MAX_LINKS_PER_DOMAIN", "4"))
|
||||||
|
crawl_timeout_minutes = int(APP_CONFIG.get("Web_Research_CRAWL_TIMEOUT_MINUTES", "10"))
|
||||||
|
crawl_timeout_seconds = crawl_timeout_minutes * 60
|
||||||
|
|
||||||
|
# Use the configured max_depth or the request's pages_search_depth, whichever is smaller
|
||||||
|
effective_depth = min(max_depth, request.options.pages_search_depth)
|
||||||
|
|
||||||
|
logger.info(f"=== STEP 3+4+5: RECURSIVE CRAWLING (DEPTH {effective_depth}) ===")
|
||||||
|
logger.info(f"Starting recursive crawl of {len(selectedWebsites)} main websites...")
|
||||||
|
logger.info(f"Search depth: {effective_depth} levels (max configured: {max_depth})")
|
||||||
|
logger.info(f"Max links per domain: {max_links_per_domain}")
|
||||||
|
logger.info(f"Crawl timeout: {crawl_timeout_minutes} minutes")
|
||||||
|
|
||||||
|
# Use recursive crawling with URL index to avoid duplicates
|
||||||
|
import asyncio
|
||||||
|
try:
|
||||||
|
allContent = await asyncio.wait_for(
|
||||||
|
self.aiObjects.crawlRecursively(
|
||||||
|
urls=selectedWebsites,
|
||||||
|
max_depth=effective_depth,
|
||||||
|
extract_depth=request.options.extract_depth,
|
||||||
|
max_per_domain=max_links_per_domain,
|
||||||
|
global_processed_urls=global_processed_urls
|
||||||
|
),
|
||||||
|
timeout=crawl_timeout_seconds
|
||||||
|
)
|
||||||
|
logger.info(f"Crawling completed within timeout: {len(allContent)} pages crawled")
|
||||||
|
except asyncio.TimeoutError:
|
||||||
|
logger.warning(f"Crawling timed out after {crawl_timeout_minutes} minutes, using partial results")
|
||||||
|
# The timeout fired before crawlRecursively returned, so no partial results
# are available at this level; fall back to an empty result set.
allContent = {}
|
||||||
|
|
||||||
|
if not allContent:
|
||||||
|
logger.error("Could not extract content from any websites")
|
||||||
|
return WebResearchActionResult(success=False, error="Could not extract content from any websites")
|
||||||
|
|
||||||
|
logger.info(f"=== WEB RESEARCH COMPLETED ===")
|
||||||
|
logger.info(f"Successfully crawled {len(allContent)} URLs total")
|
||||||
|
logger.info(f"Crawl depth: {effective_depth} levels")
|
||||||
|
|
||||||
|
# Create simple result with raw content
|
||||||
|
sources = [WebSearchResultItem(title=url, url=url) for url in selectedWebsites]
|
||||||
|
|
||||||
|
# Get all additional links (all URLs except main ones)
|
||||||
|
additional_links = [url for url in allContent.keys() if url not in selectedWebsites]
|
||||||
|
|
||||||
|
# Combine all content into a single result
|
||||||
|
combinedContent = ""
|
||||||
|
for url, content in allContent.items():
|
||||||
|
combinedContent += f"\n\n=== {url} ===\n{content}\n"
|
||||||
|
|
||||||
|
documentData = WebResearchDocumentData(
|
||||||
|
user_prompt=request.user_prompt,
|
||||||
|
websites_analyzed=len(allContent),
|
||||||
|
additional_links_found=len(additional_links),
|
||||||
|
analysis_result=combinedContent, # Raw content, no analysis
|
||||||
|
sources=sources,
|
||||||
|
additional_links=additional_links,
|
||||||
|
individual_content=allContent, # Individual URL -> content mapping
|
||||||
|
debug_info={
|
||||||
|
"crawl_depth": effective_depth,
|
||||||
|
"max_configured_depth": max_depth,
|
||||||
|
"max_links_per_domain": max_links_per_domain,
|
||||||
|
"crawl_timeout_minutes": crawl_timeout_minutes,
|
||||||
|
"total_urls_crawled": len(allContent),
|
||||||
|
"main_urls": len(selectedWebsites),
|
||||||
|
"additional_urls": len(additional_links)
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
document = WebResearchActionDocument(
|
||||||
|
documentName=f"web_research_{request.user_prompt[:50]}.json",
|
||||||
|
documentData=documentData,
|
||||||
|
mimeType="application/json"
|
||||||
|
)
|
||||||
|
|
||||||
|
return WebResearchActionResult(
|
||||||
|
success=True,
|
||||||
|
documents=[document],
|
||||||
|
resultLabel="web_research_results"
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error in web research: {str(e)}")
|
||||||
|
return WebResearchActionResult(success=False, error=str(e))
|
||||||
|
|
@ -7,8 +7,28 @@ from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
class BinaryExtractor(Extractor):
|
||||||
|
"""
|
||||||
|
Fallback extractor for unsupported file types.
|
||||||
|
|
||||||
|
This extractor handles any file type that doesn't match other extractors.
|
||||||
|
It encodes the file as base64 and marks it as binary data.
|
||||||
|
|
||||||
|
Supported formats:
|
||||||
|
- All file types (fallback)
|
||||||
|
- MIME types: application/octet-stream (default)
|
||||||
|
- File extensions: All (fallback)
|
||||||
|
"""
|
||||||
|
|
||||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
return True
|
||||||
|
|
||||||
|
def getSupportedExtensions(self) -> list[str]:
|
||||||
|
"""Return list of supported file extensions (all)."""
|
||||||
|
return [] # Accepts all extensions as fallback
|
||||||
|
|
||||||
|
def getSupportedMimeTypes(self) -> list[str]:
|
||||||
|
"""Return list of supported MIME types (all)."""
|
||||||
|
return [] # Accepts all MIME types as fallback
|
||||||
|
|
||||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
mimeType = context.get("mimeType") or "application/octet-stream"
|
||||||
|
|
@ -6,8 +6,25 @@ from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
class CsvExtractor(Extractor):
|
||||||
|
"""
|
||||||
|
Extractor for CSV files.
|
||||||
|
|
||||||
|
Supported formats:
|
||||||
|
- MIME types: text/csv
|
||||||
|
- File extensions: .csv
|
||||||
|
- Special handling: Treats as table data
|
||||||
|
"""
|
||||||
|
|
||||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
return mimeType == "text/csv" or (fileName or "").lower().endswith(".csv")
|
||||||
|
|
||||||
|
def getSupportedExtensions(self) -> list[str]:
|
||||||
|
"""Return list of supported file extensions."""
|
||||||
|
return [".csv"]
|
||||||
|
|
||||||
|
def getSupportedMimeTypes(self) -> list[str]:
|
||||||
|
"""Return list of supported MIME types."""
|
||||||
|
return ["text/csv"]
|
||||||
|
|
||||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
fileName = context.get("fileName")
|
||||||
|
|
@ -7,6 +7,16 @@ from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
class DocxExtractor(Extractor):
|
||||||
|
"""
|
||||||
|
Extractor for Microsoft Word documents.
|
||||||
|
|
||||||
|
Supported formats:
|
||||||
|
- MIME types: application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
||||||
|
- File extensions: .docx
|
||||||
|
- Special handling: Extracts paragraphs and tables (converts tables to CSV)
|
||||||
|
- Dependencies: python-docx
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self):
self._loaded = False
self._haveLibs = False
|
||||||
|
|
@ -24,6 +34,14 @@ class DocxExtractor(Extractor):
|
||||||
|
|
||||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
return mimeType == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" or (fileName or "").lower().endswith(".docx")
|
||||||
|
|
||||||
|
def getSupportedExtensions(self) -> list[str]:
|
||||||
|
"""Return list of supported file extensions."""
|
||||||
|
return [".docx"]
|
||||||
|
|
||||||
|
def getSupportedMimeTypes(self) -> list[str]:
|
||||||
|
"""Return list of supported MIME types."""
|
||||||
|
return ["application/vnd.openxmlformats-officedocument.wordprocessingml.document"]
|
||||||
|
|
||||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
self._load()
|
||||||
|
|
@ -7,8 +7,26 @@ from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
|
||||||
class HtmlExtractor(Extractor):
|
||||||
|
"""
|
||||||
|
Extractor for HTML files.
|
||||||
|
|
||||||
|
Supported formats:
|
||||||
|
- MIME types: text/html
|
||||||
|
- File extensions: .html, .htm
|
||||||
|
- Special handling: Uses BeautifulSoup for parsing
|
||||||
|
- Dependencies: beautifulsoup4
|
||||||
|
"""
|
||||||
|
|
||||||
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
return mimeType == "text/html" or (fileName or "").lower().endswith((".html", ".htm"))
|
||||||
|
|
||||||
|
def getSupportedExtensions(self) -> list[str]:
|
||||||
|
"""Return list of supported file extensions."""
|
||||||
|
return [".html", ".htm"]
|
||||||
|
|
||||||
|
def getSupportedMimeTypes(self) -> list[str]:
|
||||||
|
"""Return list of supported MIME types."""
|
||||||
|
return ["text/html"]
|
||||||
|
|
||||||
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
mimeType = context.get("mimeType") or "text/html"
|
||||||
|
|
@ -0,0 +1,75 @@
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from ..subUtils import makeId
|
||||||
|
from modules.datamodels.datamodelExtraction import ContentPart
|
||||||
|
from ..subRegistry import Extractor
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class ImageExtractor(Extractor):
|
||||||
|
"""
|
||||||
|
Extractor for image files.
|
||||||
|
|
||||||
|
Supported formats:
|
||||||
|
- MIME types: image/jpeg, image/png, image/gif, image/webp, image/bmp, image/tiff
|
||||||
|
- File extensions: .jpg, .jpeg, .png, .gif, .webp, .bmp, .tiff
|
||||||
|
- Special handling: GIF files are converted to PNG during extraction
|
||||||
|
"""
|
||||||
|
|
||||||
|
def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
|
||||||
|
return ((mimeType or "").startswith("image/") or
|
||||||
|
(fileName or "").lower().endswith((".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff")))
|
||||||
|
|
||||||
|
def getSupportedExtensions(self) -> list[str]:
|
||||||
|
"""Return list of supported file extensions."""
|
||||||
|
return [".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", ".tiff"]
|
||||||
|
|
||||||
|
def getSupportedMimeTypes(self) -> list[str]:
|
||||||
|
"""Return list of supported MIME types."""
|
||||||
|
return ["image/jpeg", "image/png", "image/gif", "image/webp", "image/bmp", "image/tiff"]
|
||||||
|
|
||||||
|
def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
|
||||||
|
mimeType = context.get("mimeType") or "image/unknown"
|
||||||
|
fileName = context.get("fileName", "")
|
||||||
|
|
||||||
|
# Convert GIF to PNG during extraction
|
||||||
|
if mimeType.lower() == "image/gif":
|
||||||
|
try:
|
||||||
|
from PIL import Image
|
||||||
|
import io
|
||||||
|
|
||||||
|
# Open GIF and convert to PNG
|
||||||
|
with Image.open(io.BytesIO(fileBytes)) as img:
|
||||||
|
# Convert to RGB (removes animation)
|
||||||
|
if img.mode in ('RGBA', 'LA', 'P'):
|
||||||
|
img = img.convert('RGB')
|
||||||
|
|
||||||
|
# Save as PNG in memory
|
||||||
|
png_buffer = io.BytesIO()
|
||||||
|
img.save(png_buffer, format='PNG')
|
||||||
|
png_data = png_buffer.getvalue()
|
||||||
|
|
||||||
|
# Log sizes before replacing fileBytes, then update mimeType and fileBytes
logger.info(f"GIF converted to PNG during extraction: {fileName}, original={len(fileBytes)} bytes, converted={len(png_data)} bytes")
mimeType = "image/png"
fileBytes = png_data
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"GIF conversion failed during extraction for {fileName}: {str(e)}, using original")
|
||||||
|
# Keep original GIF data if conversion fails
|
||||||
|
|
||||||
|
return [ContentPart(
|
||||||
|
id=makeId(),
|
||||||
|
parentId=None,
|
||||||
|
label="image",
|
||||||
|
typeGroup="image",
|
||||||
|
mimeType=mimeType,
|
||||||
|
data=base64.b64encode(fileBytes).decode("utf-8"),
|
||||||
|
metadata={"size": len(fileBytes)}
|
||||||
|
)]
|
||||||
|
|
||||||
|
|
||||||
|
|
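Illustrative usage sketch, not part of the diff: exercising the GIF-to-PNG path of the new ImageExtractor above. The module path extractorImage follows the extractorXxx naming used for the other new files in this change and is an assumption; Pillow must be installed, and the tiny in-memory GIF stands in for real upload bytes.

import io
from PIL import Image
from modules.services.serviceExtraction.extractors.extractorImage import ImageExtractor  # assumed module name

buf = io.BytesIO()
Image.new("RGB", (4, 4), "red").save(buf, format="GIF")  # tiny stand-in GIF

parts = ImageExtractor().extract(buf.getvalue(), {"fileName": "sample.gif", "mimeType": "image/gif"})
print(parts[0].mimeType)          # "image/png" after conversion
print(parts[0].metadata["size"])  # size of the converted PNG bytes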
@@ -7,8 +7,25 @@ from ..subRegistry import Extractor
class JsonExtractor(Extractor):
    """
    Extractor for JSON files.

    Supported formats:
    - MIME types: application/json
    - File extensions: .json
    - Special handling: Validates JSON format, falls back to text if invalid
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        return mimeType == "application/json" or (fileName or "").lower().endswith(".json")

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".json"]

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return ["application/json"]

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        mimeType = context.get("mimeType") or "application/json"
@@ -8,6 +8,16 @@ from ..subRegistry import Extractor
class PdfExtractor(Extractor):
    """
    Extractor for PDF files.

    Supported formats:
    - MIME types: application/pdf
    - File extensions: .pdf
    - Special handling: Extracts text per page and embedded images
    - Dependencies: PyPDF2, PyMuPDF (fitz)
    """

    def __init__(self):
        self._loaded = False
        self._haveLibs = False

@@ -26,6 +36,14 @@ class PdfExtractor(Extractor):
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        return mimeType == "application/pdf" or (fileName or "").lower().endswith(".pdf")

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".pdf"]

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return ["application/pdf"]

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        self._load()
225  modules/services/serviceExtraction/extractors/extractorPptx.py  Normal file
@@ -0,0 +1,225 @@
import logging
import base64
from typing import List, Dict, Any, Optional
from modules.datamodels.datamodelExtraction import ContentPart, ContentExtracted
from ..subRegistry import Extractor

logger = logging.getLogger(__name__)


class PptxExtractor(Extractor):
    """
    Extractor for PowerPoint files.

    Supported formats:
    - MIME types: application/vnd.openxmlformats-officedocument.presentationml.presentation, application/vnd.ms-powerpoint
    - File extensions: .pptx, .ppt
    - Special handling: Extracts slide content, tables, and images
    - Dependencies: python-pptx
    """

    def __init__(self):
        self._loaded = False
        self._haveLibs = False

    def _load(self):
        if self._loaded:
            return
        self._loaded = True
        try:
            global Presentation
            from pptx import Presentation
            self._haveLibs = True
        except Exception:
            self._haveLibs = False

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        return (mimeType in [
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint"
        ]) or (fileName or "").lower().endswith((".pptx", ".ppt"))

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".pptx", ".ppt"]

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return [
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint"
        ]

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        """
        Extract content from PowerPoint files.

        Args:
            fileBytes: Raw file data as bytes
            context: Context dictionary with file information

        Returns:
            List of ContentPart objects with extracted content
        """
        self._load()

        if not self._haveLibs:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return [ContentPart(
                id="error",
                label="PowerPoint Extraction Error",
                typeGroup="text",
                mimeType="text/plain",
                data="Error: python-pptx library not installed",
                metadata={"error": True, "error_message": "python-pptx library not installed"}
            )]

        try:
            import io

            # Load presentation from bytes
            presentation = Presentation(io.BytesIO(fileBytes))

            parts = []
            slide_index = 0

            # Extract content from each slide
            for slide in presentation.slides:
                slide_index += 1
                slide_content = []

                # Extract text from slide
                for shape in slide.shapes:
                    if hasattr(shape, "text") and shape.text.strip():
                        slide_content.append(shape.text.strip())

                # Extract table data
                for shape in slide.shapes:
                    if shape.has_table:
                        table = shape.table
                        table_data = []
                        for row in table.rows:
                            row_data = []
                            for cell in row.cells:
                                row_data.append(cell.text.strip())
                            table_data.append(row_data)

                        if table_data:
                            # Convert table to markdown format
                            table_md = self._table_to_markdown(table_data)
                            slide_content.append(table_md)

                # Extract images
                for shape in slide.shapes:
                    if shape.shape_type == 13:  # MSO_SHAPE_TYPE.PICTURE
                        try:
                            image = shape.image
                            image_bytes = image.blob
                            image_b64 = base64.b64encode(image_bytes).decode('utf-8')

                            # Create image part
                            image_part = ContentPart(
                                id=f"slide_{slide_index}_image_{len(parts)}",
                                label=f"Slide {slide_index} Image",
                                typeGroup="image",
                                mimeType="image/png",  # Default to PNG
                                data=image_b64,
                                metadata={
                                    "slide_number": slide_index,
                                    "shape_type": "image",
                                    "extracted_from": "powerpoint"
                                }
                            )
                            parts.append(image_part)
                        except Exception as e:
                            logger.warning(f"Failed to extract image from slide {slide_index}: {str(e)}")

                # Create slide content part
                if slide_content:
                    slide_text = f"# Slide {slide_index}\n\n" + "\n\n".join(slide_content)

                    slide_part = ContentPart(
                        id=f"slide_{slide_index}",
                        label=f"Slide {slide_index} Content",
                        typeGroup="structure",
                        mimeType="text/plain",
                        data=slide_text,
                        metadata={
                            "slide_number": slide_index,
                            "content_type": "slide",
                            "extracted_from": "powerpoint",
                            "text_length": len(slide_text)
                        }
                    )
                    parts.append(slide_part)

            # Create presentation overview
            file_name = context.get("fileName", "presentation.pptx")
            overview_text = f"# PowerPoint Presentation: {file_name}\n\n"
            overview_text += f"**Total Slides:** {len(presentation.slides)}\n\n"
            overview_text += f"**Content Parts:** {len(parts)}\n\n"

            # Add slide summaries
            for i, slide in enumerate(presentation.slides, 1):
                slide_text_parts = []
                for shape in slide.shapes:
                    if hasattr(shape, "text") and shape.text.strip():
                        slide_text_parts.append(shape.text.strip())

                if slide_text_parts:
                    overview_text += f"## Slide {i}\n"
                    overview_text += "\n".join(slide_text_parts[:3])  # First 3 text elements
                    overview_text += "\n\n"

            # Create overview part
            overview_part = ContentPart(
                id="presentation_overview",
                label="Presentation Overview",
                typeGroup="text",
                mimeType="text/plain",
                data=overview_text,
                metadata={
                    "content_type": "overview",
                    "extracted_from": "powerpoint",
                    "total_slides": len(presentation.slides),
                    "text_length": len(overview_text)
                }
            )
            parts.insert(0, overview_part)  # Insert at beginning

            return parts

        except Exception as e:
            logger.error(f"Error extracting PowerPoint content: {str(e)}")
            return [ContentPart(
                id="error",
                label="PowerPoint Extraction Error",
                typeGroup="text",
                mimeType="text/plain",
                data=f"Error extracting PowerPoint content: {str(e)}",
                metadata={"error": True, "error_message": str(e)}
            )]

    def _table_to_markdown(self, table_data: List[List[str]]) -> str:
        """Convert table data to markdown format."""
        if not table_data:
            return ""

        markdown_lines = []

        # Header row
        if table_data:
            header = "| " + " | ".join(table_data[0]) + " |"
            markdown_lines.append(header)

            # Separator row
            separator = "| " + " | ".join(["---"] * len(table_data[0])) + " |"
            markdown_lines.append(separator)

            # Data rows
            for row in table_data[1:]:
                data_row = "| " + " | ".join(row) + " |"
                markdown_lines.append(data_row)

        return "\n".join(markdown_lines)
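A quick illustration, not part of the diff, of what the markdown conversion in _table_to_markdown produces; the import path assumes the extractorPptx.py location shown above.

from modules.services.serviceExtraction.extractors.extractorPptx import PptxExtractor

table_md = PptxExtractor()._table_to_markdown([
    ["Quarter", "Revenue"],
    ["Q1", "10"],
    ["Q2", "12"],
])
print(table_md)
# | Quarter | Revenue |
# | --- | --- |
# | Q1 | 10 |
# | Q2 | 12 |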
@@ -0,0 +1,56 @@
from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart
from ..subUtils import makeId
from ..subRegistry import Extractor


class SqlExtractor(Extractor):
    """
    Extractor for SQL files.

    Supported formats:
    - MIME types: text/x-sql, application/sql
    - File extensions: .sql, .ddl, .dml, .dcl, .tcl
    - Special handling: Treats as structured text with SQL syntax
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        return (mimeType in ("text/x-sql", "application/sql") or
                (fileName or "").lower().endswith((".sql", ".ddl", ".dml", ".dcl", ".tcl")))

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".sql", ".ddl", ".dml", ".dcl", ".tcl"]

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return ["text/x-sql", "application/sql"]

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        fileName = context.get("fileName")
        mimeType = context.get("mimeType") or "text/x-sql"
        data = fileBytes.decode("utf-8", errors="replace")

        # Add SQL-specific metadata
        metadata = {
            "size": len(fileBytes),
            "file_type": "sql",
            "line_count": len(data.splitlines()),
            "has_select": "SELECT" in data.upper(),
            "has_insert": "INSERT" in data.upper(),
            "has_update": "UPDATE" in data.upper(),
            "has_delete": "DELETE" in data.upper(),
            "has_create": "CREATE" in data.upper(),
            "has_drop": "DROP" in data.upper()
        }

        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="main",
            typeGroup="structure",
            mimeType=mimeType,
            data=data,
            metadata=metadata
        )]
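A small sketch, not part of the diff, of the SQL-specific metadata the extractor above attaches; the import path assumes an extractorSql module alongside the other extractors, and the flag values follow directly from the keyword checks.

from modules.services.serviceExtraction.extractors.extractorSql import SqlExtractor  # assumed module name

parts = SqlExtractor().extract(
    b"CREATE TABLE users (id INT);\nINSERT INTO users VALUES (1);",
    {"fileName": "schema.sql", "mimeType": "text/x-sql"},
)
meta = parts[0].metadata
print(meta["line_count"], meta["has_create"], meta["has_insert"], meta["has_select"])
# 2 True True False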
103  modules/services/serviceExtraction/extractors/extractorText.py  Normal file
@@ -0,0 +1,103 @@
from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart
from ..subUtils import makeId
from ..subRegistry import Extractor


class TextExtractor(Extractor):
    """
    Extractor for plain text files and code files.

    Supported formats:
    - MIME types: text/plain, text/markdown, text/x-python, text/x-java-source, text/javascript, etc.
    - File extensions: .txt, .md, .log, .java, .js, .jsx, .ts, .tsx, .py, .config, .ini, .cfg, .conf, .properties, .yaml, .yml, .toml, .sh, .bat, .ps1, .sql, .css, .scss, .sass, .less, .xml, .json, .csv, .tsv, .rtf, .tex, .rst, .adoc, .org, .pod, .man, .1, .2, .3, .4, .5, .6, .7, .8, .9, .n, .l, .m, .r, .t, .x, .y, .z
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        # Check MIME types
        if mimeType and mimeType.startswith("text/"):
            return True

        # Check file extensions
        if fileName:
            ext = fileName.lower()
            return ext.endswith((
                # Basic text files
                ".txt", ".md", ".log", ".rtf", ".tex", ".rst", ".adoc", ".org", ".pod",
                # Programming languages
                ".java", ".js", ".jsx", ".ts", ".tsx", ".py", ".rb", ".go", ".rs", ".cpp", ".c", ".h", ".hpp", ".cc", ".cxx",
                ".cs", ".php", ".swift", ".kt", ".scala", ".clj", ".hs", ".ml", ".fs", ".vb", ".dart", ".r", ".m", ".pl", ".sh",
                # Web technologies
                ".html", ".htm", ".css", ".scss", ".sass", ".less", ".vue", ".svelte",
                # Configuration files
                ".config", ".ini", ".cfg", ".conf", ".properties", ".yaml", ".yml", ".toml", ".json", ".xml",
                # Scripts and automation
                ".bat", ".ps1", ".psm1", ".psd1", ".vbs", ".wsf", ".cmd", ".com",
                # Data files
                ".csv", ".tsv", ".tab", ".dat", ".data",
                # Documentation
                ".man", ".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".n", ".l", ".m", ".r", ".t", ".x", ".y", ".z",
                # Other text formats
                ".diff", ".patch", ".gitignore", ".dockerignore", ".editorconfig", ".gitattributes",
                ".env", ".env.local", ".env.development", ".env.production", ".env.test",
                ".lock", ".lockb", ".lockfile", ".pkg-lock", ".yarn-lock"
            ))

        return False

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [
            # Basic text files
            ".txt", ".md", ".log", ".rtf", ".tex", ".rst", ".adoc", ".org", ".pod",
            # Programming languages
            ".java", ".js", ".jsx", ".ts", ".tsx", ".py", ".rb", ".go", ".rs", ".cpp", ".c", ".h", ".hpp", ".cc", ".cxx",
            ".cs", ".php", ".swift", ".kt", ".scala", ".clj", ".hs", ".ml", ".fs", ".vb", ".dart", ".r", ".m", ".pl", ".sh",
            # Web technologies
            ".html", ".htm", ".css", ".scss", ".sass", ".less", ".vue", ".svelte",
            # Configuration files
            ".config", ".ini", ".cfg", ".conf", ".properties", ".yaml", ".yml", ".toml", ".json", ".xml",
            # Scripts and automation
            ".bat", ".ps1", ".psm1", ".psd1", ".vbs", ".wsf", ".cmd", ".com",
            # Data files
            ".csv", ".tsv", ".tab", ".dat", ".data",
            # Documentation
            ".man", ".1", ".2", ".3", ".4", ".5", ".6", ".7", ".8", ".9", ".n", ".l", ".m", ".r", ".t", ".x", ".y", ".z",
            # Other text formats
            ".diff", ".patch", ".gitignore", ".dockerignore", ".editorconfig", ".gitattributes",
            ".env", ".env.local", ".env.development", ".env.production", ".env.test",
            ".lock", ".lockb", ".lockfile", ".pkg-lock", ".yarn-lock"
        ]

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return [
            "text/plain", "text/markdown", "text/x-python", "text/x-java-source",
            "text/javascript", "text/x-javascript", "text/typescript", "text/x-typescript",
            "text/x-c", "text/x-c++", "text/x-csharp", "text/x-php", "text/x-ruby",
            "text/x-go", "text/x-rust", "text/x-scala", "text/x-swift", "text/x-kotlin",
            "text/x-sql", "text/x-sh", "text/x-shellscript", "text/x-yaml", "text/x-toml",
            "text/x-ini", "text/x-config", "text/x-properties", "text/x-log",
            "text/html", "text/css", "text/x-scss", "text/x-sass", "text/x-less",
            "text/xml", "text/csv", "text/tab-separated-values", "text/rtf",
            "text/x-tex", "text/x-rst", "text/x-asciidoc", "text/x-org",
            "application/x-yaml", "application/x-toml", "application/x-ini",
            "application/x-config", "application/x-properties", "application/x-log"
        ]

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        fileName = context.get("fileName")
        mimeType = context.get("mimeType") or "text/plain"
        data = fileBytes.decode("utf-8", errors="replace")
        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="main",
            typeGroup="text",
            mimeType=mimeType,
            data=data,
            metadata={"size": len(fileBytes)}
        )]
@@ -8,6 +8,16 @@ from ..subRegistry import Extractor
class XlsxExtractor(Extractor):
    """
    Extractor for Microsoft Excel spreadsheets.

    Supported formats:
    - MIME types: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
    - File extensions: .xlsx, .xlsm
    - Special handling: Extracts all sheets as CSV data
    - Dependencies: openpyxl
    """

    def __init__(self):
        self._loaded = False
        self._haveLibs = False

@@ -26,6 +36,14 @@ class XlsxExtractor(Extractor):
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        mt = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
        return mimeType == mt or (fileName or "").lower().endswith((".xlsx", ".xlsm"))

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".xlsx", ".xlsm"]

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"]

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        self._load()
@@ -7,8 +7,25 @@ from ..subRegistry import Extractor
class XmlExtractor(Extractor):
    """
    Extractor for XML files.

    Supported formats:
    - MIME types: application/xml
    - File extensions: .xml, .rss, .atom
    - Special handling: Uses ElementTree for parsing
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        return mimeType == "application/xml" or (fileName or "").lower().endswith((".xml", ".rss", ".atom"))

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions."""
        return [".xml", ".rss", ".atom"]

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return ["application/xml"]

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        mimeType = context.get("mimeType") or "application/xml"
@@ -1,25 +0,0 @@
from typing import Any, Dict, List
import base64

from ..subUtils import makeId
from modules.datamodels.datamodelExtraction import ContentPart
from ..subRegistry import Extractor


class ImageExtractor(Extractor):
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        return (mimeType or "").startswith("image/")

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        mimeType = context.get("mimeType") or "image/unknown"
        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="image",
            typeGroup="image",
            mimeType=mimeType,
            data=base64.b64encode(fileBytes).decode("utf-8"),
            metadata={"size": len(fileBytes)}
        )]
@@ -1,26 +0,0 @@
from typing import Any, Dict, List

from modules.datamodels.datamodelExtraction import ContentPart
from ..subUtils import makeId
from ..subRegistry import Extractor


class TextExtractor(Extractor):
    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        return mimeType in ("text/plain", "text/markdown")

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> List[ContentPart]:
        fileName = context.get("fileName")
        mimeType = context.get("mimeType") or "text/plain"
        data = fileBytes.decode("utf-8", errors="replace")
        return [ContentPart(
            id=makeId(),
            parentId=None,
            label="main",
            typeGroup="text",
            mimeType=mimeType,
            data=data,
            metadata={"size": len(fileBytes)}
        )]
@@ -67,10 +67,12 @@ class ExtractionService:
            if part.metadata:
                logger.debug(f"  Metadata: {part.metadata}")

-       # Attach document id to parts if missing
+       # Attach document id and MIME type to parts if missing
        for p in ec.parts:
            if "documentId" not in p.metadata:
                p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4())
+           if "documentMimeType" not in p.metadata:
+               p.metadata["documentMimeType"] = documentData["mimeType"]

        # Log chunking information
        chunked_parts = [p for p in ec.parts if p.metadata.get("chunk", False)]
209  modules/services/serviceExtraction/subMerger.py  Normal file
@@ -0,0 +1,209 @@
"""
Intelligent Token-Aware Merger for optimizing AI calls based on LLM token limits.
"""
from typing import List, Dict, Any, Tuple
import logging
from modules.datamodels.datamodelExtraction import ContentPart
from .subUtils import makeId

logger = logging.getLogger(__name__)


class IntelligentTokenAwareMerger:
    """
    Intelligent merger that groups chunks based on LLM token limits to minimize AI calls.

    Strategy:
    1. Calculate token count for each chunk
    2. Group chunks to maximize token usage without exceeding limits
    3. Preserve document structure and semantic boundaries
    4. Minimize total number of AI calls
    """

    def __init__(self, model_capabilities: Dict[str, Any]):
        self.max_tokens = model_capabilities.get("maxTokens", 4000)
        self.safety_margin = model_capabilities.get("safetyMargin", 0.1)
        self.effective_max_tokens = int(self.max_tokens * (1 - self.safety_margin))
        self.chars_per_token = model_capabilities.get("charsPerToken", 4)  # Rough estimation

    def merge_chunks_intelligently(self, chunks: List[ContentPart], prompt: str = "") -> List[ContentPart]:
        """
        Merge chunks intelligently based on token limits.

        Args:
            chunks: List of ContentPart chunks to merge
            prompt: AI prompt to account for in token calculation

        Returns:
            List of optimally merged ContentPart objects
        """
        if not chunks:
            return chunks

        logger.info(f"🧠 Intelligent merging: {len(chunks)} chunks, max_tokens={self.effective_max_tokens}")

        # Calculate tokens for prompt
        prompt_tokens = self._estimate_tokens(prompt)
        available_tokens = self.effective_max_tokens - prompt_tokens

        logger.info(f"📊 Prompt tokens: {prompt_tokens}, Available for content: {available_tokens}")

        # Group chunks by document and type for semantic coherence
        grouped_chunks = self._group_chunks_by_document_and_type(chunks)

        merged_parts = []

        for group_key, group_chunks in grouped_chunks.items():
            logger.info(f"📁 Processing group: {group_key} ({len(group_chunks)} chunks)")

            # Merge chunks within this group optimally
            group_merged = self._merge_group_optimally(group_chunks, available_tokens)
            merged_parts.extend(group_merged)

        logger.info(f"✅ Intelligent merging complete: {len(chunks)} → {len(merged_parts)} parts")
        return merged_parts

    def _group_chunks_by_document_and_type(self, chunks: List[ContentPart]) -> Dict[str, List[ContentPart]]:
        """Group chunks by document and type for semantic coherence."""
        groups = {}

        for chunk in chunks:
            # Create group key: document_id + type_group
            doc_id = chunk.metadata.get("documentId", "unknown")
            type_group = chunk.typeGroup
            group_key = f"{doc_id}_{type_group}"

            if group_key not in groups:
                groups[group_key] = []
            groups[group_key].append(chunk)

        return groups

    def _merge_group_optimally(self, chunks: List[ContentPart], available_tokens: int) -> List[ContentPart]:
        """Merge chunks within a group optimally to minimize AI calls."""
        if not chunks:
            return []

        # Sort chunks by size (smallest first for better packing)
        sorted_chunks = sorted(chunks, key=lambda c: self._estimate_tokens(c.data))

        merged_parts = []
        current_group = []
        current_tokens = 0

        for chunk in sorted_chunks:
            chunk_tokens = self._estimate_tokens(chunk.data)

            # Special case: If single chunk is already at max size, process it alone
            if chunk_tokens >= available_tokens * 0.9:  # 90% of available tokens
                # Finalize current group if it exists
                if current_group:
                    merged_part = self._create_merged_part(current_group, current_tokens)
                    merged_parts.append(merged_part)
                    current_group = []
                    current_tokens = 0

                # Process large chunk individually
                merged_parts.append(chunk)
                logger.debug(f"🔍 Large chunk processed individually: {chunk_tokens} tokens")
                continue

            # If adding this chunk would exceed limit, finalize current group
            if current_tokens + chunk_tokens > available_tokens and current_group:
                merged_part = self._create_merged_part(current_group, current_tokens)
                merged_parts.append(merged_part)
                current_group = [chunk]
                current_tokens = chunk_tokens
            else:
                current_group.append(chunk)
                current_tokens += chunk_tokens

        # Finalize remaining group
        if current_group:
            merged_part = self._create_merged_part(current_group, current_tokens)
            merged_parts.append(merged_part)

        logger.info(f"📦 Group merged: {len(chunks)} → {len(merged_parts)} parts")
        return merged_parts

    def _create_merged_part(self, chunks: List[ContentPart], total_tokens: int) -> ContentPart:
        """Create a merged ContentPart from multiple chunks."""
        if len(chunks) == 1:
            return chunks[0]  # No need to merge single chunk

        # Combine data with semantic separators
        combined_data = self._combine_chunk_data(chunks)

        # Use metadata from first chunk as base
        base_chunk = chunks[0]
        merged_metadata = base_chunk.metadata.copy()
        merged_metadata.update({
            "merged": True,
            "originalChunkCount": len(chunks),
            "totalTokens": total_tokens,
            "originalChunkIds": [c.id for c in chunks],
            "size": len(combined_data.encode('utf-8'))
        })

        merged_part = ContentPart(
            id=makeId(),
            parentId=base_chunk.parentId,
            label=f"merged_{len(chunks)}_chunks",
            typeGroup=base_chunk.typeGroup,
            mimeType=base_chunk.mimeType,
            data=combined_data,
            metadata=merged_metadata
        )

        logger.debug(f"🔗 Created merged part: {len(chunks)} chunks, {total_tokens} tokens")
        return merged_part

    def _combine_chunk_data(self, chunks: List[ContentPart]) -> str:
        """Combine chunk data with appropriate separators."""
        if not chunks:
            return ""

        # Use different separators based on content type
        if chunks[0].typeGroup == "text":
            separator = "\n\n---\n\n"  # Clear text separation
        elif chunks[0].typeGroup == "table":
            separator = "\n\n[TABLE BREAK]\n\n"  # Table separation
        else:
            separator = "\n\n---\n\n"  # Default separation

        return separator.join([chunk.data for chunk in chunks])

    def _estimate_tokens(self, text: str) -> int:
        """Estimate token count for text."""
        if not text:
            return 0
        return len(text) // self.chars_per_token

    def calculate_optimization_stats(self, original_chunks: List[ContentPart], merged_parts: List[ContentPart]) -> Dict[str, Any]:
        """Calculate optimization statistics with detailed analysis."""
        original_calls = len(original_chunks)
        optimized_calls = len(merged_parts)
        reduction_percent = ((original_calls - optimized_calls) / original_calls * 100) if original_calls > 0 else 0

        # Analyze chunk sizes
        large_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) >= self.effective_max_tokens * 0.9]
        small_chunks = [c for c in original_chunks if self._estimate_tokens(c.data) < self.effective_max_tokens * 0.9]

        # Calculate theoretical maximum optimization (if all small chunks could be merged)
        theoretical_min_calls = len(large_chunks) + max(1, len(small_chunks) // 3)  # Assume 3 small chunks per call
        theoretical_reduction = ((original_calls - theoretical_min_calls) / original_calls * 100) if original_calls > 0 else 0

        return {
            "original_ai_calls": original_calls,
            "optimized_ai_calls": optimized_calls,
            "reduction_percent": round(reduction_percent, 1),
            "cost_savings": f"{reduction_percent:.1f}%",
            "efficiency_gain": f"{original_calls / optimized_calls:.1f}x" if optimized_calls > 0 else "∞",
            "analysis": {
                "large_chunks": len(large_chunks),
                "small_chunks": len(small_chunks),
                "theoretical_min_calls": theoretical_min_calls,
                "theoretical_reduction": round(theoretical_reduction, 1),
                "optimization_potential": "high" if reduction_percent > 50 else "moderate" if reduction_percent > 20 else "low"
            }
        }
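Illustrative sketch, not part of the diff, of the merger's packing behaviour with the defaults above; the capability values are examples and the ContentPart fields follow the datamodel used throughout this change.

from modules.datamodels.datamodelExtraction import ContentPart
from modules.services.serviceExtraction.subMerger import IntelligentTokenAwareMerger

merger = IntelligentTokenAwareMerger({"maxTokens": 4000, "safetyMargin": 0.1, "charsPerToken": 4})

chunks = [
    ContentPart(id=str(i), parentId=None, label=f"chunk_{i}", typeGroup="text",
                mimeType="text/plain", data="lorem ipsum " * 50,
                metadata={"documentId": "doc-1"})
    for i in range(6)
]

merged = merger.merge_chunks_intelligently(chunks, prompt="Summarize the document.")
stats = merger.calculate_optimization_stats(chunks, merged)
# Six ~150-token chunks from one document fit into a single merged part here,
# so reduction_percent comes out around 83%.
print(len(merged), stats["reduction_percent"])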
@@ -3,11 +3,13 @@ import logging
import os

from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart
+from modules.shared.configuration import APP_CONFIG
from .subUtils import makeId
from .subRegistry import ExtractorRegistry, ChunkerRegistry
-from .merging.text_merger import TextMerger
-from .merging.table_merger import TableMerger
-from .merging.default_merger import DefaultMerger
+from .merging.mergerText import TextMerger
+from .merging.mergerTable import TableMerger
+from .merging.mergerDefault import DefaultMerger
+from .subMerger import IntelligentTokenAwareMerger

logger = logging.getLogger(__name__)
@@ -84,46 +86,55 @@ def runExtraction(extractorRegistry: ExtractorRegistry, chunkerRegistry: Chunker
    chunk_parts = [p for p in parts if p.metadata.get("chunk", False)]

    logger.debug(f"runExtraction: Preserving {len(chunk_parts)} chunks from merging")
+   logger.debug(f"runExtraction - non_chunk_parts: {len(non_chunk_parts)}, chunk_parts: {len(chunk_parts)}")

+   # Apply intelligent merging for small text parts
    if non_chunk_parts:
+       # Count text parts
+       text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
+       if len(text_parts) > 5:  # If we have many small text parts, merge them
+           logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
        non_chunk_parts = _mergeParts(non_chunk_parts, mergeStrategy)

    # Combine non-chunk parts with chunk parts (chunks stay separate)
    parts = non_chunk_parts + chunk_parts

    logger.debug(f"runExtraction: Final parts after merging: {len(parts)} (chunks: {len(chunk_parts)})")
-   # DEBUG: dump parts and chunks to files TODO TO REMOVE
+   logger.debug(f"runExtraction - Final parts: {len(parts)} (chunks: {len(chunk_parts)})")
+   # DEBUG: dump parts and chunks to files - only if debug enabled
    try:
-       base_dir = "./test-chat/ai"
-       os.makedirs(base_dir, exist_ok=True)
+       debug_enabled = APP_CONFIG.get("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+       if debug_enabled:
+           base_dir = "./test-chat/ai"
+           os.makedirs(base_dir, exist_ok=True)

            # Generate timestamp for consistent naming
            from datetime import datetime, UTC
            ts = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]

            # Write a summary file
            summary_lines: List[str] = [f"fileName: {fileName}", f"mimeType: {mimeType}", f"totalParts: {len(parts)}"]
            text_index = 0
            for idx, part in enumerate(parts):
                is_texty = part.typeGroup in ("text", "table", "structure")
                size = int(part.metadata.get("size", 0) or 0)
                is_chunk = bool(part.metadata.get("chunk", False))
                summary_lines.append(
                    f"part[{idx}]: typeGroup={part.typeGroup}, label={part.label}, size={size}, chunk={is_chunk}"
                )
                if is_texty and getattr(part, "data", None):
                    text_index += 1
                    fname = f"{ts}_extract_{fileName}_part_{idx:03d}_{'chunk' if is_chunk else 'full'}_{text_index:03d}.txt"
                    fpath = os.path.join(base_dir, fname)
                    with open(fpath, "w", encoding="utf-8") as f:
                        f.write(f"# typeGroup: {part.typeGroup}\n# label: {part.label}\n# chunk: {is_chunk}\n# size: {size}\n\n")
                        f.write(str(part.data))

            # Write summary file
            summary_fname = f"{ts}_extract_{fileName}_summary.txt"
            summary_fpath = os.path.join(base_dir, summary_fname)
            with open(summary_fpath, "w", encoding="utf-8") as f:
                f.write("\n".join(summary_lines))
    except Exception as _e:
        logger.debug(f"Debug dump skipped: {_e}")
@@ -146,13 +157,22 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
    kept: List[ContentPart] = []
    remaining: List[ContentPart] = []

-   for p in parts:
+   logger.debug(f"Starting poolAndLimit with {len(parts)} parts, maxSize={maxSize}")
+
+   for i, p in enumerate(parts):
        size = int(p.metadata.get("size", 0) or 0)
+       # Show first 50 characters of text content for debugging
+       content_preview = p.data[:50].replace('\n', '\\n') if p.data else ""
+       logger.debug(f"Part {i}: {p.typeGroup} - {size} bytes - '{content_preview}...' (current: {current})")
        if current + size <= maxSize:
            kept.append(p)
            current += size
+           logger.debug(f"Part {i} kept (total: {current})")
        else:
            remaining.append(p)
+           logger.debug(f"Part {i} moved to remaining")
+
+   logger.debug(f"Kept: {len(kept)}, Remaining: {len(remaining)}")

    # If we have remaining parts and chunking is allowed, try chunking
    if remaining and chunkAllowed:
@@ -160,12 +180,15 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
        logger.debug(f"Remaining parts to chunk: {len(remaining)}")
        logger.debug(f"Max size limit: {maxSize} bytes")
        logger.debug(f"Current size used: {current} bytes")
+       logger.debug(f"Chunking {len(remaining)} remaining parts")

        for p in remaining:
-           if p.typeGroup in ("text", "table", "structure", "image"):
+           if p.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
                logger.debug(f"Chunking {p.typeGroup} part: {len(p.data)} chars")
+               logger.debug(f"Chunking {p.typeGroup} part with {len(p.data)} chars")
                chunks = chunkerRegistry.resolve(p.typeGroup).chunk(p, options)
                logger.debug(f"Created {len(chunks)} chunks")
+               logger.debug(f"Created {len(chunks)} chunks")

                chunks_added = 0
                for ch in chunks:
@@ -197,12 +220,18 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
    logger.debug(f"Preserving {len(chunk_parts)} chunks from merging")

+   # Apply intelligent merging for small text parts
    if non_chunk_parts:
+       # Count text parts
+       text_parts = [p for p in non_chunk_parts if p.typeGroup == "text"]
+       if len(text_parts) > 5:  # If we have many small text parts, merge them
+           logger.info(f"🔧 Merging {len(text_parts)} small text parts for efficiency")
        non_chunk_parts = _applyMerging(non_chunk_parts, mergeStrategy)

    # Combine non-chunk parts with chunk parts (chunks stay separate)
    kept = non_chunk_parts + chunk_parts

+   logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")
    logger.debug(f"Final parts after merging: {len(kept)} (chunks: {len(chunk_parts)})")

    # Re-check size after merging
@@ -211,11 +240,30 @@ def poolAndLimit(parts: List[ContentPart], chunkerRegistry: ChunkerRegistry, opt
    # Apply size limit to merged parts
    kept = _applySizeLimit(kept, maxSize)

+   logger.debug(f"poolAndLimit returning {len(kept)} parts")
    return kept


def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[ContentPart]:
-   """Apply merging strategy to parts."""
+   """Apply merging strategy to parts with intelligent token-aware merging."""
+   logger.debug(f"_applyMerging called with {len(parts)} parts")
+
+   # Check if intelligent merging is enabled
+   if strategy.get("useIntelligentMerging", False):
+       model_capabilities = strategy.get("modelCapabilities", {})
+       subMerger = IntelligentTokenAwareMerger(model_capabilities)
+
+       # Use intelligent merging for all parts
+       merged = subMerger.merge_chunks_intelligently(parts, strategy.get("prompt", ""))
+
+       # Calculate and log optimization stats
+       stats = subMerger.calculate_optimization_stats(parts, merged)
+       logger.info(f"🧠 Intelligent merging stats: {stats}")
+       logger.debug(f"Intelligent merging: {stats['original_ai_calls']} → {stats['optimized_ai_calls']} calls ({stats['reduction_percent']}% reduction)")
+
+       return merged
+
+   # Fallback to traditional merging
    textMerger = TextMerger()
    tableMerger = TableMerger()
    defaultMerger = DefaultMerger()
@@ -226,18 +274,29 @@ def _applyMerging(parts: List[ContentPart], strategy: Dict[str, Any]) -> List[Co
    structureParts = [p for p in parts if p.typeGroup == "structure"]
    otherParts = [p for p in parts if p.typeGroup not in ("text", "table", "structure")]

+   logger.debug(f"Grouped - text: {len(textParts)}, table: {len(tableParts)}, structure: {len(structureParts)}, other: {len(otherParts)}")
+
    merged: List[ContentPart] = []

    if textParts:
-       merged.extend(textMerger.merge(textParts, strategy))
+       textMerged = textMerger.merge(textParts, strategy)
+       logger.debug(f"TextMerger merged {len(textParts)} parts into {len(textMerged)} parts")
+       merged.extend(textMerged)
    if tableParts:
-       merged.extend(tableMerger.merge(tableParts, strategy))
+       tableMerged = tableMerger.merge(tableParts, strategy)
+       logger.debug(f"TableMerger merged {len(tableParts)} parts into {len(tableMerged)} parts")
+       merged.extend(tableMerged)
    if structureParts:
        # For now, treat structure like text
-       merged.extend(textMerger.merge(structureParts, strategy))
+       structureMerged = textMerger.merge(structureParts, strategy)
+       logger.debug(f"StructureMerger merged {len(structureParts)} parts into {len(structureMerged)} parts")
+       merged.extend(structureMerged)
    if otherParts:
-       merged.extend(defaultMerger.merge(otherParts, strategy))
+       otherMerged = defaultMerger.merge(otherParts, strategy)
+       logger.debug(f"DefaultMerger merged {len(otherParts)} parts into {len(otherMerged)} parts")
+       merged.extend(otherMerged)

+   logger.debug(f"_applyMerging returning {len(merged)} parts")
    return merged
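For reference, a sketch of a mergeStrategy dict that would take the new intelligent branch in _applyMerging above; the key names are exactly the ones the code reads, while the concrete values are only illustrative.

# Illustrative values; key names match what _applyMerging and IntelligentTokenAwareMerger read.
mergeStrategy = {
    "useIntelligentMerging": True,     # route parts through IntelligentTokenAwareMerger
    "modelCapabilities": {
        "maxTokens": 8000,             # model context budget
        "safetyMargin": 0.1,           # keep 10% headroom
        "charsPerToken": 4,            # rough chars-per-token estimate
    },
    "prompt": "Extract the key facts from the following content.",
}
# With "useIntelligentMerging" False (or absent), _applyMerging falls back to
# TextMerger / TableMerger / DefaultMerger as before.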
@@ -1,14 +1,37 @@
from typing import Any, Dict, Optional
import logging

from modules.datamodels.datamodelExtraction import ContentPart

logger = logging.getLogger(__name__)


class Extractor:
    """
    Base class for all document extractors.

    Each extractor should implement:
    - detect(): Check if this extractor can handle the given file
    - extract(): Extract content from the file
    - getSupportedExtensions(): Return supported file extensions
    - getSupportedMimeTypes(): Return supported MIME types
    """

    def detect(self, fileName: str, mimeType: str, headBytes: bytes) -> bool:
        """Check if this extractor can handle the given file."""
        return False

    def extract(self, fileBytes: bytes, context: Dict[str, Any]) -> list[ContentPart]:
        """Extract content from the file bytes."""
        raise NotImplementedError

    def getSupportedExtensions(self) -> list[str]:
        """Return list of supported file extensions (including dots)."""
        return []

    def getSupportedMimeTypes(self) -> list[str]:
        """Return list of supported MIME types."""
        return []


class Chunker:
@ -20,50 +43,85 @@ class ExtractorRegistry:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self._map: Dict[str, Extractor] = {}
|
self._map: Dict[str, Extractor] = {}
|
||||||
self._fallback: Optional[Extractor] = None
|
self._fallback: Optional[Extractor] = None
|
||||||
# Register built-ins
|
self._auto_discover_extractors()
|
||||||
|
|
||||||
|
def _auto_discover_extractors(self):
|
||||||
|
"""Auto-discover and register all extractors from the extractors directory."""
|
||||||
try:
|
try:
|
||||||
from .formats.text_extractor import TextExtractor
|
import os
|
||||||
from .formats.csv_extractor import CsvExtractor
|
import importlib
|
||||||
from .formats.json_extractor import JsonExtractor
|
from pathlib import Path
|
||||||
from .formats.xml_extractor import XmlExtractor
|
|
||||||
from .formats.html_extractor import HtmlExtractor
|
# Get the extractors directory
|
||||||
from .formats.pdf_extractor import PdfExtractor
|
current_dir = Path(__file__).parent
|
||||||
from .formats.docx_extractor import DocxExtractor
|
extractors_dir = current_dir / "extractors"
|
||||||
from .formats.xlsx_extractor import XlsxExtractor
|
|
||||||
from .formats.image_extractor import ImageExtractor
|
if not extractors_dir.exists():
|
||||||
from .formats.binary_extractor import BinaryExtractor
|
logger.error(f"Extractors directory not found: {extractors_dir}")
|
||||||
self.register("text/plain", TextExtractor())
|
return
|
||||||
self.register("text/markdown", TextExtractor())
|
|
||||||
self.register("text/csv", CsvExtractor())
|
# Import all extractor modules
|
||||||
self.register("application/json", JsonExtractor())
|
extractor_modules = []
|
||||||
self.register("application/xml", XmlExtractor())
|
for file_path in extractors_dir.glob("extractor*.py"):
|
||||||
self.register("text/html", HtmlExtractor())
|
if file_path.name == "__init__.py":
|
||||||
self.register("application/pdf", PdfExtractor())
|
continue
|
||||||
self.register("application/vnd.openxmlformats-officedocument.wordprocessingml.document", DocxExtractor())
|
|
||||||
self.register("application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", XlsxExtractor())
|
module_name = file_path.stem
|
||||||
# images
|
try:
|
||||||
self.register("image/jpeg", ImageExtractor())
|
# Import the module
|
||||||
self.register("image/png", ImageExtractor())
|
module = importlib.import_module(f".{module_name}", package="modules.services.serviceExtraction.extractors")
|
||||||
self.register("image/gif", ImageExtractor())
|
|
||||||
# extension fallbacks
|
# Find all extractor classes in the module
|
||||||
self.register("txt", TextExtractor())
|
for attr_name in dir(module):
|
||||||
self.register("md", TextExtractor())
|
attr = getattr(module, attr_name)
|
||||||
self.register("csv", CsvExtractor())
|
if (isinstance(attr, type) and
|
||||||
self.register("json", JsonExtractor())
|
issubclass(attr, Extractor) and
|
||||||
self.register("xml", XmlExtractor())
|
attr != Extractor and
|
||||||
self.register("html", HtmlExtractor())
|
not attr_name.startswith('_')):
|
||||||
self.register("htm", HtmlExtractor())
|
|
||||||
self.register("pdf", PdfExtractor())
|
# Create instance and auto-register
|
||||||
self.register("docx", DocxExtractor())
|
extractor_instance = attr()
|
||||||
self.register("xlsx", XlsxExtractor())
|
self._auto_register_extractor(extractor_instance)
|
||||||
self.register("xlsm", XlsxExtractor())
|
extractor_modules.append(attr_name)
|
||||||
# fallback
|
|
||||||
self.setFallback(BinaryExtractor())
|
except Exception as e:
|
||||||
print(f"✅ ExtractorRegistry: Successfully registered {len(self._map)} extractors")
|
logger.warning(f"Failed to import {module_name}: {str(e)}")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Set fallback extractor
|
||||||
|
try:
|
||||||
|
from .extractors.extractorBinary import BinaryExtractor
|
||||||
|
self.setFallback(BinaryExtractor())
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to set fallback extractor: {str(e)}")
|
||||||
|
|
||||||
|
logger.info(f"ExtractorRegistry: Auto-discovered and registered {len(extractor_modules)} extractor classes: {', '.join(extractor_modules)}")
|
||||||
|
logger.info(f"ExtractorRegistry: Total registered formats: {len(self._map)}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print(f"❌ ExtractorRegistry: Failed to register extractors: {str(e)}")
|
logger.error(f"ExtractorRegistry: Failed to auto-discover extractors: {str(e)}")
|
||||||
import traceback
|
import traceback
|
||||||
traceback.print_exc()
|
traceback.print_exc()
|
||||||
|
|
||||||
+    def _auto_register_extractor(self, extractor: Extractor):
+        """Auto-register an extractor based on its declared supported formats."""
+        try:
+            # Register MIME types
+            mime_types = extractor.getSupportedMimeTypes()
+            for mime_type in mime_types:
+                self.register(mime_type, extractor)
+                logger.debug(f"Registered MIME type: {mime_type} → {extractor.__class__.__name__}")
+
+            # Register file extensions
+            extensions = extractor.getSupportedExtensions()
+            for ext in extensions:
+                # Remove leading dot for registry key
+                ext_key = ext.lstrip('.')
+                self.register(ext_key, extractor)
+                logger.debug(f"Registered extension: .{ext_key} → {extractor.__class__.__name__}")
+
+        except Exception as e:
+            logger.error(f"Failed to auto-register {extractor.__class__.__name__}: {str(e)}")
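For orientation, a minimal sketch of an extractor module that this auto-discovery and auto-registration would pick up. Only getSupportedMimeTypes()/getSupportedExtensions() are taken from the registry code above; the module path, class name, and formats are illustrative assumptions.

# modules/services/serviceExtraction/extractors/extractorExample.py — illustrative only
from .extractorBase import Extractor  # assumed location of the Extractor base class

class ExampleExtractor(Extractor):
    def getSupportedMimeTypes(self) -> list[str]:
        # MIME keys registered directly into the registry map
        return ["text/x-example"]

    def getSupportedExtensions(self) -> list[str]:
        # leading dots are stripped before registration
        return [".example"]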
    def register(self, key: str, extractor: Extractor):
        self._map[key] = extractor

@ -80,6 +138,43 @@ class ExtractorRegistry:
        if ext in self._map:
            return self._map[ext]
        return self._fallback
+    def getAllSupportedFormats(self) -> Dict[str, Dict[str, list[str]]]:
+        """
+        Get all supported formats from all registered extractors.
+
+        Returns:
+            Dictionary with format information:
+            {
+                "extensions": {
+                    "extractor_name": [".ext1", ".ext2", ...]
+                },
+                "mime_types": {
+                    "extractor_name": ["mime/type1", "mime/type2", ...]
+                }
+            }
+        """
+        formats = {"extensions": {}, "mime_types": {}}
+
+        # Get formats from registered extractors
+        for key, extractor in self._map.items():
+            if hasattr(extractor, 'getSupportedExtensions'):
+                extensions = extractor.getSupportedExtensions()
+                if extensions:
+                    formats["extensions"][key] = extensions
+
+            if hasattr(extractor, 'getSupportedMimeTypes'):
+                mime_types = extractor.getSupportedMimeTypes()
+                if mime_types:
+                    formats["mime_types"][key] = mime_types
+
+        # Add fallback extractor info
+        if self._fallback and hasattr(self._fallback, 'getSupportedExtensions'):
+            formats["extensions"]["fallback"] = self._fallback.getSupportedExtensions()
+        if self._fallback and hasattr(self._fallback, 'getSupportedMimeTypes'):
+            formats["mime_types"]["fallback"] = self._fallback.getSupportedMimeTypes()
+
+        return formats
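A short usage sketch of the structure getAllSupportedFormats() returns, as described in the docstring above; the registry variable name is illustrative.

formats = registry.getAllSupportedFormats()
# e.g. {"extensions": {"pdf": [".pdf"], "fallback": [...]},
#       "mime_types": {"application/pdf": ["application/pdf"], "fallback": [...]}}
for key, extensions in formats["extensions"].items():
    print(f"{key}: {extensions}")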
class ChunkerRegistry:

@ -88,17 +183,19 @@ class ChunkerRegistry:
        self._noop = Chunker()
        # Register default chunkers
        try:
-            from .chunking.text_chunker import TextChunker
-            from .chunking.table_chunker import TableChunker
-            from .chunking.structure_chunker import StructureChunker
-            # Skip ImageChunker for now to avoid PIL import hang
-            # from .chunking.image_chunker import ImageChunker
+            from .chunking.chunkerText import TextChunker
+            from .chunking.chunkerTable import TableChunker
+            from .chunking.chunkerStructure import StructureChunker
+            from .chunking.chunkerImage import ImageChunker
            self.register("text", TextChunker())
            self.register("table", TableChunker())
            self.register("structure", StructureChunker())
-            # self.register("image", ImageChunker())
+            self.register("image", ImageChunker())
+            # Use text chunker for container and binary content
+            self.register("container", TextChunker())
+            self.register("binary", TextChunker())
        except Exception as e:
-            print(f"❌ ChunkerRegistry: Failed to register chunkers: {str(e)}")
+            logger.error(f"ChunkerRegistry: Failed to register chunkers: {str(e)}")
            import traceback
            traceback.print_exc()
@ -1,6 +1,7 @@
import logging
import uuid
-from typing import Any, Dict, List, Optional
+import json
+from typing import Any, Dict, List, Optional, Union, Tuple
from datetime import datetime, UTC
import re
from modules.shared.timezoneUtils import get_utc_timestamp

@ -18,7 +19,7 @@ logger = logging.getLogger(__name__)
class GenerationService:
    def __init__(self, serviceCenter=None):
        # Directly use interfaces from the provided service center (no self.service calls)
-        self.serviceCenter = serviceCenter
+        self.services = serviceCenter
        self.interfaceDbComponent = getattr(serviceCenter, 'interfaceDbComponent', None) if serviceCenter else None
        self.interfaceDbChat = getattr(serviceCenter, 'interfaceDbChat', None) if serviceCenter else None
        self.workflow = getattr(serviceCenter, 'workflow', None) if serviceCenter else None
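A minimal construction sketch, assuming a service-center object that exposes the attributes read in __init__ above; the stub class is illustrative and not part of the codebase.

class _StubServiceCenter:
    interfaceDbComponent = None
    interfaceDbChat = None
    workflow = None
    # a real service center also exposes a utils helper, since
    # self.services.utils.configGet(...) is called further down

service = GenerationService(serviceCenter=_StubServiceCenter())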
@ -296,101 +297,237 @@ class GenerationService:
                'workflowId': 'unknown'
            }

-    async def renderReport(self, extracted_content: str, output_format: str, title: str) -> tuple[str, str]:
+    async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
        """
-        Render extracted content to the specified output format.
+        Render extracted JSON content to the specified output format.

        Args:
-            extracted_content: Content extracted by AI using format-specific prompt
-            output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
+            extractedContent: Structured JSON document from AI extraction
+            outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
            title: Report title
+            userPrompt: User's original prompt for report generation
+            aiService: AI service instance for generation prompt creation

        Returns:
            tuple: (rendered_content, mime_type)
        """
        try:
-            # DEBUG: dump renderer input to diagnose JSON+HTML mixtures TODO REMOVE
+            # Validate JSON input
+            if not isinstance(extractedContent, dict):
+                raise ValueError("extractedContent must be a JSON dictionary")
+
+            if "sections" not in extractedContent:
+                raise ValueError("extractedContent must contain 'sections' field")
+
+            # DEBUG: Log renderer input metadata only (no verbose JSON) - only if debug enabled
            try:
-                import os
-                ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
-                debug_root = "./test-chat/ai"
-                debug_dir = os.path.join(debug_root, f"render_input_{ts}")
-                os.makedirs(debug_dir, exist_ok=True)
-                with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
-                    f.write(f"title: {title}\nformat: {output_format}\nlength: {len(extracted_content or '')}\nstarts_with_brace: {str(extracted_content.strip().startswith('{') if extracted_content else False)}\n")
-                with open(os.path.join(debug_dir, "extracted_content.txt"), "w", encoding="utf-8") as f:
-                    f.write(extracted_content or "")
+                debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+                if debug_enabled:
+                    import os
+                    ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
+                    debug_root = "./test-chat/ai"
+                    debug_dir = os.path.join(debug_root, f"render_input_{ts}")
+                    os.makedirs(debug_dir, exist_ok=True)
+                    with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
+                        f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
+                        f.write(f"content_size: {len(str(extractedContent))} characters\n")
+                        f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n")
            except Exception:
                pass

            # Get the appropriate renderer for the format
-            renderer = self._getFormatRenderer(output_format)
+            renderer = self._getFormatRenderer(outputFormat)
            if not renderer:
-                raise ValueError(f"Unsupported output format: {output_format}")
+                raise ValueError(f"Unsupported output format: {outputFormat}")

-            # Render the content
-            rendered_content, mime_type = await renderer.render(extracted_content, title)
+            # Render the JSON content directly (AI generation handled by main service)
+            renderedContent, mimeType = await renderer.render(extractedContent, title, userPrompt, aiService)
            # DEBUG: dump rendered output
            try:
                import os
                with open(os.path.join(debug_dir, "rendered_output.txt"), "w", encoding="utf-8") as f:
-                    f.write(rendered_content or "")
+                    f.write(renderedContent or "")
            except Exception:
                pass

-            logger.info(f"Successfully rendered report to {output_format} format: {len(rendered_content)} characters")
-            return rendered_content, mime_type
+            logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")
+            return renderedContent, mimeType

        except Exception as e:
-            logger.error(f"Error rendering report to {output_format}: {str(e)}")
+            logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
            raise
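A minimal sketch of the input renderReport() now expects — a dict that passes the 'sections' validation above. The section shape mirrors the fallback structure used by the JSON renderer later in this diff; the values are illustrative.

extracted = {
    "title": "Quarterly Summary",  # optional, read by renderers that use it
    "sections": [
        {"type": "text", "content": "Revenue grew in Q2."}
    ]
}
# rendered, mime = await service.renderReport(extracted, "html", "Quarterly Summary")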
-    def getExtractionPrompt(self, output_format: str, user_prompt: str, title: str) -> str:
+    async def getAdaptiveExtractionPrompt(
+        self,
+        outputFormat: str,
+        userPrompt: str,
+        title: str,
+        promptAnalysis: Dict[str, Any],
+        aiService=None
+    ) -> str:
+        """Get adaptive extraction prompt based on AI analysis."""
+        from .subPromptBuilder import buildAdaptiveExtractionPrompt
+        return await buildAdaptiveExtractionPrompt(
+            outputFormat=outputFormat,
+            userPrompt=userPrompt,
+            title=title,
+            promptAnalysis=promptAnalysis,
+            aiService=aiService,
+            services=self.services
+        )
+
+    async def getGenerationPrompt(
+        self,
+        outputFormat: str,
+        userPrompt: str,
+        title: str,
+        aiService=None
+    ) -> str:
+        """Get generation prompt for enhancing extracted JSON content."""
+        from .subPromptBuilder import buildGenerationPrompt
+        return await buildGenerationPrompt(
+            outputFormat=outputFormat,
+            userPrompt=userPrompt,
+            title=title,
+            aiService=aiService,
+            services=self.services
+        )
+
+    async def getGenericExtractionPrompt(
+        self,
+        outputFormat: str,
+        userPrompt: str,
+        title: str,
+        aiService=None
+    ) -> str:
+        """Get generic extraction prompt that works for both single and multi-file."""
+        from .subPromptBuilder import buildGenericExtractionPrompt
+        return await buildGenericExtractionPrompt(
+            outputFormat=outputFormat,
+            userPrompt=userPrompt,
+            title=title,
+            aiService=aiService,
+            services=self.services
+        )
+    async def getExtractionPrompt(self, outputFormat: str, userPrompt: str, title: str, aiService=None) -> str:
        """
        Get the format-specific extraction prompt for AI content extraction.

        Args:
-            output_format: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
-            user_prompt: User's original prompt for report generation
+            outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
+            userPrompt: User's original prompt for report generation
            title: Report title
+            aiService: AI service instance for intent extraction

        Returns:
            str: Format-specific prompt for AI extraction
        """
        try:
            # Get the appropriate renderer for the format
-            renderer = self._getFormatRenderer(output_format)
+            renderer = self._getFormatRenderer(outputFormat)
            if not renderer:
-                raise ValueError(f"Unsupported output format: {output_format}")
+                raise ValueError(f"Unsupported output format: {outputFormat}")

            # Build centralized prompt with generic rules + format-specific guidelines
-            from .prompt_builder import buildExtractionPrompt
-            extraction_prompt = buildExtractionPrompt(
-                output_format=output_format,
+            from .subPromptBuilder import buildExtractionPrompt
+            extractionPrompt = await buildExtractionPrompt(
+                outputFormat=outputFormat,
                renderer=renderer,
-                user_prompt=user_prompt,
-                title=title
+                userPrompt=userPrompt,
+                title=title,
+                aiService=aiService,
+                services=self.services
            )

-            logger.info(f"Generated {output_format}-specific extraction prompt: {len(extraction_prompt)} characters")
-            return extraction_prompt
+            logger.info(f"Generated {outputFormat}-specific extraction prompt: {len(extractionPrompt)} characters")
+            return extractionPrompt

        except Exception as e:
-            logger.error(f"Error getting extraction prompt for {output_format}: {str(e)}")
+            logger.error(f"Error getting extraction prompt for {outputFormat}: {str(e)}")
            raise
+    async def renderAdaptiveReport(
+        self,
+        extractedContent: Dict[str, Any],
+        outputFormat: str,
+        title: str,
+        userPrompt: str = None,
+        aiService=None,
+        isMultiFile: bool = False
+    ) -> Union[Tuple[str, str], List[Dict[str, Any]]]:
+        """Render report adaptively based on content structure."""
+
+        if isMultiFile and "documents" in extractedContent:
+            return await self._renderMultiFileReport(
+                extractedContent, outputFormat, title, userPrompt, aiService
+            )
+        else:
+            return await self._renderSingleFileReport(
+                extractedContent, outputFormat, title, userPrompt, aiService
+            )
+
+    async def _renderMultiFileReport(
+        self,
+        extractedContent: Dict[str, Any],
+        outputFormat: str,
+        title: str,
+        userPrompt: str = None,
+        aiService=None
+    ) -> List[Dict[str, Any]]:
+        """Render multiple documents from extracted content."""
+
+        generated_documents = []
+
+        for doc_data in extractedContent.get("documents", []):
+            # Use existing single-file renderer for each document
+            renderer = self._getFormatRenderer(outputFormat)
+            if not renderer:
+                continue
+
+            # Render individual document
+            rendered_content, mime_type = await renderer.render(
+                extractedContent={"sections": doc_data["sections"]},
+                title=doc_data["title"],
+                userPrompt=userPrompt,
+                aiService=aiService
+            )
+
+            generated_documents.append({
+                "filename": doc_data["filename"],
+                "content": rendered_content,
+                "mime_type": mime_type,
+                "title": doc_data["title"]
+            })
+
+        return generated_documents
+
+    async def _renderSingleFileReport(
+        self,
+        extractedContent: Dict[str, Any],
+        outputFormat: str,
+        title: str,
+        userPrompt: str = None,
+        aiService=None
+    ) -> Tuple[str, str]:
+        """Render single file report (existing functionality)."""
+        # Use existing renderReport method
+        return await self.renderReport(
+            extractedContent, outputFormat, title, userPrompt, aiService
+        )
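A sketch of the multi-file payload _renderMultiFileReport() iterates over; the keys documents/filename/title/sections come from the loop above, while the concrete values are illustrative.

multi = {
    "documents": [
        {"filename": "summary.html", "title": "Summary",
         "sections": [{"type": "text", "content": "High-level findings."}]},
        {"filename": "details.html", "title": "Details",
         "sections": [{"type": "text", "content": "Per-source breakdown."}]},
    ]
}
# docs = await service.renderAdaptiveReport(multi, "html", "Report", isMultiFile=True)
# each entry in docs carries filename, content, mime_type, and title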
    def _getFormatRenderer(self, output_format: str):
        """Get the appropriate renderer for the specified format using auto-discovery."""
        try:
            from .renderers.registry import get_renderer
-            renderer = get_renderer(output_format)
+            renderer = get_renderer(output_format, services=self.services)

            if renderer:
                return renderer

            # Fallback to text renderer if no specific renderer found
            logger.warning(f"No renderer found for format {output_format}, falling back to text")
-            fallback_renderer = get_renderer('text')
+            fallback_renderer = get_renderer('text', services=self.services)
            if fallback_renderer:
                return fallback_renderer
@ -1,72 +0,0 @@
"""
Centralized prompt builder for document generation across formats.

Builds a robust prompt that:
- Accepts any user intent (no fixed structure assumptions)
- Injects format-specific guidelines from the selected renderer
- Adds a common policy section to always use real data from source docs
- Requires the AI to output a filename header that we can parse and use
"""

from typing import Protocol


class _RendererLike(Protocol):
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:  # returns only format-specific guidelines
        ...


def buildExtractionPrompt(
    output_format: str,
    renderer: _RendererLike,
    user_prompt: str,
    title: str
) -> str:
    """
    Build the final extraction prompt by combining:
    - The raw user prompt (verbatim)
    - Generic cross-format instructions (filename header + real-data policy)
    - Format-specific guidelines snippet provided by the renderer

    The AI must place a single filename header at the very top:
        FILENAME: <safe-file-name-with-extension>
    followed by a blank line and then ONLY the document content according to the target format.
    """

    format_guidelines = renderer.getExtractionPrompt(user_prompt, title)

    # Generic block appears once for every format
    generic_intro = f"""
{user_prompt}

You are generating a document in {output_format.upper()} format for the title: "{title}".

Rules:
- The user's intent fully defines the structure. Do not assume a fixed template or headings.
- Use only factual information extracted from the supplied source documents.
- Do not invent, hallucinate, or include placeholders (e.g., "lorem ipsum", "TBD").
- The output must strictly follow the target format and be ready for saving without extra wrapping.
- At the VERY TOP output exactly one line with the filename header:
  FILENAME: <safe-file-name-with-extension>
- The base name should be short, descriptive, and kebab-case or snake-case without spaces.
- Include the correct extension for the requested format (e.g., .html, .pdf, .docx, .md, .txt, .json, .csv, .xlsx).
- Avoid special characters beyond [a-zA-Z0-9-_].
- After this header, insert a single blank line and then provide ONLY the document content.

Common policy:
- Use the actual data from the source documents to create the content.
- Do not generate placeholder text or templates.
- Extract and use the real data provided in the source documents to create meaningful content.
""".strip()

    # Final assembly
    final_prompt = (
        generic_intro
        + "\n\nFORMAT-SPECIFIC GUIDELINES:\n"
        + format_guidelines.strip()
        + "\n\nGenerate the complete document content now based on the source documents below:"
    )

    return final_prompt
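A minimal call sketch for the removed builder above; the renderer class and prompt text are illustrative, and the expected model reply is described in the comments.

prompt = buildExtractionPrompt(
    output_format="md",
    renderer=MarkdownRenderer(),  # any renderer exposing getExtractionPrompt()
    user_prompt="Summarize Q2 sales by region",
    title="Q2 Sales Summary"
)
# The model's reply is then expected to start with:
#   FILENAME: q2-sales-summary.md
# followed by a blank line and only the Markdown body.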
@ -1,86 +0,0 @@
"""
Base renderer class for all format renderers.
"""

from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
import logging

logger = logging.getLogger(__name__)

class BaseRenderer(ABC):
    """Base class for all format renderers."""

    def __init__(self):
        self.logger = logger

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """
        Return list of supported format names for this renderer.
        Override this method in subclasses to specify supported formats.
        """
        return []

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """
        Return list of format aliases for this renderer.
        Override this method in subclasses to specify format aliases.
        """
        return []

    @classmethod
    def get_priority(cls) -> int:
        """
        Return priority for this renderer (higher number = higher priority).
        Used when multiple renderers support the same format.
        """
        return 0

    @abstractmethod
    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """
        Get the format-specific extraction prompt for AI content extraction.

        Args:
            user_prompt: User's original prompt for report generation
            title: Report title

        Returns:
            str: Format-specific prompt for AI extraction
        """
        pass

    @abstractmethod
    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """
        Render extracted content to the target format.

        Args:
            extracted_content: Raw content extracted by AI using format-specific prompt
            title: Report title

        Returns:
            tuple: (rendered_content, mime_type)
        """
        pass

    def _extract_sections(self, report_data: Dict[str, Any]) -> list:
        """Extract sections from report data."""
        return report_data.get('sections', [])

    def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metadata from report data."""
        return report_data.get('metadata', {})

    def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
        """Get title from report data or use fallback."""
        return report_data.get('title', fallback_title)

    def _format_timestamp(self, timestamp: str = None) -> str:
        """Format timestamp for display."""
        if timestamp:
            return timestamp
        from datetime import datetime, UTC
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
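A minimal sketch of a concrete subclass that satisfies the two abstract methods of the removed base class above; the format name, priority, and prompt text are illustrative.

class PlainTextRenderer(BaseRenderer):
    @classmethod
    def get_supported_formats(cls) -> List[str]:
        return ['txt']

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        # only format-specific guidelines; the global prompt is built centrally
        return "TXT FORMAT GUIDELINES:\n- Return plain text only, no markup."

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        # pass the AI output through unchanged, with its MIME type
        return extracted_content.strip(), "text/plain"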
@ -1,64 +0,0 @@
"""
CSV renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import csv
import io

class CsvRenderer(BaseRenderer):
    """Renders content to CSV format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported CSV formats."""
        return ['csv']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['spreadsheet', 'table']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for CSV renderer."""
        return 70

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only CSV-specific guidelines; global prompt is built centrally."""
        return (
            "CSV FORMAT GUIDELINES:\n"
            "- Emit ONLY CSV text without fences or commentary.\n"
            "- Include a single header row with clear column names.\n"
            "- Quote fields containing commas, quotes, or newlines; escape quotes by doubling them.\n"
            "- Use rows to represent items/records derived from sources.\n"
            "- Keep cells concise; include units in headers when useful.\n"
            "OUTPUT: Return ONLY valid CSV content that can be imported."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to CSV format."""
        try:
            # The extracted content should already be CSV from the AI
            # Just clean it up
            csv_content = self._clean_csv_content(extracted_content, title)

            return csv_content, "text/csv"

        except Exception as e:
            self.logger.error(f"Error rendering CSV: {str(e)}")
            # Return minimal CSV fallback
            return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"

    def _clean_csv_content(self, content: str, title: str) -> str:
        """Clean and validate CSV content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        return content
@ -1,249 +0,0 @@
|
||||||
"""
|
|
||||||
DOCX renderer for report generation using python-docx.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .base_renderer import BaseRenderer
|
|
||||||
from typing import Dict, Any, Tuple, List
|
|
||||||
import io
|
|
||||||
import base64
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
try:
|
|
||||||
from docx import Document
|
|
||||||
from docx.shared import Inches, Pt
|
|
||||||
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
|
||||||
from docx.enum.table import WD_TABLE_ALIGNMENT
|
|
||||||
from docx.oxml.shared import OxmlElement, qn
|
|
||||||
from docx.oxml.ns import nsdecls
|
|
||||||
from docx.oxml import parse_xml
|
|
||||||
DOCX_AVAILABLE = True
|
|
||||||
except ImportError:
|
|
||||||
DOCX_AVAILABLE = False
|
|
||||||
|
|
||||||
class DocxRenderer(BaseRenderer):
|
|
||||||
"""Renders content to DOCX format using python-docx."""
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_supported_formats(cls) -> List[str]:
|
|
||||||
"""Return supported DOCX formats."""
|
|
||||||
return ['docx', 'doc']
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_format_aliases(cls) -> List[str]:
|
|
||||||
"""Return format aliases."""
|
|
||||||
return ['word', 'document']
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_priority(cls) -> int:
|
|
||||||
"""Return priority for DOCX renderer."""
|
|
||||||
return 115
|
|
||||||
|
|
||||||
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
|
||||||
"""Return only DOCX-specific guidelines; global prompt is built centrally."""
|
|
||||||
return (
|
|
||||||
"DOCX FORMAT GUIDELINES:\n"
|
|
||||||
"- Provide plain text content suitable for Word generation (no markdown/HTML).\n"
|
|
||||||
"- Use clear section hierarchy; bullet and numbered lists where needed.\n"
|
|
||||||
"- Include tables as simple pipe-delimited lines if tabular data is needed.\n"
|
|
||||||
"OUTPUT: Return ONLY the structured plain text to be converted into DOCX."
|
|
||||||
)
|
|
||||||
|
|
||||||
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
|
||||||
"""Render extracted content to DOCX format."""
|
|
||||||
try:
|
|
||||||
if not DOCX_AVAILABLE:
|
|
||||||
# Fallback to HTML if python-docx not available
|
|
||||||
from .html_renderer import HtmlRenderer
|
|
||||||
html_renderer = HtmlRenderer()
|
|
||||||
html_content, _ = await html_renderer.render(extracted_content, title)
|
|
||||||
return html_content, "text/html"
|
|
||||||
|
|
||||||
# Generate DOCX using python-docx
|
|
||||||
docx_content = self._generate_docx(extracted_content, title)
|
|
||||||
|
|
||||||
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error rendering DOCX: {str(e)}")
|
|
||||||
# Return minimal fallback
|
|
||||||
return f"DOCX Generation Error: {str(e)}", "text/plain"
|
|
||||||
|
|
||||||
def _generate_docx(self, content: str, title: str) -> str:
|
|
||||||
"""Generate DOCX content using python-docx."""
|
|
||||||
try:
|
|
||||||
# Create new document
|
|
||||||
doc = Document()
|
|
||||||
|
|
||||||
# Set up document styles
|
|
||||||
self._setup_document_styles(doc)
|
|
||||||
|
|
||||||
# Add title
|
|
||||||
title_para = doc.add_heading(title, 0)
|
|
||||||
title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
||||||
|
|
||||||
# Add generation date
|
|
||||||
date_para = doc.add_paragraph(f"Generated: {self._format_timestamp()}")
|
|
||||||
date_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
|
||||||
|
|
||||||
# Add page break
|
|
||||||
doc.add_page_break()
|
|
||||||
|
|
||||||
# Process content
|
|
||||||
lines = content.split('\n')
|
|
||||||
current_section = []
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
line = line.strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check for ALL CAPS headings (major headings)
|
|
||||||
if line.isupper() and len(line) > 3 and not line.startswith('-') and not line.startswith('*'):
|
|
||||||
if current_section:
|
|
||||||
self._process_section(doc, current_section)
|
|
||||||
current_section = []
|
|
||||||
doc.add_heading(line, level=1)
|
|
||||||
# Check for Title Case headings (subheadings)
|
|
||||||
elif line.istitle() and len(line) > 5 and not line.startswith('-') and not line.startswith('*') and not line.startswith(('1.', '2.', '3.', '4.', '5.')):
|
|
||||||
if current_section:
|
|
||||||
self._process_section(doc, current_section)
|
|
||||||
current_section = []
|
|
||||||
doc.add_heading(line, level=2)
|
|
||||||
# Check for markdown headings (fallback)
|
|
||||||
elif line.startswith('# '):
|
|
||||||
# H1 heading
|
|
||||||
if current_section:
|
|
||||||
self._process_section(doc, current_section)
|
|
||||||
current_section = []
|
|
||||||
doc.add_heading(line[2:], level=1)
|
|
||||||
elif line.startswith('## '):
|
|
||||||
# H2 heading
|
|
||||||
if current_section:
|
|
||||||
self._process_section(doc, current_section)
|
|
||||||
current_section = []
|
|
||||||
doc.add_heading(line[3:], level=2)
|
|
||||||
elif line.startswith('### '):
|
|
||||||
# H3 heading
|
|
||||||
if current_section:
|
|
||||||
self._process_section(doc, current_section)
|
|
||||||
current_section = []
|
|
||||||
doc.add_heading(line[4:], level=3)
|
|
||||||
else:
|
|
||||||
current_section.append(line)
|
|
||||||
|
|
||||||
# Process remaining content
|
|
||||||
if current_section:
|
|
||||||
self._process_section(doc, current_section)
|
|
||||||
|
|
||||||
# Save to buffer
|
|
||||||
buffer = io.BytesIO()
|
|
||||||
doc.save(buffer)
|
|
||||||
buffer.seek(0)
|
|
||||||
|
|
||||||
# Convert to base64
|
|
||||||
docx_bytes = buffer.getvalue()
|
|
||||||
docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
|
|
||||||
|
|
||||||
return docx_base64
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error generating DOCX: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _setup_document_styles(self, doc):
|
|
||||||
"""Set up document styles."""
|
|
||||||
try:
|
|
||||||
# Set default font
|
|
||||||
style = doc.styles['Normal']
|
|
||||||
font = style.font
|
|
||||||
font.name = 'Calibri'
|
|
||||||
font.size = Pt(11)
|
|
||||||
|
|
||||||
# Set heading styles
|
|
||||||
for i in range(1, 4):
|
|
||||||
heading_style = doc.styles[f'Heading {i}']
|
|
||||||
heading_font = heading_style.font
|
|
||||||
heading_font.name = 'Calibri'
|
|
||||||
heading_font.size = Pt(16 - i * 2)
|
|
||||||
heading_font.bold = True
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Could not set up document styles: {str(e)}")
|
|
||||||
|
|
||||||
def _process_section(self, doc, lines: list):
|
|
||||||
"""Process a section of content into DOCX elements."""
|
|
||||||
for line in lines:
|
|
||||||
if not line.strip():
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check for tables (lines with |)
|
|
||||||
if '|' in line and not line.startswith('|'):
|
|
||||||
# This might be part of a table, process as table
|
|
||||||
table_data = self._extract_table_data(lines)
|
|
||||||
if table_data:
|
|
||||||
self._add_table(doc, table_data)
|
|
||||||
return
|
|
||||||
|
|
||||||
# Check for lists
|
|
||||||
if line.startswith('- ') or line.startswith('* '):
|
|
||||||
# This is a list item
|
|
||||||
doc.add_paragraph(line[2:], style='List Bullet')
|
|
||||||
elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
|
|
||||||
# This is a numbered list item
|
|
||||||
doc.add_paragraph(line[3:], style='List Number')
|
|
||||||
else:
|
|
||||||
# Regular paragraph
|
|
||||||
doc.add_paragraph(line)
|
|
||||||
|
|
||||||
def _extract_table_data(self, lines: list) -> list:
|
|
||||||
"""Extract table data from lines."""
|
|
||||||
table_data = []
|
|
||||||
in_table = False
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
if '|' in line:
|
|
||||||
if not in_table:
|
|
||||||
in_table = True
|
|
||||||
# Split by | and clean up
|
|
||||||
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
|
||||||
if cells:
|
|
||||||
table_data.append(cells)
|
|
||||||
elif in_table and not line.strip():
|
|
||||||
# Empty line, might be end of table
|
|
||||||
break
|
|
||||||
|
|
||||||
return table_data if len(table_data) > 1 else []
|
|
||||||
|
|
||||||
def _add_table(self, doc, table_data: list):
|
|
||||||
"""Add a table to the document."""
|
|
||||||
try:
|
|
||||||
if not table_data:
|
|
||||||
return
|
|
||||||
|
|
||||||
# Create table
|
|
||||||
table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
|
|
||||||
table.alignment = WD_TABLE_ALIGNMENT.CENTER
|
|
||||||
|
|
||||||
# Add data to table
|
|
||||||
for row_idx, row_data in enumerate(table_data):
|
|
||||||
for col_idx, cell_data in enumerate(row_data):
|
|
||||||
if col_idx < len(table.rows[row_idx].cells):
|
|
||||||
table.rows[row_idx].cells[col_idx].text = cell_data
|
|
||||||
|
|
||||||
# Style the table
|
|
||||||
self._style_table(table)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Could not add table: {str(e)}")
|
|
||||||
|
|
||||||
def _style_table(self, table):
|
|
||||||
"""Apply styling to the table."""
|
|
||||||
try:
|
|
||||||
# Style header row
|
|
||||||
if len(table.rows) > 0:
|
|
||||||
header_cells = table.rows[0].cells
|
|
||||||
for cell in header_cells:
|
|
||||||
for paragraph in cell.paragraphs:
|
|
||||||
for run in paragraph.runs:
|
|
||||||
run.bold = True
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Could not style table: {str(e)}")
|
|
||||||
|
|
@ -1,210 +0,0 @@
|
||||||
"""
|
|
||||||
Excel renderer for report generation using openpyxl.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .base_renderer import BaseRenderer
|
|
||||||
from typing import Dict, Any, Tuple, List
|
|
||||||
import io
|
|
||||||
import base64
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
try:
|
|
||||||
from openpyxl import Workbook
|
|
||||||
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
|
||||||
from openpyxl.utils import get_column_letter
|
|
||||||
from openpyxl.worksheet.table import Table, TableStyleInfo
|
|
||||||
OPENPYXL_AVAILABLE = True
|
|
||||||
except ImportError:
|
|
||||||
OPENPYXL_AVAILABLE = False
|
|
||||||
|
|
||||||
class ExcelRenderer(BaseRenderer):
|
|
||||||
"""Renders content to Excel format using openpyxl."""
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_supported_formats(cls) -> List[str]:
|
|
||||||
"""Return supported Excel formats."""
|
|
||||||
return ['xlsx', 'xls', 'excel']
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_format_aliases(cls) -> List[str]:
|
|
||||||
"""Return format aliases."""
|
|
||||||
return ['spreadsheet', 'workbook']
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_priority(cls) -> int:
|
|
||||||
"""Return priority for Excel renderer."""
|
|
||||||
return 110
|
|
||||||
|
|
||||||
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
|
||||||
"""Return only Excel-specific guidelines; global prompt is built centrally."""
|
|
||||||
return (
|
|
||||||
"EXCEL FORMAT GUIDELINES:\n"
|
|
||||||
"- Output one or more pipe-delimited tables with a single header row.\n"
|
|
||||||
"- Let user intent define columns; use clear names and ISO dates.\n"
|
|
||||||
"- Separate multiple tables by a single blank line.\n"
|
|
||||||
"- No markdown/HTML/code fences; tables only unless user explicitly asks for notes.\n"
|
|
||||||
"OUTPUT: Return ONLY pipe-delimited tables suitable for import."
|
|
||||||
)
|
|
||||||
|
|
||||||
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
|
||||||
"""Render extracted content to Excel format."""
|
|
||||||
try:
|
|
||||||
if not OPENPYXL_AVAILABLE:
|
|
||||||
# Fallback to CSV if openpyxl not available
|
|
||||||
from .csv_renderer import CsvRenderer
|
|
||||||
csv_renderer = CsvRenderer()
|
|
||||||
csv_content, _ = await csv_renderer.render(extracted_content, title)
|
|
||||||
return csv_content, "text/csv"
|
|
||||||
|
|
||||||
# Generate Excel using openpyxl
|
|
||||||
excel_content = self._generate_excel(extracted_content, title)
|
|
||||||
|
|
||||||
return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error rendering Excel: {str(e)}")
|
|
||||||
# Return CSV fallback
|
|
||||||
return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
|
|
||||||
|
|
||||||
def _generate_excel(self, content: str, title: str) -> str:
|
|
||||||
"""Generate Excel content using openpyxl."""
|
|
||||||
try:
|
|
||||||
# Create workbook
|
|
||||||
wb = Workbook()
|
|
||||||
|
|
||||||
# Remove default sheet
|
|
||||||
wb.remove(wb.active)
|
|
||||||
|
|
||||||
# Create sheets
|
|
||||||
summary_sheet = wb.create_sheet("Summary", 0)
|
|
||||||
data_sheet = wb.create_sheet("Data", 1)
|
|
||||||
analysis_sheet = wb.create_sheet("Analysis", 2)
|
|
||||||
|
|
||||||
# Add content to sheets
|
|
||||||
self._populate_summary_sheet(summary_sheet, title)
|
|
||||||
self._populate_data_sheet(data_sheet, content)
|
|
||||||
self._populate_analysis_sheet(analysis_sheet, content)
|
|
||||||
|
|
||||||
# Save to buffer
|
|
||||||
buffer = io.BytesIO()
|
|
||||||
wb.save(buffer)
|
|
||||||
buffer.seek(0)
|
|
||||||
|
|
||||||
# Convert to base64
|
|
||||||
excel_bytes = buffer.getvalue()
|
|
||||||
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
|
|
||||||
|
|
||||||
return excel_base64
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error generating Excel: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _populate_summary_sheet(self, sheet, title: str):
|
|
||||||
"""Populate the summary sheet."""
|
|
||||||
try:
|
|
||||||
# Title
|
|
||||||
sheet['A1'] = title
|
|
||||||
sheet['A1'].font = Font(size=16, bold=True)
|
|
||||||
sheet['A1'].alignment = Alignment(horizontal='center')
|
|
||||||
|
|
||||||
# Generation info
|
|
||||||
sheet['A3'] = "Generated:"
|
|
||||||
sheet['B3'] = self._format_timestamp()
|
|
||||||
sheet['A4'] = "Status:"
|
|
||||||
sheet['B4'] = "Generated Successfully"
|
|
||||||
|
|
||||||
# Key metrics placeholder
|
|
||||||
sheet['A6'] = "Key Metrics:"
|
|
||||||
sheet['A6'].font = Font(bold=True)
|
|
||||||
sheet['A7'] = "Total Items:"
|
|
||||||
sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet
|
|
||||||
|
|
||||||
# Auto-adjust column widths
|
|
||||||
sheet.column_dimensions['A'].width = 20
|
|
||||||
sheet.column_dimensions['B'].width = 30
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Could not populate summary sheet: {str(e)}")
|
|
||||||
|
|
||||||
def _populate_data_sheet(self, sheet, content: str):
|
|
||||||
"""Populate the data sheet."""
|
|
||||||
try:
|
|
||||||
# Headers
|
|
||||||
headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
|
|
||||||
for col, header in enumerate(headers, 1):
|
|
||||||
cell = sheet.cell(row=1, column=col, value=header)
|
|
||||||
cell.font = Font(bold=True)
|
|
||||||
cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
|
|
||||||
|
|
||||||
# Process content
|
|
||||||
lines = content.split('\n')
|
|
||||||
row = 2
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
line = line.strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check for table data (lines with |)
|
|
||||||
if '|' in line:
|
|
||||||
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
|
||||||
for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns
|
|
||||||
sheet.cell(row=row, column=col, value=cell_data)
|
|
||||||
row += 1
|
|
||||||
else:
|
|
||||||
# Regular content
|
|
||||||
sheet.cell(row=row, column=1, value=line)
|
|
||||||
row += 1
|
|
||||||
|
|
||||||
# Auto-adjust column widths
|
|
||||||
for col in range(1, 6):
|
|
||||||
sheet.column_dimensions[get_column_letter(col)].width = 20
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Could not populate data sheet: {str(e)}")
|
|
||||||
|
|
||||||
def _populate_analysis_sheet(self, sheet, content: str):
|
|
||||||
"""Populate the analysis sheet."""
|
|
||||||
try:
|
|
||||||
# Title
|
|
||||||
sheet['A1'] = "Analysis & Insights"
|
|
||||||
sheet['A1'].font = Font(size=14, bold=True)
|
|
||||||
|
|
||||||
# Content analysis
|
|
||||||
lines = content.split('\n')
|
|
||||||
row = 3
|
|
||||||
|
|
||||||
sheet['A3'] = "Content Analysis:"
|
|
||||||
sheet['A3'].font = Font(bold=True)
|
|
||||||
row += 1
|
|
||||||
|
|
||||||
# Count different types of content
|
|
||||||
table_lines = sum(1 for line in lines if '|' in line)
|
|
||||||
list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
|
|
||||||
text_lines = len(lines) - table_lines - list_lines
|
|
||||||
|
|
||||||
sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
|
|
||||||
row += 1
|
|
||||||
sheet[f'A{row}'] = f"Table Rows: {table_lines}"
|
|
||||||
row += 1
|
|
||||||
sheet[f'A{row}'] = f"List Items: {list_lines}"
|
|
||||||
row += 1
|
|
||||||
sheet[f'A{row}'] = f"Text Lines: {text_lines}"
|
|
||||||
row += 2
|
|
||||||
|
|
||||||
# Recommendations
|
|
||||||
sheet[f'A{row}'] = "Recommendations:"
|
|
||||||
sheet[f'A{row}'].font = Font(bold=True)
|
|
||||||
row += 1
|
|
||||||
sheet[f'A{row}'] = "1. Review data accuracy"
|
|
||||||
row += 1
|
|
||||||
sheet[f'A{row}'] = "2. Consider additional analysis"
|
|
||||||
row += 1
|
|
||||||
sheet[f'A{row}'] = "3. Update regularly"
|
|
||||||
|
|
||||||
# Auto-adjust column width
|
|
||||||
sheet.column_dimensions['A'].width = 30
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
|
|
||||||
|
|
@ -1,69 +0,0 @@
"""
HTML renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List

class HtmlRenderer(BaseRenderer):
    """Renders content to HTML format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported HTML formats."""
        return ['html', 'htm']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['web', 'webpage']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for HTML renderer."""
        return 100

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only HTML-specific guidelines; global prompt is built centrally."""
        return (
            "HTML FORMAT GUIDELINES:\n"
            "- Output a complete HTML5 document starting with <!DOCTYPE html>.\n"
            "- Include <html>, <head> with <meta charset=\"UTF-8\"> and <title>, and <body>.\n"
            "- Use semantic elements: <header>, <main>, <section>, <article>, <footer>.\n"
            "- Provide professional CSS in a <style> block; responsive, clean typography.\n"
            "- Use h1/h2/h3 for headings; tables and lists for structure.\n"
            "OUTPUT: Return ONLY valid HTML (no markdown, no code fences)."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to HTML format."""
        try:
            # The extracted content should already be HTML from the AI
            # Just clean it up and ensure it's valid
            html_content = self._clean_html_content(extracted_content, title)

            return html_content, "text/html"

        except Exception as e:
            self.logger.error(f"Error rendering HTML: {str(e)}")
            # Return minimal HTML fallback
            return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"

    def _clean_html_content(self, content: str, title: str) -> str:
        """Clean and validate HTML content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # Ensure it starts with DOCTYPE
        if not content.startswith('<!DOCTYPE'):
            if content.startswith('<html'):
                content = '<!DOCTYPE html>\n' + content
            else:
                content = f'<!DOCTYPE html>\n<html>\n<head><meta charset="UTF-8"><title>{title}</title></head>\n<body>\n{content}\n</body>\n</html>'

        return content
@ -1,74 +0,0 @@
"""
JSON renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List
import json

class JsonRenderer(BaseRenderer):
    """Renders content to JSON format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported JSON formats."""
        return ['json']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['data']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for JSON renderer."""
        return 80

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only JSON-specific guidelines; global prompt is built centrally."""
        return (
            "JSON FORMAT GUIDELINES:\n"
            "- Output ONLY a single valid JSON object (no fences, no pre/post text).\n"
            "- Choose a structure that best fits the user's intent; include a top-level title and data.\n"
            "- Prefer arrays/objects that map cleanly to the extracted facts.\n"
            "- Include minimal metadata only if useful (e.g., generatedAt, sources).\n"
            "OUTPUT: Return ONLY valid, parseable JSON."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to JSON format."""
        try:
            # The extracted content should already be JSON from the AI
            # Just validate and format it
            json_content = self._clean_json_content(extracted_content, title)

            return json_content, "application/json"

        except Exception as e:
            self.logger.error(f"Error rendering JSON: {str(e)}")
            # Return minimal JSON fallback
            fallback_data = {
                "title": title,
                "sections": [{"type": "text", "content": f"Error rendering report: {str(e)}"}],
                "metadata": {"error": str(e)}
            }
            return json.dumps(fallback_data, indent=2), "application/json"

    def _clean_json_content(self, content: str, title: str) -> str:
        """Clean and validate JSON content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        # Validate JSON
        try:
            parsed = json.loads(content)
            # Re-format with proper indentation
            return json.dumps(parsed, indent=2, ensure_ascii=False)
        except json.JSONDecodeError:
            # If not valid JSON, return as-is
            return content
@ -1,65 +0,0 @@
"""
Markdown renderer for report generation.
"""

from .base_renderer import BaseRenderer
from typing import Dict, Any, Tuple, List

class MarkdownRenderer(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
        """Return only Markdown-specific guidelines; global prompt is built centrally."""
        return (
            "MARKDOWN FORMAT GUIDELINES:\n"
            "- Use proper Markdown syntax only (no HTML wrappers).\n"
            "- # for main title, ## for sections, ### for subsections.\n"
            "- Tables with | separators and a header row.\n"
            "- Bullet lists with - or *.\n"
            "- Emphasis with **bold** and *italic*.\n"
            "- Code blocks with ```language.\n"
            "- Horizontal rules (---) to separate major sections when helpful.\n"
            "- Include links [text](url) and images ![alt](url) when referenced by sources.\n"
            "OUTPUT: Return ONLY raw Markdown content without code fences."
        )

    async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
        """Render extracted content to Markdown format."""
        try:
            # The extracted content should already be Markdown from the AI
            # Just clean it up
            markdown_content = self._clean_markdown_content(extracted_content, title)

            return markdown_content, "text/markdown"

        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Return minimal markdown fallback
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _clean_markdown_content(self, content: str, title: str) -> str:
        """Clean and validate Markdown content from AI."""
        content = content.strip()

        # Remove markdown code blocks if present
        if content.startswith("```") and content.endswith("```"):
            lines = content.split('\n')
            if len(lines) > 2:
                content = '\n'.join(lines[1:-1]).strip()

        return content
@ -1,225 +0,0 @@
|
||||||
"""
|
|
||||||
PDF renderer for report generation using reportlab.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .base_renderer import BaseRenderer
|
|
||||||
from typing import Dict, Any, Tuple, List
|
|
||||||
import io
|
|
||||||
import base64
|
|
||||||
from datetime import datetime, UTC
|
|
||||||
|
|
||||||
try:
|
|
||||||
from reportlab.lib.pagesizes import letter, A4
|
|
||||||
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
|
|
||||||
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
|
||||||
from reportlab.lib.units import inch
|
|
||||||
from reportlab.lib import colors
|
|
||||||
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_JUSTIFY
|
|
||||||
REPORTLAB_AVAILABLE = True
|
|
||||||
except ImportError:
|
|
||||||
REPORTLAB_AVAILABLE = False
|
|
||||||
|
|
||||||
class PdfRenderer(BaseRenderer):
|
|
||||||
"""Renders content to PDF format using reportlab."""
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_supported_formats(cls) -> List[str]:
|
|
||||||
"""Return supported PDF formats."""
|
|
||||||
return ['pdf']
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_format_aliases(cls) -> List[str]:
|
|
||||||
"""Return format aliases."""
|
|
||||||
return ['document', 'print']
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_priority(cls) -> int:
|
|
||||||
"""Return priority for PDF renderer."""
|
|
||||||
return 120
|
|
||||||
|
|
||||||
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
|
||||||
"""Return only PDF-specific guidelines; global prompt is built centrally."""
|
|
||||||
return (
|
|
||||||
"PDF FORMAT GUIDELINES:\n"
|
|
||||||
"- Provide structured content suitable for pagination and headings (H1/H2/H3-like).\n"
|
|
||||||
"- Use bullet lists and tables where useful; separate major sections clearly.\n"
|
|
||||||
"- Avoid markdown/HTML; produce clean, plain content that can be laid out as PDF.\n"
|
|
||||||
"OUTPUT: Return ONLY the PDF-ready textual content (no fences)."
|
|
||||||
)
|
|
||||||
|
|
||||||
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
|
||||||
"""Render extracted content to PDF format."""
|
|
||||||
try:
|
|
||||||
if not REPORTLAB_AVAILABLE:
|
|
||||||
# Fallback to HTML if reportlab not available
|
|
||||||
from .html_renderer import HtmlRenderer
|
|
||||||
html_renderer = HtmlRenderer()
|
|
||||||
html_content, _ = await html_renderer.render(extracted_content, title)
|
|
||||||
return html_content, "text/html"
|
|
||||||
|
|
||||||
# Generate PDF using reportlab
|
|
||||||
pdf_content = self._generate_pdf(extracted_content, title)
|
|
||||||
|
|
||||||
return pdf_content, "application/pdf"
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error rendering PDF: {str(e)}")
|
|
||||||
# Return minimal fallback
|
|
||||||
return f"PDF Generation Error: {str(e)}", "text/plain"
|
|
||||||
|
|
||||||
def _generate_pdf(self, content: str, title: str) -> str:
|
|
||||||
"""Generate PDF content using reportlab."""
|
|
||||||
try:
|
|
||||||
# Create a buffer to hold the PDF
|
|
||||||
buffer = io.BytesIO()
|
|
||||||
|
|
||||||
# Create PDF document
|
|
||||||
doc = SimpleDocTemplate(
|
|
||||||
buffer,
|
|
||||||
pagesize=A4,
|
|
||||||
rightMargin=72,
|
|
||||||
leftMargin=72,
|
|
||||||
topMargin=72,
|
|
||||||
bottomMargin=18
|
|
||||||
)
|
|
||||||
|
|
||||||
# Get styles
|
|
||||||
styles = getSampleStyleSheet()
|
|
||||||
|
|
||||||
# Create custom styles
|
|
||||||
title_style = ParagraphStyle(
|
|
||||||
'CustomTitle',
|
|
||||||
parent=styles['Heading1'],
|
|
||||||
fontSize=24,
|
|
||||||
spaceAfter=30,
|
|
||||||
alignment=TA_CENTER,
|
|
||||||
textColor=colors.darkblue
|
|
||||||
)
|
|
||||||
|
|
||||||
heading_style = ParagraphStyle(
|
|
||||||
'CustomHeading',
|
|
||||||
parent=styles['Heading2'],
|
|
||||||
fontSize=16,
|
|
||||||
spaceAfter=12,
|
|
||||||
spaceBefore=12,
|
|
||||||
textColor=colors.darkblue
|
|
||||||
)
|
|
||||||
|
|
||||||
# Build PDF content
|
|
||||||
story = []
|
|
||||||
|
|
||||||
# Title page
|
|
||||||
story.append(Paragraph(title, title_style))
|
|
||||||
story.append(Spacer(1, 20))
|
|
||||||
story.append(Paragraph(f"Generated: {self._format_timestamp()}", styles['Normal']))
|
|
||||||
story.append(PageBreak())
|
|
||||||
|
|
||||||
# Process content
|
|
||||||
lines = content.split('\n')
|
|
||||||
current_section = []
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
line = line.strip()
|
|
||||||
if not line:
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check for headings
|
|
||||||
if line.startswith('# '):
|
|
||||||
# H1 heading
|
|
||||||
if current_section:
|
|
||||||
story.extend(self._process_section(current_section, styles))
|
|
||||||
current_section = []
|
|
||||||
story.append(Paragraph(line[2:], title_style))
|
|
||||||
story.append(Spacer(1, 12))
|
|
||||||
elif line.startswith('## '):
|
|
||||||
# H2 heading
|
|
||||||
if current_section:
|
|
||||||
story.extend(self._process_section(current_section, styles))
|
|
||||||
current_section = []
|
|
||||||
story.append(Paragraph(line[3:], heading_style))
|
|
||||||
story.append(Spacer(1, 8))
|
|
||||||
elif line.startswith('### '):
|
|
||||||
# H3 heading
|
|
||||||
if current_section:
|
|
||||||
story.extend(self._process_section(current_section, styles))
|
|
||||||
current_section = []
|
|
||||||
story.append(Paragraph(line[4:], styles['Heading3']))
|
|
||||||
story.append(Spacer(1, 6))
|
|
||||||
else:
|
|
||||||
current_section.append(line)
|
|
||||||
|
|
||||||
# Process remaining content
|
|
||||||
if current_section:
|
|
||||||
story.extend(self._process_section(current_section, styles))
|
|
||||||
|
|
||||||
# Build PDF
|
|
||||||
doc.build(story)
|
|
||||||
|
|
||||||
# Get PDF content as base64
|
|
||||||
buffer.seek(0)
|
|
||||||
pdf_bytes = buffer.getvalue()
|
|
||||||
pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
|
|
||||||
|
|
||||||
return pdf_base64
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error generating PDF: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def _process_section(self, lines: list, styles) -> list:
|
|
||||||
"""Process a section of content into PDF elements."""
|
|
||||||
elements = []
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
if not line.strip():
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Check for tables (lines with |)
|
|
||||||
if '|' in line and not line.startswith('|'):
|
|
||||||
# This might be part of a table, process as table
|
|
||||||
table_data = self._extract_table_data(lines)
|
|
||||||
if table_data:
|
|
||||||
table = Table(table_data)
|
|
||||||
table.setStyle(TableStyle([
|
|
||||||
('BACKGROUND', (0, 0), (-1, 0), colors.grey),
|
|
||||||
('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
|
|
||||||
('ALIGN', (0, 0), (-1, -1), 'CENTER'),
|
|
||||||
('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
|
|
||||||
('FONTSIZE', (0, 0), (-1, 0), 14),
|
|
||||||
('BOTTOMPADDING', (0, 0), (-1, 0), 12),
|
|
||||||
('BACKGROUND', (0, 1), (-1, -1), colors.beige),
|
|
||||||
('GRID', (0, 0), (-1, -1), 1, colors.black)
|
|
||||||
]))
|
|
||||||
elements.append(table)
|
|
||||||
elements.append(Spacer(1, 12))
|
|
||||||
return elements
|
|
||||||
|
|
||||||
# Check for lists
|
|
||||||
if line.startswith('- ') or line.startswith('* '):
|
|
||||||
# This is a list item
|
|
||||||
elements.append(Paragraph(f"• {line[2:]}", styles['Normal']))
|
|
||||||
else:
|
|
||||||
# Regular paragraph
|
|
||||||
elements.append(Paragraph(line, styles['Normal']))
|
|
||||||
|
|
||||||
elements.append(Spacer(1, 6))
|
|
||||||
return elements
|
|
||||||
|
|
||||||
def _extract_table_data(self, lines: list) -> list:
|
|
||||||
"""Extract table data from lines."""
|
|
||||||
table_data = []
|
|
||||||
in_table = False
|
|
||||||
|
|
||||||
for line in lines:
|
|
||||||
if '|' in line:
|
|
||||||
if not in_table:
|
|
||||||
in_table = True
|
|
||||||
# Split by | and clean up
|
|
||||||
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
|
||||||
if cells:
|
|
||||||
table_data.append(cells)
|
|
||||||
elif in_table and not line.strip():
|
|
||||||
# Empty line, might be end of table
|
|
||||||
break
|
|
||||||
|
|
||||||
return table_data if len(table_data) > 1 else []
|
|
||||||
|
|
@@ -6,7 +6,7 @@ import logging
 import importlib
 import pkgutil
 from typing import Dict, Type, List, Optional
-from .base_renderer import BaseRenderer
+from .rendererBaseTemplate import BaseRenderer
 
 logger = logging.getLogger(__name__)
 
@@ -37,7 +37,7 @@ class RendererRegistry:
 
         # Scan all Python files in the renderers directory
         for file_path in renderers_dir.glob("*.py"):
-            if file_path.name in ['registry.py', 'base_renderer.py', '__init__.py']:
+            if file_path.name in ['registry.py', 'rendererBaseTemplate.py', '__init__.py']:
                 continue
 
             # Extract module name from filename
@@ -92,7 +92,7 @@ class RendererRegistry:
         except Exception as e:
             logger.error(f"Error registering renderer {renderer_class.__name__}: {str(e)}")
 
-    def get_renderer(self, output_format: str) -> Optional[BaseRenderer]:
+    def get_renderer(self, output_format: str, services=None) -> Optional[BaseRenderer]:
         """Get a renderer instance for the specified format."""
         if not self._discovered:
             self.discover_renderers()
@@ -109,7 +109,7 @@ class RendererRegistry:
 
         if renderer_class:
             try:
-                return renderer_class()
+                return renderer_class(services=services)
             except Exception as e:
                 logger.error(f"Error creating renderer instance for {format_name}: {str(e)}")
                 return None
@@ -144,9 +144,9 @@ class RendererRegistry:
 # Global registry instance
 _registry = RendererRegistry()
 
-def get_renderer(output_format: str) -> Optional[BaseRenderer]:
+def get_renderer(output_format: str, services=None) -> Optional[BaseRenderer]:
     """Get a renderer instance for the specified format."""
-    return _registry.get_renderer(output_format)
+    return _registry.get_renderer(output_format, services)
 
 def get_supported_formats() -> List[str]:
     """Get list of all supported formats."""
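A minimal usage sketch of the updated registry API. The import path is inferred from the file paths in this change, and the "csv" format, report_json payload, and services object are assumptions for illustration:

    # Illustrative sketch only - not part of the diff.
    from modules.services.serviceGeneration.renderers.registry import get_renderer, get_supported_formats

    async def build_report(report_json, services):
        # services is now threaded through to the renderer constructor
        renderer = get_renderer("csv", services=services)
        if renderer is None:
            raise ValueError(f"Unsupported format; available: {get_supported_formats()}")
        content, mime_type = await renderer.render(report_json, "Quarterly Report")
        return content, mime_type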
@@ -0,0 +1,459 @@
"""
Base renderer class for all format renderers.
"""

from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List
import logging
import json

logger = logging.getLogger(__name__)


class BaseRenderer(ABC):
    """Base class for all format renderers."""

    def __init__(self, services=None):
        self.logger = logger
        self.services = services  # Add services attribute

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """
        Return list of supported format names for this renderer.
        Override this method in subclasses to specify supported formats.
        """
        return []

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """
        Return list of format aliases for this renderer.
        Override this method in subclasses to specify format aliases.
        """
        return []

    @classmethod
    def get_priority(cls) -> int:
        """
        Return priority for this renderer (higher number = higher priority).
        Used when multiple renderers support the same format.
        """
        return 0

    @abstractmethod
    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """
        Render extracted JSON content to the target format.

        Args:
            extracted_content: Structured JSON content with sections and metadata
            title: Report title
            user_prompt: Original user prompt for context
            ai_service: AI service instance for additional processing

        Returns:
            tuple: (rendered_content, mime_type)
        """
        pass

    def _extract_sections(self, report_data: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Extract sections from report data."""
        return report_data.get('sections', [])

    def _extract_metadata(self, report_data: Dict[str, Any]) -> Dict[str, Any]:
        """Extract metadata from report data."""
        return report_data.get('metadata', {})

    def _get_title(self, report_data: Dict[str, Any], fallback_title: str) -> str:
        """Get title from report data or use fallback."""
        metadata = report_data.get('metadata', {})
        return metadata.get('title', fallback_title)

    def _validate_json_structure(self, json_content: Dict[str, Any]) -> bool:
        """Validate that JSON content has the expected structure."""
        if not isinstance(json_content, dict):
            return False

        if "sections" not in json_content:
            return False

        sections = json_content.get("sections", [])
        if not isinstance(sections, list):
            return False

        # Validate each section has content_type and elements
        for section in sections:
            if not isinstance(section, dict):
                return False
            if "content_type" not in section or "elements" not in section:
                return False

        return True

    def _get_section_type(self, section: Dict[str, Any]) -> str:
        """Get the type of a section."""
        return section.get("content_type", "paragraph")

    def _get_section_data(self, section: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Get the elements of a section."""
        return section.get("elements", [])

    def _get_section_id(self, section: Dict[str, Any]) -> str:
        """Get the ID of a section (if available)."""
        return section.get("id", "unknown")

    def _extract_table_data(self, section_data: Dict[str, Any]) -> Tuple[List[str], List[List[str]]]:
        """Extract table headers and rows from section data."""
        headers = section_data.get("headers", [])
        rows = section_data.get("rows", [])
        return headers, rows

    def _extract_bullet_list_items(self, section_data: Dict[str, Any]) -> List[str]:
        """Extract bullet list items from section data."""
        items = section_data.get("items", [])
        result = []
        for item in items:
            if isinstance(item, str):
                result.append(item)
            elif isinstance(item, dict) and "text" in item:
                result.append(item["text"])
        return result

    def _extract_heading_data(self, section_data: Dict[str, Any]) -> Tuple[int, str]:
        """Extract heading level and text from section data."""
        level = section_data.get("level", 1)
        text = section_data.get("text", "")
        return level, text

    def _extract_paragraph_text(self, section_data: Dict[str, Any]) -> str:
        """Extract paragraph text from section data."""
        return section_data.get("text", "")

    def _extract_code_block_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
        """Extract code and language from section data."""
        code = section_data.get("code", "")
        language = section_data.get("language", "")
        return code, language

    def _extract_image_data(self, section_data: Dict[str, Any]) -> Tuple[str, str]:
        """Extract base64 data and alt text from section data."""
        base64_data = section_data.get("base64Data", "")
        alt_text = section_data.get("altText", "Image")
        return base64_data, alt_text

    def _render_image_section(self, section: Dict[str, Any], styles: Dict[str, Any] = None) -> Any:
        """
        Render an image section. This is a base implementation that should be overridden
        by format-specific renderers.

        Args:
            section: Image section data
            styles: Optional styling information

        Returns:
            Format-specific image representation
        """
        section_data = self._get_section_data(section)
        base64_data, alt_text = self._extract_image_data(section_data)

        # Base implementation returns a simple dict;
        # format-specific renderers should override this method.
        return {
            "content_type": "image",
            "base64Data": base64_data,
            "altText": alt_text,
            "width": section_data.get("width", None),
            "height": section_data.get("height", None),
            "caption": section_data.get("caption", "")
        }

    def _validate_image_data(self, base64_data: str, alt_text: str) -> bool:
        """Validate image data."""
        if not base64_data:
            self.logger.warning("Image section has no base64 data")
            return False

        if not alt_text:
            self.logger.warning("Image section has no alt text")
            return False

        # Basic base64 validation
        try:
            import base64
            base64.b64decode(base64_data, validate=True)
            return True
        except Exception as e:
            self.logger.warning(f"Invalid base64 image data: {str(e)}")
            return False

    def _get_image_dimensions(self, base64_data: str) -> Tuple[int, int]:
        """
        Get image dimensions from base64 data.
        This is a helper method that format-specific renderers can use.
        """
        try:
            import base64
            from PIL import Image
            import io

            # Decode base64 data
            image_data = base64.b64decode(base64_data)
            image = Image.open(io.BytesIO(image_data))

            return image.size  # Returns (width, height)

        except Exception as e:
            self.logger.warning(f"Could not determine image dimensions: {str(e)}")
            return (0, 0)

    def _resize_image_if_needed(self, base64_data: str, max_width: int = 800, max_height: int = 600) -> str:
        """
        Resize image if it exceeds maximum dimensions.
        Returns the resized image as base64 string.
        """
        try:
            import base64
            from PIL import Image
            import io

            # Decode base64 data
            image_data = base64.b64decode(base64_data)
            image = Image.open(io.BytesIO(image_data))

            # Check if resizing is needed
            width, height = image.size
            if width <= max_width and height <= max_height:
                return base64_data  # No resizing needed

            # Calculate new dimensions maintaining aspect ratio
            ratio = min(max_width / width, max_height / height)
            new_width = int(width * ratio)
            new_height = int(height * ratio)

            # Resize image
            resized_image = image.resize((new_width, new_height), Image.Resampling.LANCZOS)

            # Convert back to base64
            buffer = io.BytesIO()
            resized_image.save(buffer, format=image.format or 'PNG')
            resized_data = buffer.getvalue()

            return base64.b64encode(resized_data).decode('utf-8')

        except Exception as e:
            self.logger.warning(f"Could not resize image: {str(e)}")
            return base64_data  # Return original if resize fails

    def _get_supported_section_types(self) -> List[str]:
        """Return list of supported section types."""
        return ["table", "bullet_list", "heading", "paragraph", "code_block", "image"]

    def _is_valid_section_type(self, section_type: str) -> bool:
        """Check if a section type is valid."""
        return section_type in self._get_supported_section_types()

    def _process_section_by_type(self, section: Dict[str, Any]) -> Dict[str, Any]:
        """Process a section and return structured data based on its type."""
        section_type = self._get_section_type(section)
        section_data = self._get_section_data(section)

        if section_type == "table":
            headers, rows = self._extract_table_data(section_data)
            return {"content_type": "table", "headers": headers, "rows": rows}
        elif section_type == "bullet_list":
            items = self._extract_bullet_list_items(section_data)
            return {"content_type": "bullet_list", "items": items}
        elif section_type == "heading":
            level, text = self._extract_heading_data(section_data)
            return {"content_type": "heading", "level": level, "text": text}
        elif section_type == "paragraph":
            text = self._extract_paragraph_text(section_data)
            return {"content_type": "paragraph", "text": text}
        elif section_type == "code_block":
            code, language = self._extract_code_block_data(section_data)
            return {"content_type": "code_block", "code": code, "language": language}
        elif section_type == "image":
            base64_data, alt_text = self._extract_image_data(section_data)
            # Validate image data
            if self._validate_image_data(base64_data, alt_text):
                return {
                    "content_type": "image",
                    "base64Data": base64_data,
                    "altText": alt_text,
                    "width": section_data.get("width"),
                    "height": section_data.get("height"),
                    "caption": section_data.get("caption", "")
                }
            else:
                # Return placeholder if image data is invalid
                return {"content_type": "paragraph", "text": f"[Image: {alt_text}]"}
        else:
            # Fallback to paragraph
            text = self._extract_paragraph_text(section_data)
            return {"content_type": "paragraph", "text": text}

    def _format_timestamp(self, timestamp: str = None) -> str:
        """Format timestamp for display."""
        if timestamp:
            return timestamp
        from datetime import datetime, UTC
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

    # ===== GENERIC AI STYLING HELPERS =====

    async def _get_ai_styles(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generic AI styling method that can be used by all renderers.

        Args:
            ai_service: AI service instance
            style_template: Format-specific style template
            default_styles: Default styles to fall back to

        Returns:
            Dict with styling definitions
        """
        if not ai_service:
            return default_styles

        try:
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=style_template, context="", options=request_options)

            # DEBUG: Show the actual prompt being sent to AI
            self.logger.debug("AI Style Template Prompt:")
            self.logger.debug(f"{style_template}")

            response = await ai_service.aiObjects.call(request)

            import json
            import re

            # Clean and parse JSON
            result = response.content.strip() if response and response.content else ""

            # Check if result is empty
            if not result:
                self.logger.warning("AI styling returned empty response, using defaults")
                return default_styles

            # Extract JSON from markdown if present
            json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
            if json_match:
                result = json_match.group(1).strip()
            elif result.startswith('```json'):
                result = re.sub(r'^```json\s*', '', result)
                result = re.sub(r'\s*```$', '', result)
            elif result.startswith('```'):
                result = re.sub(r'^```\s*', '', result)
                result = re.sub(r'\s*```$', '', result)

            # Try to parse JSON
            try:
                styles = json.loads(result)
            except json.JSONDecodeError as json_error:
                self.logger.warning(f"AI styling returned invalid JSON: {json_error}")

                # Log the full response to a file to avoid log truncation
                self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "RENDERER")
                self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "RENDERER")

                self.logger.warning(f"Raw content that failed to parse: {result}")

                # Try to fix incomplete JSON by adding missing closing braces
                open_braces = result.count('{')
                close_braces = result.count('}')

                if open_braces > close_braces:
                    # JSON is incomplete, add missing closing braces
                    missing_braces = open_braces - close_braces
                    result = result + '}' * missing_braces
                    self.logger.info(f"Added {missing_braces} missing closing brace(s)")
                    self.logger.debug(f"Fixed JSON: {result}")

                    # Try parsing the fixed JSON
                    try:
                        styles = json.loads(result)
                        self.logger.info("Successfully fixed incomplete JSON")
                    except json.JSONDecodeError as fix_error:
                        self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
                        self.logger.warning(f"Fixed JSON content: {result}")
                        # Try to extract just the JSON part if it's embedded in text
                        json_start = result.find('{')
                        json_end = result.rfind('}')
                        if json_start != -1 and json_end != -1 and json_end > json_start:
                            json_part = result[json_start:json_end+1]
                            try:
                                styles = json.loads(json_part)
                                self.logger.info("Successfully extracted JSON from explanatory text")
                            except json.JSONDecodeError:
                                self.logger.warning("Could not extract valid JSON from response, using defaults")
                                return default_styles
                        else:
                            return default_styles
                else:
                    # Try to extract just the JSON part if it's embedded in text
                    json_start = result.find('{')
                    json_end = result.rfind('}')
                    if json_start != -1 and json_end != -1 and json_end > json_start:
                        json_part = result[json_start:json_end+1]
                        try:
                            styles = json.loads(json_part)
                            self.logger.info("Successfully extracted JSON from explanatory text")
                        except json.JSONDecodeError:
                            self.logger.warning("Could not extract valid JSON from response, using defaults")
                            return default_styles
                    else:
                        return default_styles

            # Convert colors to appropriate format
            styles = self._convert_colors_format(styles)

            return styles

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return default_styles

    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """
        Convert colors to appropriate format based on renderer type.
        Override this method in subclasses for format-specific color handling.
        """
        return styles

    def _create_ai_style_template(self, format_name: str, user_prompt: str, style_schema: Dict[str, Any]) -> str:
        """
        Create a standardized AI style template for any format.

        Args:
            format_name: Name of the format (e.g., "docx", "xlsx", "pptx")
            user_prompt: User's original prompt
            style_schema: Format-specific style schema

        Returns:
            Formatted prompt string
        """
        schema_json = json.dumps(style_schema, indent=4)

        return f"""You are a professional document styling expert. Generate a complete JSON styling configuration for {format_name.upper()} documents.

Use this schema as a template and customize the values for professional document styling:

{schema_json}

Requirements:
- Return ONLY the complete JSON object (no markdown, no explanations)
- Customize colors, fonts, and spacing for professional appearance
- Ensure all objects are properly closed with closing braces
- Make the styling modern and professional

Return the complete JSON:"""
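To illustrate the contract the registry discovery relies on, here is a minimal hypothetical subclass. The class name, file name, and 'txt' format are invented for illustration and are not part of this change; only the method signatures mirror the base class above:

    # Hypothetical example, e.g. rendererTxt.py - not included in the diff.
    from .rendererBaseTemplate import BaseRenderer
    from typing import Dict, Any, Tuple, List

    class RendererTxt(BaseRenderer):
        """Illustrative plain-text renderer showing the minimal BaseRenderer contract."""

        @classmethod
        def get_supported_formats(cls) -> List[str]:
            return ['txt']

        @classmethod
        def get_priority(cls) -> int:
            return 10  # low priority; the real renderers in this change use 70-120

        async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
            # Walk the sections/elements structure using the base-class helpers.
            parts = [self._get_title(extracted_content, title)]
            for section in self._extract_sections(extracted_content):
                for element in self._get_section_data(section):
                    parts.append(element.get("text", ""))
            return "\n\n".join(p for p in parts if p), "text/plain"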
modules/services/serviceGeneration/renderers/rendererCsv.py  (Normal file, 260 lines)
@@ -0,0 +1,260 @@
"""
|
||||||
|
CSV renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
|
||||||
|
class RendererCsv(BaseRenderer):
|
||||||
|
"""Renders content to CSV format with format-specific extraction."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported CSV formats."""
|
||||||
|
return ['csv']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['spreadsheet', 'table']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for CSV renderer."""
|
||||||
|
return 70
|
||||||
|
|
||||||
|
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||||
|
"""Render extracted JSON content to CSV format."""
|
||||||
|
try:
|
||||||
|
# Generate CSV directly from JSON (no styling needed for CSV)
|
||||||
|
csv_content = await self._generate_csv_from_json(extracted_content, title)
|
||||||
|
|
||||||
|
return csv_content, "text/csv"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering CSV: {str(e)}")
|
||||||
|
# Return minimal CSV fallback
|
||||||
|
return f"Title,Content\n{title},Error rendering report: {str(e)}", "text/csv"
|
||||||
|
|
||||||
|
async def _generate_csv_from_json(self, json_content: Dict[str, Any], title: str) -> str:
|
||||||
|
"""Generate CSV content from structured JSON document."""
|
||||||
|
try:
|
||||||
|
# Validate JSON structure
|
||||||
|
if not isinstance(json_content, dict):
|
||||||
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
|
if "sections" not in json_content:
|
||||||
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
|
document_title = json_content.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
|
# Generate CSV content
|
||||||
|
csv_rows = []
|
||||||
|
|
||||||
|
# Add title row
|
||||||
|
if document_title:
|
||||||
|
csv_rows.append([document_title])
|
||||||
|
csv_rows.append([]) # Empty row
|
||||||
|
|
||||||
|
# Process each section in order
|
||||||
|
sections = json_content.get("sections", [])
|
||||||
|
for section in sections:
|
||||||
|
section_csv = self._render_json_section_to_csv(section)
|
||||||
|
if section_csv:
|
||||||
|
csv_rows.extend(section_csv)
|
||||||
|
csv_rows.append([]) # Empty row between sections
|
||||||
|
|
||||||
|
# Convert to CSV string
|
||||||
|
csv_content = self._convert_rows_to_csv(csv_rows)
|
||||||
|
|
||||||
|
return csv_content
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error generating CSV from JSON: {str(e)}")
|
||||||
|
raise Exception(f"CSV generation failed: {str(e)}")
|
||||||
|
|
||||||
|
def _render_json_section_to_csv(self, section: Dict[str, Any]) -> List[List[str]]:
|
||||||
|
"""Render a single JSON section to CSV rows."""
|
||||||
|
try:
|
||||||
|
section_type = section.get("content_type", "paragraph")
|
||||||
|
elements = section.get("elements", [])
|
||||||
|
|
||||||
|
csv_rows = []
|
||||||
|
|
||||||
|
# Add section title if available
|
||||||
|
section_title = section.get("title")
|
||||||
|
if section_title:
|
||||||
|
csv_rows.append([f"# {section_title}"])
|
||||||
|
|
||||||
|
# Process each element in the section
|
||||||
|
for element in elements:
|
||||||
|
if section_type == "table":
|
||||||
|
csv_rows.extend(self._render_json_table_to_csv(element))
|
||||||
|
elif section_type == "list":
|
||||||
|
csv_rows.extend(self._render_json_list_to_csv(element))
|
||||||
|
elif section_type == "heading":
|
||||||
|
csv_rows.extend(self._render_json_heading_to_csv(element))
|
||||||
|
elif section_type == "paragraph":
|
||||||
|
csv_rows.extend(self._render_json_paragraph_to_csv(element))
|
||||||
|
elif section_type == "code":
|
||||||
|
csv_rows.extend(self._render_json_code_to_csv(element))
|
||||||
|
else:
|
||||||
|
# Fallback to paragraph for unknown types
|
||||||
|
csv_rows.extend(self._render_json_paragraph_to_csv(element))
|
||||||
|
|
||||||
|
return csv_rows
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
|
||||||
|
return [["[Error rendering section]"]]
|
||||||
|
|
||||||
|
def _render_json_table_to_csv(self, table_data: Dict[str, Any]) -> List[List[str]]:
|
||||||
|
"""Render a JSON table to CSV rows."""
|
||||||
|
try:
|
||||||
|
headers = table_data.get("headers", [])
|
||||||
|
rows = table_data.get("rows", [])
|
||||||
|
|
||||||
|
csv_rows = []
|
||||||
|
|
||||||
|
if headers:
|
||||||
|
csv_rows.append(headers)
|
||||||
|
|
||||||
|
if rows:
|
||||||
|
csv_rows.extend(rows)
|
||||||
|
|
||||||
|
return csv_rows
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
|
return [["[Error rendering table]"]]
|
||||||
|
|
||||||
|
def _render_json_list_to_csv(self, list_data: Dict[str, Any]) -> List[List[str]]:
|
||||||
|
"""Render a JSON list to CSV rows."""
|
||||||
|
try:
|
||||||
|
items = list_data.get("items", [])
|
||||||
|
csv_rows = []
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
text = item.get("text", "")
|
||||||
|
subitems = item.get("subitems", [])
|
||||||
|
csv_rows.append([text])
|
||||||
|
|
||||||
|
# Add subitems as indented rows
|
||||||
|
for subitem in subitems:
|
||||||
|
if isinstance(subitem, dict):
|
||||||
|
csv_rows.append([f" - {subitem.get('text', '')}"])
|
||||||
|
else:
|
||||||
|
csv_rows.append([f" - {subitem}"])
|
||||||
|
else:
|
||||||
|
csv_rows.append([str(item)])
|
||||||
|
|
||||||
|
return csv_rows
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering list: {str(e)}")
|
||||||
|
return [["[Error rendering list]"]]
|
||||||
|
|
||||||
|
def _render_json_heading_to_csv(self, heading_data: Dict[str, Any]) -> List[List[str]]:
|
||||||
|
"""Render a JSON heading to CSV rows."""
|
||||||
|
try:
|
||||||
|
text = heading_data.get("text", "")
|
||||||
|
level = heading_data.get("level", 1)
|
||||||
|
|
||||||
|
if text:
|
||||||
|
# Use # symbols for heading levels
|
||||||
|
heading_text = f"{'#' * level} {text}"
|
||||||
|
return [[heading_text]]
|
||||||
|
|
||||||
|
return []
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||||
|
return [["[Error rendering heading]"]]
|
||||||
|
|
||||||
|
def _render_json_paragraph_to_csv(self, paragraph_data: Dict[str, Any]) -> List[List[str]]:
|
||||||
|
"""Render a JSON paragraph to CSV rows."""
|
||||||
|
try:
|
||||||
|
text = paragraph_data.get("text", "")
|
||||||
|
|
||||||
|
if text:
|
||||||
|
# Split long paragraphs into multiple rows if needed
|
||||||
|
if len(text) > 100:
|
||||||
|
words = text.split()
|
||||||
|
rows = []
|
||||||
|
current_row = []
|
||||||
|
current_length = 0
|
||||||
|
|
||||||
|
for word in words:
|
||||||
|
if current_length + len(word) > 100 and current_row:
|
||||||
|
rows.append([" ".join(current_row)])
|
||||||
|
current_row = [word]
|
||||||
|
current_length = len(word)
|
||||||
|
else:
|
||||||
|
current_row.append(word)
|
||||||
|
current_length += len(word) + 1
|
||||||
|
|
||||||
|
if current_row:
|
||||||
|
rows.append([" ".join(current_row)])
|
||||||
|
|
||||||
|
return rows
|
||||||
|
else:
|
||||||
|
return [[text]]
|
||||||
|
|
||||||
|
return []
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||||
|
return [["[Error rendering paragraph]"]]
|
||||||
|
|
||||||
|
def _render_json_code_to_csv(self, code_data: Dict[str, Any]) -> List[List[str]]:
|
||||||
|
"""Render a JSON code block to CSV rows."""
|
||||||
|
try:
|
||||||
|
code = code_data.get("code", "")
|
||||||
|
language = code_data.get("language", "")
|
||||||
|
|
||||||
|
csv_rows = []
|
||||||
|
|
||||||
|
if language:
|
||||||
|
csv_rows.append([f"Code ({language}):"])
|
||||||
|
|
||||||
|
if code:
|
||||||
|
# Split code into lines
|
||||||
|
code_lines = code.split('\n')
|
||||||
|
for line in code_lines:
|
||||||
|
csv_rows.append([f" {line}"])
|
||||||
|
|
||||||
|
return csv_rows
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
|
return [["[Error rendering code block]"]]
|
||||||
|
|
||||||
|
def _convert_rows_to_csv(self, rows: List[List[str]]) -> str:
|
||||||
|
"""Convert rows to CSV string."""
|
||||||
|
import csv
|
||||||
|
import io
|
||||||
|
|
||||||
|
output = io.StringIO()
|
||||||
|
writer = csv.writer(output)
|
||||||
|
|
||||||
|
for row in rows:
|
||||||
|
if row: # Only write non-empty rows
|
||||||
|
writer.writerow(row)
|
||||||
|
|
||||||
|
return output.getvalue()
|
||||||
|
|
||||||
|
def _clean_csv_content(self, content: str, title: str) -> str:
|
||||||
|
"""Clean and validate CSV content from AI."""
|
||||||
|
content = content.strip()
|
||||||
|
|
||||||
|
# Remove markdown code blocks if present
|
||||||
|
if content.startswith("```") and content.endswith("```"):
|
||||||
|
lines = content.split('\n')
|
||||||
|
if len(lines) > 2:
|
||||||
|
content = '\n'.join(lines[1:-1]).strip()
|
||||||
|
|
||||||
|
return content
|
||||||
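For orientation, a sketch of the sections/elements structure this CSV renderer walks. Only the keys mirror what the code above reads; the ids, titles, and values are invented:

    # Hypothetical input document - illustrative only.
    report_json = {
        "metadata": {"title": "Team Capacity"},
        "sections": [
            {"id": "s1", "content_type": "heading",
             "elements": [{"level": 2, "text": "Summary"}]},
            {"id": "s2", "content_type": "table",
             "elements": [{"headers": ["Name", "Days"], "rows": [["Alice", "5"], ["Bob", "3"]]}]},
        ],
    }

    # Inside an async context:
    #   csv_text, mime = await RendererCsv().render(report_json, "Team Capacity")
    #   mime == "text/csv"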
modules/services/serviceGeneration/renderers/rendererDocx.py  (Normal file, 958 lines)
@@ -0,0 +1,958 @@
"""
|
||||||
|
DOCX renderer for report generation using python-docx.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import io
|
||||||
|
import base64
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
|
try:
|
||||||
|
from docx import Document
|
||||||
|
from docx.shared import Inches, Pt, RGBColor
|
||||||
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||||||
|
from docx.enum.table import WD_TABLE_ALIGNMENT
|
||||||
|
from docx.oxml.shared import OxmlElement, qn
|
||||||
|
from docx.oxml.ns import nsdecls
|
||||||
|
from docx.oxml import parse_xml
|
||||||
|
DOCX_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
DOCX_AVAILABLE = False
|
||||||
|
|
||||||
|
class RendererDocx(BaseRenderer):
|
||||||
|
"""Renders content to DOCX format using python-docx."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported DOCX formats."""
|
||||||
|
return ['docx', 'doc']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['word', 'document']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for DOCX renderer."""
|
||||||
|
return 115
|
||||||
|
|
||||||
|
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||||
|
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
|
||||||
|
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={user_prompt[:50] if user_prompt else 'None'}...", "DOCX_RENDERER")
|
||||||
|
try:
|
||||||
|
if not DOCX_AVAILABLE:
|
||||||
|
# Fallback to HTML if python-docx not available
|
||||||
|
from .rendererHtml import RendererHtml
|
||||||
|
html_renderer = RendererHtml()
|
||||||
|
html_content, _ = await html_renderer.render(extracted_content, title)
|
||||||
|
return html_content, "text/html"
|
||||||
|
|
||||||
|
# Generate DOCX using AI-analyzed styling
|
||||||
|
docx_content = await self._generate_docx_from_json(extracted_content, title, user_prompt, ai_service)
|
||||||
|
|
||||||
|
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering DOCX: {str(e)}")
|
||||||
|
# Return minimal fallback
|
||||||
|
return f"DOCX Generation Error: {str(e)}", "text/plain"
|
||||||
|
|
||||||
|
async def _generate_docx_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
||||||
|
"""Generate DOCX content from structured JSON document using AI-generated styling."""
|
||||||
|
try:
|
||||||
|
# Create new document
|
||||||
|
doc = Document()
|
||||||
|
|
||||||
|
# Get AI-generated styling definitions
|
||||||
|
self.logger.info(f"About to call AI styling with user_prompt: {user_prompt[:100] if user_prompt else 'None'}...")
|
||||||
|
styles = await self._get_docx_styles(user_prompt, ai_service)
|
||||||
|
|
||||||
|
# Apply basic document setup
|
||||||
|
self._setup_basic_document_styles(doc)
|
||||||
|
|
||||||
|
# Validate JSON structure
|
||||||
|
if not isinstance(json_content, dict):
|
||||||
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
|
if "sections" not in json_content:
|
||||||
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
|
document_title = json_content.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
|
# Add document title using analyzed styles
|
||||||
|
if document_title:
|
||||||
|
title_heading = doc.add_heading(document_title, level=1)
|
||||||
|
title_heading.alignment = WD_ALIGN_PARAGRAPH.CENTER
|
||||||
|
|
||||||
|
# Process each section in order
|
||||||
|
sections = json_content.get("sections", [])
|
||||||
|
for section in sections:
|
||||||
|
self._render_json_section(doc, section, styles)
|
||||||
|
|
||||||
|
# Save to buffer
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
doc.save(buffer)
|
||||||
|
buffer.seek(0)
|
||||||
|
|
||||||
|
# Convert to base64
|
||||||
|
docx_bytes = buffer.getvalue()
|
||||||
|
docx_base64 = base64.b64encode(docx_bytes).decode('utf-8')
|
||||||
|
|
||||||
|
return docx_base64
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error generating DOCX from JSON: {str(e)}")
|
||||||
|
raise Exception(f"DOCX generation failed: {str(e)}")
|
||||||
|
|
||||||
|
async def _get_docx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||||
|
"""Get DOCX styling definitions using base template AI styling."""
|
||||||
|
style_schema = {
|
||||||
|
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
|
||||||
|
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
|
||||||
|
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
|
||||||
|
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
|
||||||
|
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
|
||||||
|
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
|
||||||
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
|
||||||
|
}
|
||||||
|
|
||||||
|
style_template = self._create_ai_style_template("docx", user_prompt, style_schema)
|
||||||
|
styles = await self._get_ai_styles(ai_service, style_template, self._get_default_styles())
|
||||||
|
|
||||||
|
# Validate and fix contrast issues
|
||||||
|
return self._validate_styles_contrast(styles)
|
||||||
|
|
||||||
|
def _validate_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Validate and fix contrast issues in AI-generated styles."""
|
||||||
|
try:
|
||||||
|
# Fix table header contrast
|
||||||
|
if "table_header" in styles:
|
||||||
|
header = styles["table_header"]
|
||||||
|
bg_color = header.get("background", "#FFFFFF")
|
||||||
|
text_color = header.get("text_color", "#000000")
|
||||||
|
|
||||||
|
# If both are white or both are dark, fix it
|
||||||
|
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
||||||
|
header["background"] = "#4F4F4F"
|
||||||
|
header["text_color"] = "#FFFFFF"
|
||||||
|
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
||||||
|
header["background"] = "#4F4F4F"
|
||||||
|
header["text_color"] = "#FFFFFF"
|
||||||
|
|
||||||
|
# Fix table cell contrast
|
||||||
|
if "table_cell" in styles:
|
||||||
|
cell = styles["table_cell"]
|
||||||
|
bg_color = cell.get("background", "#FFFFFF")
|
||||||
|
text_color = cell.get("text_color", "#000000")
|
||||||
|
|
||||||
|
# If both are white or both are dark, fix it
|
||||||
|
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
||||||
|
cell["background"] = "#FFFFFF"
|
||||||
|
cell["text_color"] = "#2F2F2F"
|
||||||
|
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
||||||
|
cell["background"] = "#FFFFFF"
|
||||||
|
cell["text_color"] = "#2F2F2F"
|
||||||
|
|
||||||
|
return styles
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Style validation failed: {str(e)}")
|
||||||
|
return self._get_default_styles()
|
||||||
|
|
||||||
|
def _get_default_styles(self) -> Dict[str, Any]:
|
||||||
|
"""Default DOCX styles."""
|
||||||
|
return {
|
||||||
|
"title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center"},
|
||||||
|
"heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left"},
|
||||||
|
"heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left"},
|
||||||
|
"paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center"},
|
||||||
|
"table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"table_border": {"style": "horizontal_only", "color": "#000000", "thickness": "thin"},
|
||||||
|
"bullet_list": {"font_size": 11, "color": "#2F2F2F", "indent": 20},
|
||||||
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#2F2F2F", "background": "#F5F5F5"}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _setup_basic_document_styles(self, doc: Document) -> None:
|
||||||
|
"""Set up basic document styles."""
|
||||||
|
try:
|
||||||
|
# Set default font
|
||||||
|
style = doc.styles['Normal']
|
||||||
|
font = style.font
|
||||||
|
font.name = 'Calibri'
|
||||||
|
font.size = Pt(11)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not set up basic document styles: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _clear_template_content(self, doc: Document) -> None:
|
||||||
|
"""Clear template content while preserving styles."""
|
||||||
|
try:
|
||||||
|
# Remove all paragraphs except keep the styles
|
||||||
|
for paragraph in list(doc.paragraphs):
|
||||||
|
# Keep the paragraph but clear its content
|
||||||
|
paragraph.clear()
|
||||||
|
|
||||||
|
# Remove all tables
|
||||||
|
for table in list(doc.tables):
|
||||||
|
table._element.getparent().remove(table._element)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not clear template content: {str(e)}")
|
||||||
|
|
||||||
|
def _render_json_section(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
|
"""Render a single JSON section to DOCX using AI-generated styles."""
|
||||||
|
try:
|
||||||
|
section_type = section.get("content_type", "paragraph")
|
||||||
|
elements = section.get("elements", [])
|
||||||
|
|
||||||
|
# Process each element in the section
|
||||||
|
for element in elements:
|
||||||
|
if section_type == "table":
|
||||||
|
self._render_json_table(doc, element, styles)
|
||||||
|
elif section_type == "bullet_list":
|
||||||
|
self._render_json_bullet_list(doc, element, styles)
|
||||||
|
elif section_type == "heading":
|
||||||
|
self._render_json_heading(doc, element, styles)
|
||||||
|
elif section_type == "paragraph":
|
||||||
|
self._render_json_paragraph(doc, element, styles)
|
||||||
|
elif section_type == "code_block":
|
||||||
|
self._render_json_code_block(doc, element, styles)
|
||||||
|
elif section_type == "image":
|
||||||
|
self._render_json_image(doc, element, styles)
|
||||||
|
else:
|
||||||
|
# Fallback to paragraph for unknown types
|
||||||
|
self._render_json_paragraph(doc, element, styles)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering section {section.get('id', 'unknown')}: {str(e)}")
|
||||||
|
# Add error paragraph as fallback
|
||||||
|
error_para = doc.add_paragraph(f"[Error rendering section: {str(e)}]")
|
||||||
|
|
||||||
|
def _render_json_table(self, doc: Document, table_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
|
"""Render a JSON table to DOCX using AI-generated styles."""
|
||||||
|
try:
|
||||||
|
headers = table_data.get("headers", [])
|
||||||
|
rows = table_data.get("rows", [])
|
||||||
|
|
||||||
|
if not headers or not rows:
|
||||||
|
return
|
||||||
|
|
||||||
|
# Create table
|
||||||
|
table = doc.add_table(rows=len(rows) + 1, cols=len(headers))
|
||||||
|
table.alignment = WD_TABLE_ALIGNMENT.CENTER
|
||||||
|
|
||||||
|
# Apply table borders based on AI style
|
||||||
|
border_style = styles["table_border"]["style"]
|
||||||
|
if border_style == "horizontal_only":
|
||||||
|
self._apply_horizontal_borders_only(table)
|
||||||
|
elif border_style == "grid":
|
||||||
|
table.style = 'Table Grid'
|
||||||
|
# else: no borders
|
||||||
|
|
||||||
|
# Add headers with AI-generated styling
|
||||||
|
header_row = table.rows[0]
|
||||||
|
header_style = styles["table_header"]
|
||||||
|
for i, header in enumerate(headers):
|
||||||
|
if i < len(header_row.cells):
|
||||||
|
cell = header_row.cells[i]
|
||||||
|
cell.text = str(header)
|
||||||
|
|
||||||
|
# Apply background color
|
||||||
|
bg_color = header_style["background"].lstrip('#')
|
||||||
|
self._set_cell_background(cell, RGBColor(int(bg_color[0:2], 16), int(bg_color[2:4], 16), int(bg_color[4:6], 16)))
|
||||||
|
|
||||||
|
# Apply text styling
|
||||||
|
for paragraph in cell.paragraphs:
|
||||||
|
paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER if header_style["align"] == "center" else WD_ALIGN_PARAGRAPH.LEFT
|
||||||
|
for run in paragraph.runs:
|
||||||
|
run.bold = header_style["bold"]
|
||||||
|
run.font.size = Pt(11)
|
||||||
|
text_color = header_style["text_color"].lstrip('#')
|
||||||
|
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
||||||
|
|
||||||
|
# Add data rows with AI-generated styling
|
||||||
|
cell_style = styles["table_cell"]
|
||||||
|
for row_idx, row_data in enumerate(rows):
|
||||||
|
if row_idx + 1 < len(table.rows):
|
||||||
|
table_row = table.rows[row_idx + 1]
|
||||||
|
for col_idx, cell_data in enumerate(row_data):
|
||||||
|
if col_idx < len(table_row.cells):
|
||||||
|
cell = table_row.cells[col_idx]
|
||||||
|
cell.text = str(cell_data)
|
||||||
|
|
||||||
|
# Apply text styling
|
||||||
|
for paragraph in cell.paragraphs:
|
||||||
|
paragraph.alignment = WD_ALIGN_PARAGRAPH.LEFT
|
||||||
|
for run in paragraph.runs:
|
||||||
|
run.font.size = Pt(10)
|
||||||
|
text_color = cell_style["text_color"].lstrip('#')
|
||||||
|
run.font.color.rgb = RGBColor(int(text_color[0:2], 16), int(text_color[2:4], 16), int(text_color[4:6], 16))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
|
|
||||||
|
def _apply_horizontal_borders_only(self, table) -> None:
|
||||||
|
"""Apply only horizontal borders to the table (no vertical borders)."""
|
||||||
|
try:
|
||||||
|
from docx.oxml.shared import OxmlElement, qn
|
||||||
|
|
||||||
|
# Get table properties
|
||||||
|
tbl_pr = table._element.find(qn('w:tblPr'))
|
||||||
|
if tbl_pr is None:
|
||||||
|
tbl_pr = OxmlElement('w:tblPr')
|
||||||
|
table._element.insert(0, tbl_pr)
|
||||||
|
|
||||||
|
# Remove existing borders
|
||||||
|
existing_borders = tbl_pr.find(qn('w:tblBorders'))
|
||||||
|
if existing_borders is not None:
|
||||||
|
tbl_pr.remove(existing_borders)
|
||||||
|
|
||||||
|
# Create new borders element
|
||||||
|
tbl_borders = OxmlElement('w:tblBorders')
|
||||||
|
|
||||||
|
# Top border
|
||||||
|
top_border = OxmlElement('w:top')
|
||||||
|
top_border.set(qn('w:val'), 'single')
|
||||||
|
top_border.set(qn('w:sz'), '4')
|
||||||
|
top_border.set(qn('w:space'), '0')
|
||||||
|
top_border.set(qn('w:color'), '000000')
|
||||||
|
tbl_borders.append(top_border)
|
||||||
|
|
||||||
|
# Bottom border
|
||||||
|
bottom_border = OxmlElement('w:bottom')
|
||||||
|
bottom_border.set(qn('w:val'), 'single')
|
||||||
|
bottom_border.set(qn('w:sz'), '4')
|
||||||
|
bottom_border.set(qn('w:space'), '0')
|
||||||
|
bottom_border.set(qn('w:color'), '000000')
|
||||||
|
tbl_borders.append(bottom_border)
|
||||||
|
|
||||||
|
# Left border - none
|
||||||
|
left_border = OxmlElement('w:left')
|
||||||
|
left_border.set(qn('w:val'), 'none')
|
||||||
|
tbl_borders.append(left_border)
|
||||||
|
|
||||||
|
# Right border - none
|
||||||
|
right_border = OxmlElement('w:right')
|
||||||
|
right_border.set(qn('w:val'), 'none')
|
||||||
|
tbl_borders.append(right_border)
|
||||||
|
|
||||||
|
# Inside horizontal border
|
||||||
|
inside_h_border = OxmlElement('w:insideH')
|
||||||
|
inside_h_border.set(qn('w:val'), 'single')
|
||||||
|
inside_h_border.set(qn('w:sz'), '4')
|
||||||
|
inside_h_border.set(qn('w:space'), '0')
|
||||||
|
inside_h_border.set(qn('w:color'), '000000')
|
||||||
|
tbl_borders.append(inside_h_border)
|
||||||
|
|
||||||
|
# Inside vertical border - none
|
||||||
|
inside_v_border = OxmlElement('w:insideV')
|
||||||
|
inside_v_border.set(qn('w:val'), 'none')
|
||||||
|
tbl_borders.append(inside_v_border)
|
||||||
|
|
||||||
|
tbl_pr.append(tbl_borders)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not apply horizontal borders: {str(e)}")
|
||||||
|
|
||||||
|
def _set_cell_background(self, cell, color: RGBColor) -> None:
|
||||||
|
"""Set the background color of a table cell."""
|
||||||
|
try:
|
||||||
|
from docx.oxml.shared import OxmlElement, qn
|
||||||
|
|
||||||
|
# Get cell properties
|
||||||
|
tc_pr = cell._element.find(qn('w:tcPr'))
|
||||||
|
if tc_pr is None:
|
||||||
|
tc_pr = OxmlElement('w:tcPr')
|
||||||
|
cell._element.insert(0, tc_pr)
|
||||||
|
|
||||||
|
# Remove existing shading
|
||||||
|
existing_shading = tc_pr.find(qn('w:shd'))
|
||||||
|
if existing_shading is not None:
|
||||||
|
tc_pr.remove(existing_shading)
|
||||||
|
|
||||||
|
# Create new shading element
|
||||||
|
shading = OxmlElement('w:shd')
|
||||||
|
shading.set(qn('w:val'), 'clear')
|
||||||
|
shading.set(qn('w:color'), 'auto')
|
||||||
|
# Convert RGBColor to hex string by unpacking RGB components
|
||||||
|
red, green, blue = color
|
||||||
|
hex_color = f"{red:02x}{green:02x}{blue:02x}"
|
||||||
|
shading.set(qn('w:fill'), hex_color)
|
||||||
|
tc_pr.append(shading)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not set cell background: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
|
    def _render_json_bullet_list(self, doc: Document, list_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON bullet list to DOCX using AI-generated styles."""
        try:
            items = list_data.get("items", [])
            bullet_style = styles["bullet_list"]

            for item in items:
                if isinstance(item, str):
                    para = doc.add_paragraph(item, style='List Bullet')
                elif isinstance(item, dict) and "text" in item:
                    para = doc.add_paragraph(item["text"], style='List Bullet')

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
    def _render_json_heading(self, doc: Document, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON heading to DOCX using AI-generated styles."""
        try:
            level = heading_data.get("level", 1)
            text = heading_data.get("text", "")

            if text:
                level = max(1, min(6, level))
                doc.add_heading(text, level=level)

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
    def _render_json_paragraph(self, doc: Document, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON paragraph to DOCX using AI-generated styles."""
        try:
            text = paragraph_data.get("text", "")

            if text:
                para = doc.add_paragraph(text)

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
    def _render_json_code_block(self, doc: Document, code_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON code block to DOCX using AI-generated styles."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")

            if code:
                if language:
                    lang_para = doc.add_paragraph(f"Code ({language}):")
                    lang_para.runs[0].bold = True

                code_para = doc.add_paragraph(code)
                for run in code_para.runs:
                    run.font.name = 'Courier New'
                    run.font.size = Pt(10)

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
    def _render_json_image(self, doc: Document, image_data: Dict[str, Any], styles: Dict[str, Any]) -> None:
        """Render a JSON image to DOCX."""
        try:
            base64_data = image_data.get("base64Data", "")
            alt_text = image_data.get("altText", "Image")

            if base64_data:
                image_bytes = base64.b64decode(base64_data)
                doc.add_picture(io.BytesIO(image_bytes), width=Inches(4))

                if alt_text:
                    caption_para = doc.add_paragraph(f"Figure: {alt_text}")
                    caption_para.runs[0].italic = True

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            doc.add_paragraph(f"[Image: {image_data.get('altText', 'Image')}]")
    def _extract_structure_from_prompt(self, user_prompt: str, title: str) -> Dict[str, Any]:
        """Extract document structure from user prompt."""
        structure = {
            'title': title,
            'sections': [],
            'format': 'standard'
        }

        if not user_prompt:
            return structure

        # Extract title from prompt if not provided
        if not title or title == "Generated Document":
            # Look for "create a ... document" or "generate a ... report"
            import re
            title_match = re.search(r'(?:create|generate|make)\s+a\s+([^,]+?)(?:\s+document|\s+report|\s+summary)', user_prompt.lower())
            if title_match:
                structure['title'] = title_match.group(1).strip().title()

        # Extract sections from numbered lists in prompt
        import re
        section_pattern = r'(\d+)\)?\s*([^,]+?)(?:\s*[,:]|\s*$)'
        sections = re.findall(section_pattern, user_prompt)

        for num, section_text in sections:
            structure['sections'].append({
                'number': int(num),
                'title': section_text.strip(),
                'level': 2  # H2 level
            })

        # If no numbered sections found, try to extract from "including:" patterns
        if not structure['sections']:
            including_match = re.search(r'including:\s*(.+?)(?:\.|$)', user_prompt, re.DOTALL)
            if including_match:
                including_text = including_match.group(1)
                # Split by common separators
                parts = re.split(r'[,;]\s*', including_text)
                for i, part in enumerate(parts, 1):
                    part = part.strip()
                    if part:
                        structure['sections'].append({
                            'number': i,
                            'title': part,
                            'level': 2
                        })

        # If still no sections, extract from any list-like patterns
        if not structure['sections']:
            # Look for bullet points or dashes
            bullet_pattern = r'[-•]\s*([^,\n]+?)(?:\s*[,:]|\s*$)'
            bullets = re.findall(bullet_pattern, user_prompt)
            for i, bullet in enumerate(bullets, 1):
                bullet = bullet.strip()
                if bullet and len(bullet) > 3:
                    structure['sections'].append({
                        'number': i,
                        'title': bullet,
                        'level': 2
                    })

        # If still no sections, extract from sentence structure
        if not structure['sections']:
            # Split prompt into sentences and use as sections
            sentences = re.split(r'[.!?]\s+', user_prompt)
            for i, sentence in enumerate(sentences[:5], 1):  # Max 5 sections
                sentence = sentence.strip()
                if sentence and len(sentence) > 10 and not sentence.startswith(('Analyze', 'Create', 'Generate')):
                    structure['sections'].append({
                        'number': i,
                        'title': sentence[:50] + "..." if len(sentence) > 50 else sentence,
                        'level': 2
                    })

        # Final fallback: create sections from prompt keywords
        if not structure['sections']:
            # Extract key action words from prompt
            action_words = ['analyze', 'summarize', 'review', 'assess', 'evaluate', 'examine', 'investigate']
            found_actions = []
            for action in action_words:
                if action in user_prompt.lower():
                    found_actions.append(action.title())

            if found_actions:
                for i, action in enumerate(found_actions[:3], 1):
                    structure['sections'].append({
                        'number': i,
                        'title': f"{action} Document Content",
                        'level': 2
                    })
            else:
                # Last resort: generic but meaningful sections
                structure['sections'] = [
                    {'number': 1, 'title': 'Document Analysis', 'level': 2},
                    {'number': 2, 'title': 'Key Information', 'level': 2},
                    {'number': 3, 'title': 'Summary and Conclusions', 'level': 2}
                ]

        return structure
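    # Example of what the extraction above produces (hypothetical prompt, for
    # illustration only): "Create a project status report including: overview, open risks, next steps"
    # would yield roughly:
    #   {'title': 'Project Status', 'format': 'standard',
    #    'sections': [{'number': 1, 'title': 'overview', 'level': 2},
    #                 {'number': 2, 'title': 'open risks', 'level': 2},
    #                 {'number': 3, 'title': 'next steps', 'level': 2}]}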
    def _generate_content_from_structure(self, doc, content: str, structure: Dict[str, Any]):
        """Generate DOCX content based on extracted structure."""
        # Add sections based on prompt structure
        for section in structure['sections']:
            # Add section heading
            doc.add_heading(f"{section['number']}) {section['title']}", level=section['level'])

            # Add AI-generated content for this section
            # Try to extract relevant content for this section from the AI response
            section_content = self._extract_section_content(content, section['title'])

            if section_content:
                doc.add_paragraph(section_content)
            else:
                # If no specific content found, add a note
                doc.add_paragraph(f"Content for {section['title']} based on document analysis.")

            # Add some spacing
            doc.add_paragraph()

        # Add the complete AI-generated content as additional analysis
        if content and content.strip():
            doc.add_heading("Complete Analysis", level=1)
            doc.add_paragraph(content)
    def _extract_section_content(self, content: str, section_title: str) -> str:
        """Extract relevant content for a specific section from AI response."""
        if not content or not section_title:
            return ""

        # Look for content that matches the section title
        section_keywords = section_title.lower().split()

        # Split content into paragraphs
        paragraphs = content.split('\n\n')

        relevant_paragraphs = []
        for paragraph in paragraphs:
            paragraph_lower = paragraph.lower()
            # Check if paragraph contains keywords from section title
            if any(keyword in paragraph_lower for keyword in section_keywords if len(keyword) > 3):
                relevant_paragraphs.append(paragraph.strip())

        if relevant_paragraphs:
            return '\n\n'.join(relevant_paragraphs[:2])  # Max 2 paragraphs per section

        return ""
    def _setup_document_styles(self, doc):
        """Set up document styles."""
        try:
            # Set default font
            style = doc.styles['Normal']
            font = style.font
            font.name = 'Calibri'
            font.size = Pt(11)

            # Set heading styles
            for i in range(1, 4):
                heading_style = doc.styles[f'Heading {i}']
                heading_font = heading_style.font
                heading_font.name = 'Calibri'
                heading_font.size = Pt(16 - i * 2)
                heading_font.bold = True
        except Exception as e:
            self.logger.warning(f"Could not set up document styles: {str(e)}")
    def _process_section(self, doc, lines: list):
        """Process a section of content into DOCX elements."""
        for line in lines:
            if not line.strip():
                continue

            # Check for tables (lines with |)
            if '|' in line and not line.startswith('|'):
                # This might be part of a table, process as table
                table_data = self._extract_table_data(lines)
                if table_data:
                    self._add_table(doc, table_data)
                    return

            # Check for lists
            if line.startswith('- ') or line.startswith('* '):
                # This is a list item
                doc.add_paragraph(line[2:], style='List Bullet')
            elif line.startswith(('1. ', '2. ', '3. ', '4. ', '5. ')):
                # This is a numbered list item
                doc.add_paragraph(line[3:], style='List Number')
            else:
                # Regular paragraph
                doc.add_paragraph(line)
    def _extract_table_data(self, lines: list) -> list:
        """Extract table data from lines."""
        table_data = []
        in_table = False

        for line in lines:
            if '|' in line:
                if not in_table:
                    in_table = True
                # Split by | and clean up
                cells = [cell.strip() for cell in line.split('|') if cell.strip()]
                if cells:
                    table_data.append(cells)
            elif in_table and not line.strip():
                # Empty line, might be end of table
                break

        return table_data if len(table_data) > 1 else []
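    # Illustrative input/output for the extraction above (hypothetical values):
    #   lines = ["Name | Role", "Alice | Engineer", "Bob | Analyst"]
    # returns
    #   [['Name', 'Role'], ['Alice', 'Engineer'], ['Bob', 'Analyst']]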
    def _add_table(self, doc, table_data: list):
        """Add a table to the document."""
        try:
            if not table_data:
                return

            # Create table
            table = doc.add_table(rows=len(table_data), cols=len(table_data[0]))
            table.alignment = WD_TABLE_ALIGNMENT.CENTER

            # Add data to table
            for row_idx, row_data in enumerate(table_data):
                for col_idx, cell_data in enumerate(row_data):
                    if col_idx < len(table.rows[row_idx].cells):
                        table.rows[row_idx].cells[col_idx].text = cell_data

            # Style the table
            self._style_table(table)

        except Exception as e:
            self.logger.warning(f"Could not add table: {str(e)}")
    def _style_table(self, table):
        """Apply styling to the table."""
        try:
            # Style header row
            if len(table.rows) > 0:
                header_cells = table.rows[0].cells
                for cell in header_cells:
                    for paragraph in cell.paragraphs:
                        for run in paragraph.runs:
                            run.bold = True
        except Exception as e:
            self.logger.warning(f"Could not style table: {str(e)}")
    def _process_table_row(self, doc, line: str):
        """Process a table row and add it to the document."""
        if not line.strip():
            return

        # Split by pipe separator
        parts = [part.strip() for part in line.split('|')]

        if len(parts) >= 2:
            # This is a table row - create a table if it doesn't exist
            if not hasattr(self, '_current_table') or self._current_table is None:
                # Create new table
                self._current_table = doc.add_table(rows=1, cols=len(parts))
                self._current_table.style = 'Table Grid'

                # Add header row
                for i, part in enumerate(parts):
                    if i < len(self._current_table.rows[0].cells):
                        cell = self._current_table.rows[0].cells[i]
                        cell.text = part
                        # Make header bold
                        for paragraph in cell.paragraphs:
                            for run in paragraph.runs:
                                run.bold = True
            else:
                # Add data row to existing table
                row = self._current_table.add_row()
                for i, part in enumerate(parts):
                    if i < len(row.cells):
                        row.cells[i].text = part
        else:
            # Not a table row, treat as regular text
            doc.add_paragraph(line)
    def _clean_ai_content(self, content: str) -> str:
        """Clean AI-generated content by removing debug information and duplicates."""
        if not content:
            return ""

        # Remove debug information
        lines = content.split('\n')
        clean_lines = []

        for line in lines:
            # Skip debug lines and separators
            if (line.startswith('[Skipped ') or
                    line.startswith('=== DOCUMENT:') or
                    line.startswith('---') or
                    line.startswith('FILENAME:') or
                    line.strip() == '' or
                    line.strip() == '---'):
                continue
            clean_lines.append(line)

        # Join lines and remove duplicate content
        clean_content = '\n'.join(clean_lines)

        # Remove duplicate sections by keeping only the first occurrence
        sections = clean_content.split('\n\n')
        seen_sections = set()
        unique_sections = []

        for section in sections:
            section_key = section.strip()[:50]  # Use first 50 chars as key
            if section_key not in seen_sections and section.strip():
                seen_sections.add(section_key)
                unique_sections.append(section)

        return '\n\n'.join(unique_sections)
    def _process_tables(self, doc, content: str) -> str:
        """
        Process tables in the content (both CSV and pipe-separated) and convert them to Word tables.
        Returns the content with tables replaced by placeholders.
        """
        import csv
        import io

        lines = content.split('\n')
        processed_lines = []
        i = 0

        while i < len(lines):
            line = lines[i].strip()

            # Check if this line looks like a table (contains pipes or commas with multiple fields)
            is_pipe_table = '|' in line and len(line.split('|')) >= 2
            is_csv_table = ',' in line and len(line.split(',')) >= 2

            if is_pipe_table or is_csv_table:
                # Collect consecutive table lines
                table_lines = []
                j = i

                # Determine separator and collect lines
                separator = '|' if is_pipe_table else ','
                while j < len(lines):
                    current_line = lines[j].strip()
                    if separator in current_line and len(current_line.split(separator)) >= 2:
                        table_lines.append(current_line)
                        j += 1
                    else:
                        break

                if len(table_lines) >= 2:  # At least header + 1 data row
                    # Create Word table
                    try:
                        if separator == '|':
                            # Process pipe-separated table
                            rows = []
                            for table_line in table_lines:
                                # Split by pipe and clean up
                                cells = [cell.strip() for cell in table_line.split('|')]
                                rows.append(cells)
                        else:
                            # Process CSV table
                            csv_content = '\n'.join(table_lines)
                            csv_reader = csv.reader(io.StringIO(csv_content))
                            rows = list(csv_reader)

                        if rows and len(rows[0]) > 0:
                            # Create Word table
                            table = doc.add_table(rows=len(rows), cols=len(rows[0]))
                            table.style = 'Table Grid'

                            # Populate table
                            for row_idx, row_data in enumerate(rows):
                                for col_idx, cell_data in enumerate(row_data):
                                    if col_idx < len(table.rows[row_idx].cells):
                                        table.rows[row_idx].cells[col_idx].text = cell_data.strip()

                                # Make header row bold
                                if row_idx == 0:
                                    for cell in table.rows[row_idx].cells:
                                        for paragraph in cell.paragraphs:
                                            for run in paragraph.runs:
                                                run.bold = True

                            # Add placeholder to mark where table was inserted
                            processed_lines.append(f"[TABLE_INSERTED_{len(processed_lines)}]")

                            # Skip the table lines
                            i = j
                            continue
                    except Exception as e:
                        # If table parsing fails, treat as regular text
                        pass

            processed_lines.append(line)
            i += 1

        return '\n'.join(processed_lines)
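    # Sketch of the behaviour above (hypothetical content): a block such as
    #   "Metric | Value\nUptime | 99.9%\nErrors | 3"
    # becomes a 3x2 'Table Grid' Word table with a bold header row, and the returned
    # text contains a "[TABLE_INSERTED_n]" placeholder in place of those lines.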
    def _parse_and_format_content(self, doc, content: str, title: str):
        """Parse AI-generated content in standardized format and apply proper DOCX formatting."""
        if not content:
            return

        # Process tables and replace them with placeholders
        content = self._process_tables(doc, content)

        # Parse content line by line in exact sequence
        lines = content.split('\n')
        code_lines = None  # Holds the lines of an open fenced code block, if any

        for line in lines:
            line = line.strip()

            # If a fenced code block is open, collect lines until the closing ``` fence
            if code_lines is not None:
                if line.startswith('```'):
                    # End of code block - emit the collected lines as monospaced text
                    code_text = '\n'.join(code_lines)
                    para = doc.add_paragraph()
                    run = para.add_run(code_text)
                    run.font.name = 'Courier New'
                    code_lines = None
                else:
                    code_lines.append(line)
                continue

            if not line:
                # Empty line - add paragraph break
                doc.add_paragraph()
                continue

            # Skip table placeholders (already processed)
            if line.startswith('[TABLE_INSERTED_'):
                continue

            # Check if this is a Markdown heading (# ## ###)
            if line.startswith('#'):
                level = len(line) - len(line.lstrip('#'))
                heading_text = line.lstrip('# ').strip()
                doc.add_heading(heading_text, level=min(level, 3))

            # Check if this is a numbered heading (1) Title, 2) Title, etc.)
            elif re.match(r'^\d+\)\s+.+', line):
                heading_text = re.sub(r'^\d+\)\s+', '', line)
                doc.add_heading(heading_text, level=1)

            # Check if this is a Markdown list item
            elif line.startswith('- ') or re.match(r'^\d+\.\s+', line):
                bullet_text = re.sub(r'^[-•]\s+|\d+\.\s+', '', line)
                self._add_bullet_point(doc, bullet_text)

            # Check if this is a code block fence
            elif line.startswith('```'):
                # Start of code block - collect following lines until the closing fence
                code_lines = []

            # Regular paragraph
            else:
                self._add_paragraph_to_doc(doc, line)
    def _add_paragraph_to_doc(self, doc, text: str):
        """Add a paragraph to the document with proper formatting."""
        if not text.strip():
            return

        # Check for Markdown formatting (**bold**, *italic*)
        para = doc.add_paragraph()

        # Split by bold markers
        parts = text.split('**')
        for i, part in enumerate(parts):
            if i % 2 == 0:
                # Regular text - check for italic
                italic_parts = part.split('*')
                for j, italic_part in enumerate(italic_parts):
                    if j % 2 == 0:
                        # Regular text
                        if italic_part:
                            para.add_run(italic_part)
                    else:
                        # Italic text
                        if italic_part:
                            run = para.add_run(italic_part)
                            run.italic = True
            else:
                # Bold text
                if part:
                    run = para.add_run(part)
                    run.bold = True
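    # Formatting sketch for the run-splitting above (illustrative input only):
    #   self._add_paragraph_to_doc(doc, "The **total** cost is *estimated*.")
    # produces runs: "The ", "total" (bold), " cost is ", "estimated" (italic), "."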

modules/services/serviceGeneration/renderers/rendererHtml.py (new file, 424 lines)
@@ -0,0 +1,424 @@
"""
|
||||||
|
HTML renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
|
||||||
|
class RendererHtml(BaseRenderer):
|
||||||
|
"""Renders content to HTML format with format-specific extraction."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported HTML formats."""
|
||||||
|
return ['html', 'htm']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['web', 'webpage']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for HTML renderer."""
|
||||||
|
return 100
|
||||||
|
|
||||||
|
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||||
|
"""Render extracted JSON content to HTML format using AI-analyzed styling."""
|
||||||
|
try:
|
||||||
|
# Generate HTML using AI-analyzed styling
|
||||||
|
html_content = await self._generate_html_from_json(extracted_content, title, user_prompt, ai_service)
|
||||||
|
|
||||||
|
return html_content, "text/html"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering HTML: {str(e)}")
|
||||||
|
# Return minimal HTML fallback
|
||||||
|
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
|
||||||
|
|
||||||
|
    async def _generate_html_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Generate HTML content from structured JSON document using AI-generated styling."""
        try:
            # Get AI-generated styling definitions
            styles = await self._get_html_styles(user_prompt, ai_service)

            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)

            # Build HTML document
            html_parts = []

            # HTML document structure
            html_parts.append('<!DOCTYPE html>')
            html_parts.append('<html lang="en">')
            html_parts.append('<head>')
            html_parts.append('<meta charset="UTF-8">')
            html_parts.append('<meta name="viewport" content="width=device-width, initial-scale=1.0">')
            html_parts.append(f'<title>{document_title}</title>')
            html_parts.append('<style>')
            html_parts.append(self._generate_css_styles(styles))
            html_parts.append('</style>')
            html_parts.append('</head>')
            html_parts.append('<body>')

            # Document header
            html_parts.append(f'<header><h1 class="document-title">{document_title}</h1></header>')

            # Main content
            html_parts.append('<main>')

            # Process each section
            sections = json_content.get("sections", [])
            for section in sections:
                section_html = self._render_json_section(section, styles)
                if section_html:
                    html_parts.append(section_html)

            html_parts.append('</main>')

            # Footer
            html_parts.append('<footer>')
            html_parts.append(f'<p class="generated-info">Generated: {self._format_timestamp()}</p>')
            html_parts.append('</footer>')

            html_parts.append('</body>')
            html_parts.append('</html>')

            return '\n'.join(html_parts)

        except Exception as e:
            self.logger.error(f"Error generating HTML from JSON: {str(e)}")
            raise Exception(f"HTML generation failed: {str(e)}")
    async def _get_html_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
        """Get HTML styling definitions using base template AI styling."""
        style_schema = {
            "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
            "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
            "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
            "paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"},
            "table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"},
            "table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"},
            "table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"},
            "bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"},
            "code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"},
            "image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"},
            "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
        }

        style_template = self._create_ai_style_template("html", user_prompt, style_schema)
        styles = await self._get_ai_styles(ai_service, style_template, self._get_default_html_styles())

        # Validate and fix contrast issues
        return self._validate_html_styles_contrast(styles)
    def _validate_html_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
            if "table_header" in styles:
                header = styles["table_header"]
                bg_color = header.get("background", "#FFFFFF")
                text_color = header.get("color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    header["background"] = "#4F4F4F"
                    header["color"] = "#FFFFFF"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    header["background"] = "#4F4F4F"
                    header["color"] = "#FFFFFF"

            # Fix table cell contrast
            if "table_cell" in styles:
                cell = styles["table_cell"]
                bg_color = cell.get("background", "#FFFFFF")
                text_color = cell.get("color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    cell["background"] = "#FFFFFF"
                    cell["color"] = "#2F2F2F"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    cell["background"] = "#FFFFFF"
                    cell["color"] = "#2F2F2F"

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_html_styles()
    def _get_default_html_styles(self) -> Dict[str, Any]:
        """Default HTML styles."""
        return {
            "title": {"font_size": "2.5em", "color": "#1F4E79", "font_weight": "bold", "text_align": "center", "margin": "0 0 1em 0"},
            "heading1": {"font_size": "2em", "color": "#2F2F2F", "font_weight": "bold", "text_align": "left", "margin": "1.5em 0 0.5em 0"},
            "heading2": {"font_size": "1.5em", "color": "#4F4F4F", "font_weight": "bold", "text_align": "left", "margin": "1em 0 0.5em 0"},
            "paragraph": {"font_size": "1em", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "margin": "0 0 1em 0", "line_height": "1.6"},
            "table": {"border": "1px solid #ddd", "border_collapse": "collapse", "width": "100%", "margin": "1em 0"},
            "table_header": {"background": "#4F4F4F", "color": "#FFFFFF", "font_weight": "bold", "text_align": "center", "padding": "12px"},
            "table_cell": {"background": "#FFFFFF", "color": "#2F2F2F", "font_weight": "normal", "text_align": "left", "padding": "8px", "border": "1px solid #ddd"},
            "bullet_list": {"font_size": "1em", "color": "#2F2F2F", "margin": "0 0 1em 0", "padding_left": "20px"},
            "code_block": {"font_family": "Courier New, monospace", "font_size": "0.9em", "color": "#2F2F2F", "background": "#F5F5F5", "padding": "1em", "border": "1px solid #ddd", "border_radius": "4px", "margin": "1em 0"},
            "image": {"max_width": "100%", "height": "auto", "margin": "1em 0", "border_radius": "4px"},
            "body": {"font_family": "Arial, sans-serif", "background": "#FFFFFF", "color": "#2F2F2F", "margin": "0", "padding": "20px"}
        }
    def _generate_css_styles(self, styles: Dict[str, Any]) -> str:
        """Generate CSS from style definitions."""
        css_parts = []

        # Body styles
        body_style = styles.get("body", {})
        css_parts.append("body {")
        for property_name, value in body_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Document title
        title_style = styles.get("title", {})
        css_parts.append(".document-title {")
        for property_name, value in title_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Headings
        for heading_level in ["heading1", "heading2"]:
            heading_style = styles.get(heading_level, {})
            css_class = f"h{heading_level[-1]}"
            css_parts.append(f"{css_class} {{")
            for property_name, value in heading_style.items():
                css_property = property_name.replace("_", "-")
                css_parts.append(f"  {css_property}: {value};")
            css_parts.append("}")

        # Paragraphs
        paragraph_style = styles.get("paragraph", {})
        css_parts.append("p {")
        for property_name, value in paragraph_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Tables
        table_style = styles.get("table", {})
        css_parts.append("table {")
        for property_name, value in table_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Table headers
        table_header_style = styles.get("table_header", {})
        css_parts.append("th {")
        for property_name, value in table_header_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Table cells
        table_cell_style = styles.get("table_cell", {})
        css_parts.append("td {")
        for property_name, value in table_cell_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Lists
        bullet_list_style = styles.get("bullet_list", {})
        css_parts.append("ul {")
        for property_name, value in bullet_list_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Code blocks
        code_block_style = styles.get("code_block", {})
        css_parts.append("pre {")
        for property_name, value in code_block_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Images
        image_style = styles.get("image", {})
        css_parts.append("img {")
        for property_name, value in image_style.items():
            css_property = property_name.replace("_", "-")
            css_parts.append(f"  {css_property}: {value};")
        css_parts.append("}")

        # Generated info
        css_parts.append(".generated-info {")
        css_parts.append("  font-size: 0.9em;")
        css_parts.append("  color: #666;")
        css_parts.append("  text-align: center;")
        css_parts.append("  margin-top: 2em;")
        css_parts.append("  padding-top: 1em;")
        css_parts.append("  border-top: 1px solid #ddd;")
        css_parts.append("}")

        return '\n'.join(css_parts)
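    # Example of the CSS emitted above for one entry (derived from the defaults):
    #   "table_header": {"background": "#4F4F4F", "color": "#FFFFFF", ...}
    # produces roughly
    #   th {
    #     background: #4F4F4F;
    #     color: #FFFFFF;
    #     ...
    #   }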
    def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a single JSON section to HTML using AI-generated styles."""
        try:
            section_type = self._get_section_type(section)
            section_data = self._get_section_data(section)

            if section_type == "table":
                # Process the section data to extract table structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_table(processed_data, styles)
            elif section_type == "bullet_list":
                # Process the section data to extract bullet list structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_bullet_list(processed_data, styles)
            elif section_type == "heading":
                return self._render_json_heading(section_data, styles)
            elif section_type == "paragraph":
                return self._render_json_paragraph(section_data, styles)
            elif section_type == "code_block":
                # Process the section data to extract code block structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_code_block(processed_data, styles)
            elif section_type == "image":
                # Process the section data to extract image structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_image(processed_data, styles)
            else:
                # Fallback to paragraph for unknown types
                return self._render_json_paragraph(section_data, styles)

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            return f'<div class="error">[Error rendering section: {str(e)}]</div>'
    def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON table to HTML using AI-generated styles."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers or not rows:
                return ""

            html_parts = ['<table>']

            # Table header
            html_parts.append('<thead><tr>')
            for header in headers:
                html_parts.append(f'<th>{header}</th>')
            html_parts.append('</tr></thead>')

            # Table body
            html_parts.append('<tbody>')
            for row in rows:
                html_parts.append('<tr>')
                for cell_data in row:
                    html_parts.append(f'<td>{cell_data}</td>')
                html_parts.append('</tr>')
            html_parts.append('</tbody>')

            html_parts.append('</table>')
            return '\n'.join(html_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""
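    # Illustrative output of the table rendering above (hypothetical data), joined with newlines:
    #   {"headers": ["Metric", "Value"], "rows": [["Uptime", "99.9%"]]}
    # produces roughly
    #   <table><thead><tr><th>Metric</th><th>Value</th></tr></thead>
    #   <tbody><tr><td>Uptime</td><td>99.9%</td></tr></tbody></table>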
    def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON bullet list to HTML using AI-generated styles."""
        try:
            items = list_data.get("items", [])

            if not items:
                return ""

            html_parts = ['<ul>']
            for item in items:
                if isinstance(item, str):
                    html_parts.append(f'<li>{item}</li>')
                elif isinstance(item, dict) and "text" in item:
                    html_parts.append(f'<li>{item["text"]}</li>')
            html_parts.append('</ul>')

            return '\n'.join(html_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""
    def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON heading to HTML using AI-generated styles."""
        try:
            level = heading_data.get("level", 1)
            text = heading_data.get("text", "")

            if text:
                level = max(1, min(6, level))
                return f'<h{level}>{text}</h{level}>'

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""
    def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON paragraph to HTML using AI-generated styles."""
        try:
            text = paragraph_data.get("text", "")

            if text:
                return f'<p>{text}</p>'

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""
    def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON code block to HTML using AI-generated styles."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")

            if code:
                if language:
                    return f'<pre><code class="language-{language}">{code}</code></pre>'
                else:
                    return f'<pre><code>{code}</code></pre>'

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""
    def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> str:
        """Render a JSON image to HTML."""
        try:
            base64_data = image_data.get("base64Data", "")
            alt_text = image_data.get("altText", "Image")

            if base64_data:
                return f'<img src="data:image/png;base64,{base64_data}" alt="{alt_text}">'

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            return f'<div class="error">[Image: {image_data.get("altText", "Image")}]</div>'

modules/services/serviceGeneration/renderers/rendererImage.py (new file, 281 lines)
@@ -0,0 +1,281 @@
"""
|
||||||
|
Image renderer for report generation using AI image generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import base64
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class RendererImage(BaseRenderer):
|
||||||
|
"""Renders content to image format using AI image generation."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported image formats."""
|
||||||
|
return ['png', 'jpg', 'jpeg', 'image']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['img', 'picture', 'photo', 'graphic']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for image renderer."""
|
||||||
|
return 90
|
||||||
|
|
||||||
|
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||||
|
"""Render extracted JSON content to image format using AI image generation."""
|
||||||
|
try:
|
||||||
|
# Generate AI image from content
|
||||||
|
image_content = await self._generate_ai_image(extracted_content, title, user_prompt, ai_service)
|
||||||
|
|
||||||
|
return image_content, "image/png"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering image: {str(e)}")
|
||||||
|
# Re-raise the exception instead of using fallback
|
||||||
|
raise Exception(f"Image rendering failed: {str(e)}")
|
||||||
|
|
||||||
|
    async def _generate_ai_image(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Generate AI image from extracted content."""
        try:
            if not ai_service:
                raise ValueError("AI service is required for image generation")

            # Validate JSON structure
            if not isinstance(extracted_content, dict):
                raise ValueError("Extracted content must be a dictionary")

            if "sections" not in extracted_content:
                raise ValueError("Extracted content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
            document_title = extracted_content.get("metadata", {}).get("title", title)

            # Create AI prompt for image generation
            image_prompt = await self._create_image_generation_prompt(extracted_content, document_title, user_prompt, ai_service)

            # Generate image using AI
            image_result = await ai_service.aiObjects.generateImage(
                prompt=image_prompt,
                size="1024x1024",
                quality="standard",
                style="vivid"
            )

            # Extract base64 image data from result
            if image_result and image_result.get("success", False):
                image_data = image_result.get("image_data", "")
                if image_data:
                    return image_data
                else:
                    raise ValueError("No image data returned from AI")
            else:
                error_msg = image_result.get("error", "Unknown error") if image_result else "No result"
                raise ValueError(f"AI image generation failed: {error_msg}")

        except Exception as e:
            self.logger.error(f"Error generating AI image: {str(e)}")
            raise Exception(f"AI image generation failed: {str(e)}")
    async def _create_image_generation_prompt(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Create a detailed prompt for AI image generation based on the content."""
        try:
            # Start with base prompt
            prompt_parts = []

            # Add user's original intent if available
            if user_prompt:
                prompt_parts.append(f"User Request: {user_prompt}")

            # Add document title
            prompt_parts.append(f"Document Title: {title}")

            # Analyze content and create visual description
            sections = extracted_content.get("sections", [])
            content_description = self._analyze_content_for_visual_description(sections)

            if content_description:
                prompt_parts.append(f"Content to Visualize: {content_description}")

            # Add style guidance
            style_guidance = self._get_style_guidance_from_content(extracted_content, user_prompt)
            if style_guidance:
                prompt_parts.append(f"Visual Style: {style_guidance}")

            # Combine all parts
            full_prompt = "Create a professional, informative image that visualizes the following content:\n\n" + "\n\n".join(prompt_parts)

            # Add technical requirements
            full_prompt += "\n\nTechnical Requirements:"
            full_prompt += "\n- High quality, professional appearance"
            full_prompt += "\n- Clear, readable text if any text is included"
            full_prompt += "\n- Appropriate colors and layout"
            full_prompt += "\n- Suitable for business/professional use"

            # Truncate prompt if it exceeds DALL-E's 4000 character limit
            if len(full_prompt) > 4000:
                # Use AI to compress the prompt intelligently
                compressed_prompt = await self._compress_prompt_with_ai(full_prompt, ai_service)
                if compressed_prompt and len(compressed_prompt) <= 4000:
                    return compressed_prompt

                # Fallback to minimal prompt if AI compression fails or is still too long
                minimal_prompt = f"Create a professional image representing: {title}"
                if user_prompt:
                    minimal_prompt += f" - {user_prompt}"

                # If even the minimal prompt is too long, truncate it
                if len(minimal_prompt) > 4000:
                    minimal_prompt = minimal_prompt[:3997] + "..."

                return minimal_prompt

            return full_prompt

        except Exception as e:
            self.logger.warning(f"Error creating image prompt: {str(e)}")
            # Fallback to simple prompt
            return f"Create a professional image representing: {title}"
    async def _compress_prompt_with_ai(self, long_prompt: str, ai_service=None) -> str:
        """Use AI to intelligently compress a long prompt while preserving key information."""
        try:
            if not ai_service:
                return None

            compression_prompt = f"""
You are an expert at creating concise, effective prompts for AI image generation.

The following prompt is too long for DALL-E (4000 character limit) and needs to be compressed to under 4000 characters while preserving the most important visual information.

Original prompt ({len(long_prompt)} characters):
{long_prompt}

Please create a compressed version that:
1. Keeps the most important visual elements and requirements
2. Maintains the core intent and style guidance
3. Preserves technical requirements
4. Stays under 4000 characters
5. Is optimized for DALL-E image generation

Return only the compressed prompt, no explanations.
"""

            # Use AI to compress the prompt - call the AI service correctly
            # The ai_service has an aiObjects attribute that contains the actual AI interface
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request = AiCallRequest(
                prompt=compression_prompt,
                options=AiCallOptions(
                    operationType=OperationType.GENERAL,
                    maxTokens=2000,
                    temperature=0.3  # Lower temperature for more consistent compression
                )
            )

            response = await ai_service.aiObjects.call(request)
            compressed = response.content.strip()

            # Validate the compressed prompt
            if compressed and len(compressed) <= 4000 and len(compressed) > 50:
                self.logger.info(f"Successfully compressed prompt from {len(long_prompt)} to {len(compressed)} characters")
                return compressed
            else:
                self.logger.warning(f"AI compression failed or produced invalid result: {len(compressed) if compressed else 0} chars")
                return None

        except Exception as e:
            self.logger.warning(f"Error compressing prompt with AI: {str(e)}")
            return None
    def _analyze_content_for_visual_description(self, sections: List[Dict[str, Any]]) -> str:
        """Analyze content sections and create a visual description for AI."""
        try:
            descriptions = []

            for section in sections:
                section_type = self._get_section_type(section)
                section_data = self._get_section_data(section)

                if section_type == "table":
                    headers = section_data.get("headers", [])
                    rows = section_data.get("rows", [])
                    if headers and rows:
                        descriptions.append(f"Data table with {len(headers)} columns and {len(rows)} rows: {', '.join(headers)}")

                elif section_type == "bullet_list":
                    items = section_data.get("items", [])
                    if items:
                        descriptions.append(f"List with {len(items)} items")

                elif section_type == "heading":
                    text = section_data.get("text", "")
                    level = section_data.get("level", 1)
                    if text:
                        descriptions.append(f"Heading {level}: {text}")

                elif section_type == "paragraph":
                    text = section_data.get("text", "")
                    if text and len(text) > 10:  # Only include substantial paragraphs
                        # Truncate long text
                        truncated = text[:100] + "..." if len(text) > 100 else text
                        descriptions.append(f"Text content: {truncated}")

                elif section_type == "code_block":
                    code = section_data.get("code", "")
                    language = section_data.get("language", "")
                    if code:
                        descriptions.append(f"Code block ({language}): {code[:50]}...")

            return "; ".join(descriptions) if descriptions else "General document content"

        except Exception as e:
            self.logger.warning(f"Error analyzing content: {str(e)}")
            return "Document content"
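    # Illustrative result of the analysis above (hypothetical sections): a table with
    # headers ["Quarter", "Revenue"] and four rows plus a heading "Summary" would be
    # described as
    #   "Data table with 2 columns and 4 rows: Quarter, Revenue; Heading 1: Summary"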
    def _get_style_guidance_from_content(self, extracted_content: Dict[str, Any], user_prompt: str = None) -> str:
        """Determine visual style guidance based on content and user prompt."""
        try:
            style_elements = []

            # Analyze user prompt for style hints
            if user_prompt:
                prompt_lower = user_prompt.lower()

                if any(word in prompt_lower for word in ["modern", "contemporary", "sleek"]):
                    style_elements.append("modern, clean design")
                elif any(word in prompt_lower for word in ["classic", "traditional", "formal"]):
                    style_elements.append("classic, formal design")
                elif any(word in prompt_lower for word in ["creative", "artistic", "colorful"]):
                    style_elements.append("creative, artistic design")
                elif any(word in prompt_lower for word in ["corporate", "business", "professional"]):
                    style_elements.append("corporate, professional design")

            # Analyze content type for additional style hints
            sections = extracted_content.get("sections", [])
            has_tables = any(self._get_section_type(s) == "table" for s in sections)
            has_lists = any(self._get_section_type(s) == "bullet_list" for s in sections)
            has_code = any(self._get_section_type(s) == "code_block" for s in sections)

            if has_tables:
                style_elements.append("data-focused layout")
            if has_lists:
                style_elements.append("organized, structured presentation")
            if has_code:
                style_elements.append("technical, developer-friendly")

            # Default style if no specific guidance
            if not style_elements:
                style_elements.append("professional, clean design")

            return ", ".join(style_elements)

        except Exception as e:
            self.logger.warning(f"Error determining style guidance: {str(e)}")
            return "professional design"

modules/services/serviceGeneration/renderers/rendererJson.py (new file, 79 lines)
@@ -0,0 +1,79 @@
"""
|
||||||
|
JSON renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import json
|
||||||
|
|
||||||
|
class RendererJson(BaseRenderer):
|
||||||
|
"""Renders content to JSON format with format-specific extraction."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported JSON formats."""
|
||||||
|
return ['json']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['data']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for JSON renderer."""
|
||||||
|
return 80
|
||||||
|
|
||||||
|
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||||
|
"""Render extracted JSON content to JSON format."""
|
||||||
|
try:
|
||||||
|
# The extracted content should already be JSON from the AI
|
||||||
|
# Just validate and format it
|
||||||
|
json_content = self._clean_json_content(extracted_content, title)
|
||||||
|
|
||||||
|
return json_content, "application/json"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering JSON: {str(e)}")
|
||||||
|
# Return minimal JSON fallback
|
||||||
|
fallback_data = {
|
||||||
|
"title": title,
|
||||||
|
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
|
||||||
|
"metadata": {"error": str(e)}
|
||||||
|
}
|
||||||
|
return json.dumps(fallback_data, indent=2), "application/json"
|
||||||
|
|
||||||
|
def _clean_json_content(self, content: Dict[str, Any], title: str) -> str:
|
||||||
|
"""Clean and validate JSON content from AI."""
|
||||||
|
try:
|
||||||
|
# Validate JSON structure
|
||||||
|
if not isinstance(content, dict):
|
||||||
|
raise ValueError("Content must be a dictionary")
|
||||||
|
|
||||||
|
# Ensure it has the expected structure
|
||||||
|
if "sections" not in content:
|
||||||
|
# Convert old format to new format
|
||||||
|
content = {
|
||||||
|
"sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
|
||||||
|
"metadata": {"title": title}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Ensure metadata exists
|
||||||
|
if "metadata" not in content:
|
||||||
|
content["metadata"] = {}
|
||||||
|
|
||||||
|
# Set title in metadata if not present
|
||||||
|
if "title" not in content["metadata"]:
|
||||||
|
content["metadata"]["title"] = title
|
||||||
|
|
||||||
|
# Re-format with proper indentation
|
||||||
|
return json.dumps(content, indent=2, ensure_ascii=False)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error cleaning JSON content: {str(e)}")
|
||||||
|
# Return minimal valid JSON
|
||||||
|
fallback_data = {
|
||||||
|
"sections": [{"content_type": "paragraph", "elements": [{"text": str(content)}]}],
|
||||||
|
"metadata": {"title": title, "error": str(e)}
|
||||||
|
}
|
||||||
|
return json.dumps(fallback_data, indent=2, ensure_ascii=False)
|
||||||
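    # Illustrative sketch (not part of the original diff): the shape every document
    # is normalized to by _clean_json_content before serializing. Field names mirror
    # the fallback paths above; the element shapes are assumptions for illustration.
    #
    #   example = {
    #       "sections": [
    #           {"content_type": "heading", "elements": [{"level": 1, "text": "Overview"}]},
    #           {"content_type": "paragraph", "elements": [{"text": "Body text..."}]}
    #       ]
    #   }
    #   RendererJson()._clean_json_content(example, "Example Report")
    #   # -> pretty-printed JSON string with "metadata" and "metadata.title" guaranteed to exist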
221 modules/services/serviceGeneration/renderers/rendererMarkdown.py Normal file
@@ -0,0 +1,221 @@
"""
Markdown renderer for report generation.
"""

from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List


class RendererMarkdown(BaseRenderer):
    """Renders content to Markdown format with format-specific extraction."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported Markdown formats."""
        return ['md', 'markdown']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['mdown', 'mkd']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for markdown renderer."""
        return 95

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to Markdown format."""
        try:
            # Generate markdown from JSON structure
            markdown_content = self._generate_markdown_from_json(extracted_content, title)

            return markdown_content, "text/markdown"

        except Exception as e:
            self.logger.error(f"Error rendering markdown: {str(e)}")
            # Return minimal markdown fallback
            return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"

    def _generate_markdown_from_json(self, json_content: Dict[str, Any], title: str) -> str:
        """Generate markdown content from structured JSON document."""
        try:
            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)

            # Build markdown content
            markdown_parts = []

            # Document title
            markdown_parts.append(f"# {document_title}")
            markdown_parts.append("")

            # Process each section
            sections = json_content.get("sections", [])
            for section in sections:
                section_markdown = self._render_json_section(section)
                if section_markdown:
                    markdown_parts.append(section_markdown)
                    markdown_parts.append("")  # Add spacing between sections

            # Add generation info
            markdown_parts.append("---")
            markdown_parts.append(f"*Generated: {self._format_timestamp()}*")

            return '\n'.join(markdown_parts)

        except Exception as e:
            self.logger.error(f"Error generating markdown from JSON: {str(e)}")
            raise Exception(f"Markdown generation failed: {str(e)}")
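    # Illustrative sketch (not part of the original diff): feeding a minimal
    # structured document through _generate_markdown_from_json. The exact element
    # shapes depend on the base-template helpers and are assumptions here.
    #
    #   doc = {
    #       "metadata": {"title": "Sprint Summary"},
    #       "sections": [
    #           {"content_type": "heading", "elements": [{"level": 2, "text": "Highlights"}]},
    #           {"content_type": "bullet_list", "elements": [{"items": ["Shipped search", "Fixed login bug"]}]}
    #       ]
    #   }
    #   RendererMarkdown()._generate_markdown_from_json(doc, "Sprint Summary")
    #   # -> markdown starting with "# Sprint Summary", an "## Highlights" heading,
    #   #    a two-item bullet list, and a trailing "*Generated: ...*" footer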
    def _render_json_section(self, section: Dict[str, Any]) -> str:
        """Render a single JSON section to markdown."""
        try:
            section_type = self._get_section_type(section)
            section_data = self._get_section_data(section)

            if section_type == "table":
                # Process the section data to extract table structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_table(processed_data)
            elif section_type == "bullet_list":
                # Process the section data to extract bullet list structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_bullet_list(processed_data)
            elif section_type == "heading":
                return self._render_json_heading(section_data)
            elif section_type == "paragraph":
                return self._render_json_paragraph(section_data)
            elif section_type == "code_block":
                # Process the section data to extract code block structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_code_block(processed_data)
            elif section_type == "image":
                # Process the section data to extract image structure
                processed_data = self._process_section_by_type(section)
                return self._render_json_image(processed_data)
            else:
                # Fallback to paragraph for unknown types
                return self._render_json_paragraph(section_data)

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            return f"*[Error rendering section: {str(e)}]*"

    def _render_json_table(self, table_data: Dict[str, Any]) -> str:
        """Render a JSON table to markdown."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers or not rows:
                return ""

            markdown_parts = []

            # Create table header
            header_line = " | ".join(str(header) for header in headers)
            markdown_parts.append(header_line)

            # Add separator line
            separator_line = " | ".join("---" for _ in headers)
            markdown_parts.append(separator_line)

            # Add data rows
            for row in rows:
                row_line = " | ".join(str(cell_data) for cell_data in row)
                markdown_parts.append(row_line)

            return '\n'.join(markdown_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return ""
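    # Illustrative sketch (not part of the original diff): what _render_json_table
    # produces for a small table. Rows are pipe-delimited without leading or
    # trailing pipes, which most Markdown renderers accept.
    #
    #   RendererMarkdown()._render_json_table({
    #       "headers": ["Service", "Status"],
    #       "rows": [["API", "up"], ["Worker", "degraded"]]
    #   })
    #   # Service | Status
    #   # --- | ---
    #   # API | up
    #   # Worker | degraded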
    def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
        """Render a JSON bullet list to markdown."""
        try:
            items = list_data.get("items", [])

            if not items:
                return ""

            markdown_parts = []
            for item in items:
                if isinstance(item, str):
                    markdown_parts.append(f"- {item}")
                elif isinstance(item, dict) and "text" in item:
                    markdown_parts.append(f"- {item['text']}")

            return '\n'.join(markdown_parts)

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return ""

    def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
        """Render a JSON heading to markdown."""
        try:
            level = heading_data.get("level", 1)
            text = heading_data.get("text", "")

            if text:
                level = max(1, min(6, level))
                return f"{'#' * level} {text}"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return ""

    def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
        """Render a JSON paragraph to markdown."""
        try:
            text = paragraph_data.get("text", "")
            return text if text else ""

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return ""

    def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
        """Render a JSON code block to markdown."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")

            if code:
                if language:
                    return f"```{language}\n{code}\n```"
                else:
                    return f"```\n{code}\n```"

            return ""

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return ""

    def _render_json_image(self, image_data: Dict[str, Any]) -> str:
        """Render a JSON image to markdown."""
        try:
            alt_text = image_data.get("altText", "Image")
            base64_data = image_data.get("base64Data", "")

            if base64_data:
                # For base64 images, we can't embed them directly in markdown
                # So we'll use a placeholder with the alt text
                return f"*[Image: {alt_text}]*"
            else:
                return f"*[Image: {alt_text} - no data]*"

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            return f"*[Image: {image_data.get('altText', 'Image')}]*"
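    # Illustrative sketch (not part of the original diff): the code-block and
    # heading helpers above produce standard fenced blocks and clamped heading
    # levels, for example:
    #
    #   RendererMarkdown()._render_json_code_block({"code": "print('hi')", "language": "python"})
    #   # ```python
    #   # print('hi')
    #   # ```
    #
    #   RendererMarkdown()._render_json_heading({"level": 9, "text": "Appendix"})
    #   # '###### Appendix'   (level is clamped to the 1-6 range)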
642 modules/services/serviceGeneration/renderers/rendererPdf.py Normal file
@@ -0,0 +1,642 @@
"""
PDF renderer for report generation using reportlab.
"""

from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List
import io
import base64
from datetime import datetime, UTC

try:
    from reportlab.lib.pagesizes import letter, A4
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle, PageBreak
    from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
    from reportlab.lib.units import inch
    from reportlab.lib import colors
    from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT, TA_JUSTIFY
    REPORTLAB_AVAILABLE = True
except ImportError:
    REPORTLAB_AVAILABLE = False


class RendererPdf(BaseRenderer):
    """Renders content to PDF format using reportlab."""

    @classmethod
    def get_supported_formats(cls) -> List[str]:
        """Return supported PDF formats."""
        return ['pdf']

    @classmethod
    def get_format_aliases(cls) -> List[str]:
        """Return format aliases."""
        return ['document', 'print']

    @classmethod
    def get_priority(cls) -> int:
        """Return priority for PDF renderer."""
        return 120

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """Render extracted JSON content to PDF format using AI-analyzed styling."""
        try:
            if not REPORTLAB_AVAILABLE:
                # Fallback to HTML if reportlab is not available
                from .rendererHtml import RendererHtml
                html_renderer = RendererHtml()
                html_content, _ = await html_renderer.render(extracted_content, title, user_prompt, ai_service)
                return html_content, "text/html"

            # Generate PDF using AI-analyzed styling
            pdf_content = await self._generate_pdf_from_json(extracted_content, title, user_prompt, ai_service)

            return pdf_content, "application/pdf"

        except Exception as e:
            self.logger.error(f"Error rendering PDF: {str(e)}")
            # Return minimal fallback
            return f"PDF Generation Error: {str(e)}", "text/plain"
    async def _generate_pdf_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
        """Generate PDF content from structured JSON document using AI-generated styling."""
        try:
            # Get AI-generated styling definitions
            styles = await self._get_pdf_styles(user_prompt, ai_service)

            # Validate JSON structure
            if not isinstance(json_content, dict):
                raise ValueError("JSON content must be a dictionary")

            if "sections" not in json_content:
                raise ValueError("JSON content must contain 'sections' field")

            # Use title from JSON metadata if available, otherwise use provided title
            document_title = json_content.get("metadata", {}).get("title", title)

            # Shorten overly long titles to prevent wrapping/overlapping
            if len(document_title) > 40:
                document_title = document_title[:37].rstrip() + "..."

            # Create a buffer to hold the PDF
            buffer = io.BytesIO()

            # Create PDF document
            doc = SimpleDocTemplate(
                buffer,
                pagesize=A4,
                rightMargin=72,
                leftMargin=72,
                topMargin=72,
                bottomMargin=18
            )

            # Build PDF content
            story = []

            # Title page
            title_style = self._create_title_style(styles)
            story.append(Paragraph(document_title, title_style))
            story.append(Spacer(1, 50))  # Increased spacing to prevent overlap
            story.append(Paragraph(f"Generated: {self._format_timestamp()}", self._create_normal_style(styles)))
            story.append(Spacer(1, 30))  # Add spacing before page break
            story.append(PageBreak())

            # Process each section
            sections = json_content.get("sections", [])
            self.services.utils.debugLogToFile(f"PDF SECTIONS TO PROCESS: {len(sections)} sections", "PDF_RENDERER")
            for i, section in enumerate(sections):
                self.services.utils.debugLogToFile(f"PDF SECTION {i}: content_type={section.get('content_type', 'unknown')}, id={section.get('id', 'unknown')}", "PDF_RENDERER")
                section_elements = self._render_json_section(section, styles)
                self.services.utils.debugLogToFile(f"PDF SECTION {i} ELEMENTS: {len(section_elements)} elements", "PDF_RENDERER")
                story.extend(section_elements)

            # Build PDF
            doc.build(story)

            # Get PDF content as base64
            buffer.seek(0)
            pdf_bytes = buffer.getvalue()
            pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')

            return pdf_base64

        except Exception as e:
            self.logger.error(f"Error generating PDF from JSON: {str(e)}")
            raise Exception(f"PDF generation failed: {str(e)}")
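    # Standalone sketch (not part of the original diff) of the reportlab flow the
    # method above relies on: flowables go into a story, the story is built into an
    # in-memory buffer, and the resulting bytes are base64-encoded for transport.
    #
    #   import io, base64
    #   from reportlab.lib.pagesizes import A4
    #   from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
    #   from reportlab.lib.styles import getSampleStyleSheet
    #
    #   buf = io.BytesIO()
    #   doc = SimpleDocTemplate(buf, pagesize=A4)
    #   sheet = getSampleStyleSheet()
    #   story = [Paragraph("Example Report", sheet["Title"]),
    #            Spacer(1, 24),
    #            Paragraph("Generated by the PDF renderer sketch.", sheet["Normal"])]
    #   doc.build(story)
    #   pdf_base64 = base64.b64encode(buf.getvalue()).decode("utf-8")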
    async def _get_pdf_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
        """Get PDF styling definitions using base template AI styling."""
        style_schema = {
            "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
            "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
            "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
            "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
            "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
            "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
            "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
            "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
        }

        style_template = self._create_ai_style_template("pdf", user_prompt, style_schema)

        # Use base template method like DOCX does (this works!)
        styles = await self._get_ai_styles(ai_service, style_template, self._get_default_pdf_styles())

        if styles is None:
            return self._get_default_pdf_styles()

        # Convert colors to PDF format after getting styles
        styles = self._convert_colors_format(styles)

        # Validate and fix contrast issues
        return self._validate_pdf_styles_contrast(styles)
    async def _get_ai_styles_with_pdf_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
        """Get AI styles with proper PDF color conversion."""
        if not ai_service:
            return default_styles

        try:
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType

            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=style_template, context="", options=request_options)

            # Check if AI service is properly configured
            if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
                self.logger.warning("AI service not properly configured, using defaults")
                return default_styles

            response = await ai_service.aiObjects.call(request)

            # Check if response is valid
            if not response:
                self.logger.warning("AI service returned no response, using defaults")
                return default_styles

            import json
            import re

            # Clean and parse JSON
            result = response.content.strip() if response and response.content else ""

            # Check if result is empty
            if not result:
                self.logger.warning("AI styling returned empty response, using defaults")
                return default_styles

            # Log the raw response for debugging
            self.logger.debug(f"AI styling raw response: {result[:200]}...")

            # Extract JSON from various formats
            json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
            if json_match:
                result = json_match.group(1).strip()
            elif result.startswith('```json'):
                result = re.sub(r'^```json\s*', '', result)
                result = re.sub(r'\s*```$', '', result)
            elif result.startswith('```'):
                result = re.sub(r'^```\s*', '', result)
                result = re.sub(r'\s*```$', '', result)

            # Try to extract JSON from explanatory text
            json_patterns = [
                r'\{[^{}]*"title"[^{}]*\}',  # Simple JSON object
                r'\{.*?"title".*?\}',  # JSON with title field
                r'\{.*?"font_size".*?\}',  # JSON with font_size field
            ]

            for pattern in json_patterns:
                json_match = re.search(pattern, result, re.DOTALL)
                if json_match:
                    result = json_match.group(0)
                    break

            # Additional cleanup - remove any leading/trailing whitespace and newlines
            result = result.strip()

            # Check if result is still empty after cleanup
            if not result:
                self.logger.warning("AI styling returned empty content after cleanup, using defaults")
                return default_styles

            # Try to parse JSON
            try:
                styles = json.loads(result)
                self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
            except json.JSONDecodeError as json_error:
                self.logger.warning(f"AI styling returned invalid JSON: {json_error}")

                # Write the full response to the debug log file to avoid log truncation
                self.services.utils.debugLogToFile(f"FULL AI RESPONSE THAT FAILED TO PARSE: {result}", "PDF_RENDERER")
                self.services.utils.debugLogToFile(f"RESPONSE LENGTH: {len(result)} characters", "PDF_RENDERER")

                self.logger.warning(f"Raw content that failed to parse: {result}")

                # Try to fix incomplete JSON by adding missing closing braces
                open_braces = result.count('{')
                close_braces = result.count('}')

                if open_braces > close_braces:
                    # JSON is incomplete, add missing closing braces
                    missing_braces = open_braces - close_braces
                    result = result + '}' * missing_braces
                    self.logger.info(f"Added {missing_braces} missing closing brace(s)")

                    # Try parsing the fixed JSON
                    try:
                        styles = json.loads(result)
                        self.logger.info("Successfully fixed incomplete JSON")
                    except json.JSONDecodeError as fix_error:
                        self.logger.warning(f"Fixed JSON still invalid: {fix_error}")
                        # Try to extract just the JSON part if it's embedded in text
                        json_start = result.find('{')
                        json_end = result.rfind('}')
                        if json_start != -1 and json_end != -1 and json_end > json_start:
                            json_part = result[json_start:json_end+1]
                            try:
                                styles = json.loads(json_part)
                                self.logger.info("Successfully extracted JSON from explanatory text")
                            except json.JSONDecodeError:
                                self.logger.warning("Could not extract valid JSON from response, using defaults")
                                return default_styles
                        else:
                            return default_styles
                else:
                    # Try to extract just the JSON part if it's embedded in text
                    json_start = result.find('{')
                    json_end = result.rfind('}')
                    if json_start != -1 and json_end != -1 and json_end > json_start:
                        json_part = result[json_start:json_end+1]
                        try:
                            styles = json.loads(json_part)
                            self.logger.info("Successfully extracted JSON from explanatory text")
                        except json.JSONDecodeError:
                            self.logger.warning("Could not extract valid JSON from response, using defaults")
                            return default_styles
                    else:
                        return default_styles

            # Convert colors to PDF format (keep as hex strings, PDF renderer will convert them)
            styles = self._convert_colors_format(styles)

            return styles

        except Exception as e:
            self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
            return default_styles
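    # Standalone sketch (not part of the original diff) of the recovery strategy
    # used above: strip a ```json fence, then balance any missing closing braces
    # before handing the text to json.loads.
    #
    #   import json, re
    #
    #   raw = '```json\n{"title": {"font_size": 24, "color": "#1F4E79"}\n```'
    #   match = re.search(r'```json\s*\n(.*?)\n```', raw, re.DOTALL)
    #   candidate = match.group(1).strip() if match else raw.strip()
    #   candidate += '}' * (candidate.count('{') - candidate.count('}'))
    #   json.loads(candidate)
    #   # -> {'title': {'font_size': 24, 'color': '#1F4E79'}}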
    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Convert colors to proper format for PDF compatibility."""
        try:
            for style_name, style_config in styles.items():
                if isinstance(style_config, dict):
                    for prop, value in style_config.items():
                        if isinstance(value, str) and value.startswith('#') and len(value) == 7:
                            # Convert #RRGGBB to #AARRGGBB (add FF alpha channel) for consistency
                            styles[style_name][prop] = f"FF{value[1:]}"
                        elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
                            # Already aRGB format, keep as is
                            pass
            return styles
        except Exception as e:
            self.logger.warning(f"Color conversion failed: {str(e)}")
            return styles

    def _get_safe_color(self, color_value: str, default: str = "#000000") -> str:
        """Get a safe hex color value for PDF."""
        if isinstance(color_value, str) and color_value.startswith('#'):
            if len(color_value) == 7:
                return f"FF{color_value[1:]}"
            elif len(color_value) == 9:
                return color_value
        return default
    def _validate_pdf_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix contrast issues in AI-generated styles."""
        try:
            # Fix table header contrast
            if "table_header" in styles:
                header = styles["table_header"]
                bg_color = header.get("background", "#FFFFFF")
                text_color = header.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    header["background"] = "#4F4F4F"
                    header["text_color"] = "#FFFFFF"

            # Fix table cell contrast
            if "table_cell" in styles:
                cell = styles["table_cell"]
                bg_color = cell.get("background", "#FFFFFF")
                text_color = cell.get("text_color", "#000000")

                # If both are white or both are dark, fix it
                if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"
                elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
                    cell["background"] = "#FFFFFF"
                    cell["text_color"] = "#2F2F2F"

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_pdf_styles()

    def _get_default_pdf_styles(self) -> Dict[str, Any]:
        """Default PDF styles."""
        return {
            "title": {"font_size": 24, "color": "#1F4E79", "bold": True, "align": "center", "space_after": 30},
            "heading1": {"font_size": 18, "color": "#2F2F2F", "bold": True, "align": "left", "space_after": 12, "space_before": 12},
            "heading2": {"font_size": 14, "color": "#4F4F4F", "bold": True, "align": "left", "space_after": 8, "space_before": 8},
            "paragraph": {"font_size": 11, "color": "#2F2F2F", "bold": False, "align": "left", "space_after": 6, "line_height": 1.2},
            "table_header": {"background": "#4F4F4F", "text_color": "#FFFFFF", "bold": True, "align": "center", "font_size": 12},
            "table_cell": {"background": "#FFFFFF", "text_color": "#2F2F2F", "bold": False, "align": "left", "font_size": 10},
            "bullet_list": {"font_size": 11, "color": "#2F2F2F", "space_after": 3},
            "code_block": {"font": "Courier", "font_size": 9, "color": "#2F2F2F", "background": "#F5F5F5", "space_after": 6}
        }
    def _create_title_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
        """Create title style from style definitions."""
        title_style_def = styles.get("title", {})

        # DEBUG: Show what color and spacing is being used for the title
        title_color = title_style_def.get("color", "#1F4E79")
        title_space_after = title_style_def.get("space_after", 30)
        self.services.utils.debugLogToFile(f"PDF TITLE COLOR: {title_color} -> {self._hex_to_color(title_color)}", "PDF_RENDERER")
        self.services.utils.debugLogToFile(f"PDF TITLE SPACE_AFTER: {title_space_after}", "PDF_RENDERER")

        return ParagraphStyle(
            'CustomTitle',
            fontSize=title_style_def.get("font_size", 20),  # Reduced from 24 to 20
            spaceAfter=title_style_def.get("space_after", 30),
            alignment=self._get_alignment(title_style_def.get("align", "center")),
            textColor=self._hex_to_color(title_color),
            leading=title_style_def.get("font_size", 20) * 1.4,  # Add line spacing for multi-line titles
            spaceBefore=0  # Ensure no space before title
        )

    def _create_heading_style(self, styles: Dict[str, Any], level: int) -> ParagraphStyle:
        """Create heading style from style definitions."""
        heading_key = f"heading{level}"
        heading_style_def = styles.get(heading_key, styles.get("heading1", {}))

        return ParagraphStyle(
            f'CustomHeading{level}',
            fontSize=heading_style_def.get("font_size", 18 - level * 2),
            spaceAfter=heading_style_def.get("space_after", 12),
            spaceBefore=heading_style_def.get("space_before", 12),
            alignment=self._get_alignment(heading_style_def.get("align", "left")),
            textColor=self._hex_to_color(heading_style_def.get("color", "#2F2F2F"))
        )

    def _create_normal_style(self, styles: Dict[str, Any]) -> ParagraphStyle:
        """Create normal paragraph style from style definitions."""
        paragraph_style_def = styles.get("paragraph", {})

        return ParagraphStyle(
            'CustomNormal',
            fontSize=paragraph_style_def.get("font_size", 11),
            spaceAfter=paragraph_style_def.get("space_after", 6),
            alignment=self._get_alignment(paragraph_style_def.get("align", "left")),
            textColor=self._hex_to_color(paragraph_style_def.get("color", "#2F2F2F")),
            leading=paragraph_style_def.get("line_height", 1.2) * paragraph_style_def.get("font_size", 11)
        )

    def _get_alignment(self, align: str) -> int:
        """Convert alignment string to reportlab alignment constant."""
        if not align or not isinstance(align, str):
            return TA_LEFT

        align_map = {
            "center": TA_CENTER,
            "left": TA_LEFT,
            "justify": TA_JUSTIFY,
            "right": TA_RIGHT,
            "0": TA_LEFT,  # Handle numeric strings
            "1": TA_CENTER,
            "2": TA_JUSTIFY
        }
        return align_map.get(align.lower().strip(), TA_LEFT)
    def _get_table_alignment(self, align: str) -> str:
        """Convert alignment string to ReportLab table alignment string."""
        if not align or not isinstance(align, str):
            return 'LEFT'

        align_map = {
            "center": 'CENTER',
            "left": 'LEFT',
            "justify": 'LEFT',  # Tables don't support justify, use LEFT
            "right": 'RIGHT',
            "0": 'LEFT',  # Handle numeric strings
            "1": 'CENTER',
            "2": 'LEFT'  # Tables don't support justify, use LEFT
        }
        return align_map.get(align.lower().strip(), 'LEFT')

    def _hex_to_color(self, hex_color: str) -> colors.Color:
        """Convert hex color to reportlab color."""
        try:
            hex_color = hex_color.lstrip('#')

            # Handle aRGB format (8 characters: alpha + RGB)
            if len(hex_color) == 8:
                # Skip the alpha channel (first 2 characters)
                hex_color = hex_color[2:]

            # Handle RGB format (6 characters)
            if len(hex_color) == 6:
                r = int(hex_color[0:2], 16) / 255.0
                g = int(hex_color[2:4], 16) / 255.0
                b = int(hex_color[4:6], 16) / 255.0
                return colors.Color(r, g, b)

            # Fallback for other formats
            return colors.black
        except Exception:
            return colors.black
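    # Illustrative sketch (not part of the original diff): how a style color moves
    # through the helpers above. _convert_colors_format turns "#1F4E79" into
    # "FF1F4E79" (alpha-prefixed, '#' dropped), and _hex_to_color strips the alpha
    # again before building the reportlab color.
    #
    #   renderer._get_safe_color("#1F4E79")   # -> "FF1F4E79"
    #   renderer._hex_to_color("FF1F4E79")    # -> Color(31/255, 78/255, 121/255)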
    def _render_json_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a single JSON section to PDF elements using AI-generated styles."""
        try:
            section_type = self._get_section_type(section)
            elements = self._get_section_data(section)

            # Process each element in the section
            all_elements = []
            for element in elements:
                if section_type == "table":
                    all_elements.extend(self._render_json_table(element, styles))
                elif section_type == "bullet_list":
                    all_elements.extend(self._render_json_bullet_list(element, styles))
                elif section_type == "heading":
                    all_elements.extend(self._render_json_heading(element, styles))
                elif section_type == "paragraph":
                    all_elements.extend(self._render_json_paragraph(element, styles))
                elif section_type == "code_block":
                    all_elements.extend(self._render_json_code_block(element, styles))
                elif section_type == "image":
                    all_elements.extend(self._render_json_image(element, styles))
                else:
                    # Fallback to paragraph for unknown types
                    all_elements.extend(self._render_json_paragraph(element, styles))

            return all_elements

        except Exception as e:
            self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
            return [Paragraph(f"[Error rendering section: {str(e)}]", self._create_normal_style(styles))]

    def _render_json_table(self, table_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON table to PDF elements using AI-generated styles."""
        try:
            headers = table_data.get("headers", [])
            rows = table_data.get("rows", [])

            if not headers or not rows:
                return []

            # Prepare table data
            table_data_list = [headers] + rows

            # Create table
            table = Table(table_data_list)

            # Apply styling
            table_header_style = styles.get("table_header", {})
            table_cell_style = styles.get("table_cell", {})

            table_style = [
                ('BACKGROUND', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("background", "#4F4F4F"))),
                ('TEXTCOLOR', (0, 0), (-1, 0), self._hex_to_color(table_header_style.get("text_color", "#FFFFFF"))),
                ('ALIGN', (0, 0), (-1, -1), self._get_table_alignment(table_cell_style.get("align", "left"))),
                ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold' if table_header_style.get("bold", True) else 'Helvetica'),
                ('FONTSIZE', (0, 0), (-1, 0), table_header_style.get("font_size", 12)),
                ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
                ('BACKGROUND', (0, 1), (-1, -1), self._hex_to_color(table_cell_style.get("background", "#FFFFFF"))),
                ('FONTSIZE', (0, 1), (-1, -1), table_cell_style.get("font_size", 10)),
                ('GRID', (0, 0), (-1, -1), 1, colors.black)
            ]

            table.setStyle(TableStyle(table_style))

            return [table, Spacer(1, 12)]

        except Exception as e:
            self.logger.warning(f"Error rendering table: {str(e)}")
            return []

    def _render_json_bullet_list(self, list_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON bullet list to PDF elements using AI-generated styles."""
        try:
            items = list_data.get("items", [])
            bullet_style_def = styles.get("bullet_list", {})

            elements = []
            for item in items:
                if isinstance(item, str):
                    elements.append(Paragraph(f"• {item}", self._create_normal_style(styles)))
                elif isinstance(item, dict) and "text" in item:
                    elements.append(Paragraph(f"• {item['text']}", self._create_normal_style(styles)))

            if elements:
                elements.append(Spacer(1, bullet_style_def.get("space_after", 3)))

            return elements

        except Exception as e:
            self.logger.warning(f"Error rendering bullet list: {str(e)}")
            return []

    def _render_json_heading(self, heading_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON heading to PDF elements using AI-generated styles."""
        try:
            level = heading_data.get("level", 1)
            text = heading_data.get("text", "")

            if text:
                level = max(1, min(6, level))
                heading_style = self._create_heading_style(styles, level)
                return [Paragraph(text, heading_style)]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering heading: {str(e)}")
            return []

    def _render_json_paragraph(self, paragraph_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON paragraph to PDF elements using AI-generated styles."""
        try:
            text = paragraph_data.get("text", "")

            if text:
                return [Paragraph(text, self._create_normal_style(styles))]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering paragraph: {str(e)}")
            return []

    def _render_json_code_block(self, code_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON code block to PDF elements using AI-generated styles."""
        try:
            code = code_data.get("code", "")
            language = code_data.get("language", "")
            code_style_def = styles.get("code_block", {})

            if code:
                elements = []

                if language:
                    lang_style = ParagraphStyle(
                        'CodeLanguage',
                        fontSize=code_style_def.get("font_size", 9),
                        textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
                        fontName='Helvetica-Bold'
                    )
                    elements.append(Paragraph(f"Code ({language}):", lang_style))

                code_style = ParagraphStyle(
                    'CodeBlock',
                    fontSize=code_style_def.get("font_size", 9),
                    textColor=self._hex_to_color(code_style_def.get("color", "#2F2F2F")),
                    fontName=code_style_def.get("font", "Courier"),
                    backColor=self._hex_to_color(code_style_def.get("background", "#F5F5F5")),
                    spaceAfter=code_style_def.get("space_after", 6)
                )
                elements.append(Paragraph(code, code_style))

                return elements

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering code block: {str(e)}")
            return []

    def _render_json_image(self, image_data: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]:
        """Render a JSON image to PDF elements."""
        try:
            base64_data = image_data.get("base64Data", "")
            alt_text = image_data.get("altText", "Image")

            if base64_data:
                # For now, just add a placeholder since reportlab image handling is complex
                return [Paragraph(f"[Image: {alt_text}]", self._create_normal_style(styles))]

            return []

        except Exception as e:
            self.logger.warning(f"Error rendering image: {str(e)}")
            return [Paragraph(f"[Image: {image_data.get('altText', 'Image')}]", self._create_normal_style(styles))]
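    # Illustrative sketch (not part of the original diff): a table section in the
    # shape the PDF helpers above expect, assuming _get_section_data returns the
    # section's "elements" list so each element dict is rendered in turn.
    #
    #   section = {
    #       "content_type": "table",
    #       "id": "s1",
    #       "elements": [{"headers": ["Metric", "Value"], "rows": [["Uptime", "99.9%"]]}]
    #   }
    #   renderer._render_json_section(section, renderer._get_default_pdf_styles())
    #   # -> [Table(...), Spacer(1, 12)]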
885 modules/services/serviceGeneration/renderers/rendererPptx.py Normal file
@@ -0,0 +1,885 @@
import logging
import base64
import io
from typing import Dict, Any, Optional, Tuple, List
from .rendererBaseTemplate import BaseRenderer

logger = logging.getLogger(__name__)


class RendererPptx(BaseRenderer):
    """Renderer for PowerPoint (.pptx) files using the python-pptx library."""

    def __init__(self):
        super().__init__()
        self.supported_formats = ["pptx", "ppt"]
        self.output_mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"

    @classmethod
    def get_supported_formats(cls) -> list:
        """Get list of supported output formats."""
        return ["pptx", "ppt"]

    async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
        """
        Render content as a PowerPoint presentation from JSON data.

        Args:
            extracted_content: JSON content to render as presentation
            title: Title for the presentation
            user_prompt: User prompt for AI styling
            ai_service: AI service for styling

        Returns:
            Base64-encoded PowerPoint presentation and its MIME type
        """
        try:
            # Import python-pptx
            from pptx import Presentation
            from pptx.util import Inches, Pt
            from pptx.enum.text import PP_ALIGN
            from pptx.dml.color import RGBColor
            import re

            # Get AI-generated styling definitions first
            styles = await self._get_pptx_styles(user_prompt, ai_service)

            # Create new presentation
            prs = Presentation()

            # Set slide size based on user intent (default to 16:9)
            slide_size = styles.get("slide_size", "16:9")
            if slide_size == "4:3":
                prs.slide_width = Inches(10)
                prs.slide_height = Inches(7.5)
            else:  # Default to 16:9
                prs.slide_width = Inches(13.33)
                prs.slide_height = Inches(7.5)

            # Generate slides from JSON content
            slides_data = await self._parse_json_to_slides(extracted_content, title, styles)
            logger.info(f"Parsed {len(slides_data)} slides from JSON content")

            # Debug: Show first 200 chars of content
            logger.info(f"JSON content preview: {str(extracted_content)[:200]}...")

            for i, slide_data in enumerate(slides_data):
                logger.info(f"Slide {i+1}: '{slide_data.get('title', 'No title')}' - {len(slide_data.get('content', ''))} chars")
                # Debug: Show slide content preview
                slide_content = slide_data.get('content', '')
                if slide_content:
                    logger.info(f"  Content preview: '{slide_content[:100]}...'")
                else:
                    logger.warning(f"  ⚠️ Slide {i+1} has NO content!")

                # Create slide with appropriate layout based on content
                slide_layout_index = self._get_slide_layout_index(slide_data, styles)
                slide_layout = prs.slide_layouts[slide_layout_index]
                slide = prs.slides.add_slide(slide_layout)

                # Set title with AI-generated styling
                title_shape = slide.shapes.title
                title_shape.text = slide_data.get("title", "Slide")

                # Apply title styling
                title_style = styles.get("title", {})
                if title_shape.text_frame.paragraphs[0].font:
                    title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 44))
                    title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
                    title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
                    title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)

                # Set content with AI-generated styling
                content_shape = slide.placeholders[1]
                content_text = slide_data.get("content", "")

                # Format content text with AI styles
                text_frame = content_shape.text_frame
                text_frame.clear()

                # Split content into paragraphs
                paragraphs = content_text.split('\n\n')

                for j, paragraph in enumerate(paragraphs):
                    if paragraph.strip():
                        if j == 0:
                            p = text_frame.paragraphs[0]
                        else:
                            p = text_frame.add_paragraph()

                        p.text = paragraph.strip()

                        # Apply AI-generated styling based on content type
                        # (check '##' before '#' so subheaders are not swallowed by the header branch)
                        if paragraph.startswith('##'):
                            # Subheader
                            p.text = paragraph.lstrip('#').strip()
                            subheading_style = styles.get("subheading", {})
                            p.font.size = Pt(subheading_style.get("font_size", 24))
                            p.font.bold = subheading_style.get("bold", True)
                            subheading_color = self._get_safe_color(subheading_style.get("color", (79, 79, 79)))
                            p.font.color.rgb = RGBColor(*subheading_color)
                        elif paragraph.startswith('#'):
                            # Header
                            p.text = paragraph.lstrip('#').strip()
                            heading_style = styles.get("heading", {})
                            p.font.size = Pt(heading_style.get("font_size", 32))
                            p.font.bold = heading_style.get("bold", True)
                            heading_color = self._get_safe_color(heading_style.get("color", (47, 47, 47)))
                            p.font.color.rgb = RGBColor(*heading_color)
                        elif paragraph.startswith('*') and paragraph.endswith('*'):
                            # Bold text
                            p.text = paragraph.strip('*')
                            paragraph_style = styles.get("paragraph", {})
                            p.font.size = Pt(paragraph_style.get("font_size", 18))
                            p.font.bold = True
                            paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
                            p.font.color.rgb = RGBColor(*paragraph_color)
                        else:
                            # Regular text
                            paragraph_style = styles.get("paragraph", {})
                            p.font.size = Pt(paragraph_style.get("font_size", 18))
                            p.font.bold = paragraph_style.get("bold", False)
                            paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
                            p.font.color.rgb = RGBColor(*paragraph_color)

                        # Apply alignment (read it from the paragraph style so heading
                        # paragraphs are aligned consistently as well)
                        align = styles.get("paragraph", {}).get("align", "left")
                        if align == "center":
                            p.alignment = PP_ALIGN.CENTER
                        elif align == "right":
                            p.alignment = PP_ALIGN.RIGHT
                        else:
                            p.alignment = PP_ALIGN.LEFT

            # If no slides were created, create a default slide
            if not slides_data:
                slide_layout = prs.slide_layouts[0]  # Title slide layout
                slide = prs.slides.add_slide(slide_layout)

                title_shape = slide.shapes.title
                title_shape.text = title

                # Apply title styling to default slide
                title_style = styles.get("title", {})
                if title_shape.text_frame.paragraphs[0].font:
                    title_shape.text_frame.paragraphs[0].font.size = Pt(title_style.get("font_size", 48))
                    title_shape.text_frame.paragraphs[0].font.bold = title_style.get("bold", True)
                    title_color = self._get_safe_color(title_style.get("color", (31, 78, 121)))
                    title_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*title_color)

                subtitle_shape = slide.placeholders[1]
                subtitle_shape.text = "Generated by PowerOn AI System"

                # Apply subtitle styling
                paragraph_style = styles.get("paragraph", {})
                if subtitle_shape.text_frame.paragraphs[0].font:
                    subtitle_shape.text_frame.paragraphs[0].font.size = Pt(paragraph_style.get("font_size", 20))
                    subtitle_shape.text_frame.paragraphs[0].font.bold = paragraph_style.get("bold", False)
                    paragraph_color = self._get_safe_color(paragraph_style.get("color", (47, 47, 47)))
                    subtitle_shape.text_frame.paragraphs[0].font.color.rgb = RGBColor(*paragraph_color)

            # Save to buffer
            buffer = io.BytesIO()
            prs.save(buffer)
            buffer.seek(0)

            # Convert to base64
            pptx_bytes = buffer.getvalue()
            pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')

            logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
            return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"

        except ImportError:
            logger.error("python-pptx library not installed. Install with: pip install python-pptx")
            return "python-pptx library not installed", "text/plain"
        except Exception as e:
            logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
            return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"

    def _parse_content_to_slides(self, content: str, title: str) -> list:
        """
        Parse content into slide data structure.

        Args:
            content: Content to parse
            title: Presentation title

        Returns:
            List of slide data dictionaries
        """
        slides = []

        # Split content by slide markers or headers
        slide_sections = self._split_content_into_slides(content)

        for i, section in enumerate(slide_sections):
            if section.strip():
                slide_data = {
                    "title": f"Slide {i + 1}",
                    "content": section.strip()
                }

                # Extract title from content if it starts with #
                lines = section.strip().split('\n')
                if lines and lines[0].startswith('#'):
                    # Remove # symbols and clean up title
                    slide_title = lines[0].lstrip('#').strip()
                    slide_data["title"] = slide_title
                    slide_data["content"] = '\n'.join(lines[1:]).strip()
                elif lines and lines[0].strip():
                    # Use first line as title if it looks like a title
                    first_line = lines[0].strip()
                    if len(first_line) < 100 and not first_line.endswith('.'):
                        slide_data["title"] = first_line
                        slide_data["content"] = '\n'.join(lines[1:]).strip()

                slides.append(slide_data)

        return slides

    def _split_content_into_slides(self, content: str) -> list:
        """
        Split content into individual slides based on headers and structure.

        Args:
            content: Content to split

        Returns:
            List of slide content strings
        """
        import re

        # First, try to split by major headers (# or ##)
        # This is the most common case for AI-generated content
        header_pattern = r'^(#{1,2})\s+(.+)$'
        lines = content.split('\n')
        slides = []
        current_slide = []

        for line in lines:
            # Check if this line is a header
            header_match = re.match(header_pattern, line.strip())
            if header_match:
                # If we have content in current slide, save it
                if current_slide:
                    slide_content = '\n'.join(current_slide).strip()
                    if slide_content:
                        slides.append(slide_content)
                    current_slide = []

                # Start new slide with this header
                current_slide.append(line)
            else:
                # Add line to current slide
                current_slide.append(line)

        # Add the last slide
        if current_slide:
            slide_content = '\n'.join(current_slide).strip()
            if slide_content:
                slides.append(slide_content)

        # If we found slides with headers, return them
        if len(slides) > 1:
            return slides

        # Fallback: split by triple newlines
        sections = content.split('\n\n\n')
        if len(sections) > 1:
            return [s.strip() for s in sections if s.strip()]

        # Another fallback: split by double newlines
        sections = content.split('\n\n')
        if len(sections) > 1:
            return [s.strip() for s in sections if s.strip()]

        # Last resort: return as single slide
        return [content.strip()]
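    # Illustrative sketch (not part of the original diff): how _split_content_into_slides
    # breaks markdown-style text on level-1/level-2 headers.
    #
    #   text = "# Intro\nWelcome\n\n## Roadmap\n- Q1\n- Q2"
    #   RendererPptx()._split_content_into_slides(text)
    #   # -> ['# Intro\nWelcome', '## Roadmap\n- Q1\n- Q2']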
|
||||||
|
|
||||||
|
def get_output_mime_type(self) -> str:
|
||||||
|
"""Get MIME type for rendered output."""
|
||||||
|
return self.output_mime_type
|
||||||
|
|
||||||
|
async def _get_pptx_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||||
|
"""Get PowerPoint styling definitions using base template AI styling."""
|
||||||
|
style_schema = {
|
||||||
|
"title": {"font_size": 52, "color": "#1B365D", "bold": True, "align": "center"},
|
||||||
|
"heading": {"font_size": 36, "color": "#2C5F2D", "bold": True, "align": "left"},
|
||||||
|
"subheading": {"font_size": 28, "color": "#4A90E2", "bold": True, "align": "left"},
|
||||||
|
"paragraph": {"font_size": 20, "color": "#2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"bullet_list": {"font_size": 20, "color": "#2F2F2F", "indent": 20},
|
||||||
|
"table_header": {"font_size": 18, "color": "#FFFFFF", "bold": True, "background": "#1B365D"},
|
||||||
|
"table_cell": {"font_size": 16, "color": "#2F2F2F", "bold": False, "background": "#F8F9FA"},
|
||||||
|
"slide_size": "16:9",
|
||||||
|
"content_per_slide": "concise",
|
||||||
|
"design_theme": "corporate",
|
||||||
|
"color_scheme": "professional",
|
||||||
|
"background_style": "clean",
|
||||||
|
"accent_colors": ["#1B365D", "#2C5F2D", "#4A90E2", "#6B7280"],
|
||||||
|
"professional_grade": True,
|
||||||
|
"executive_ready": True
|
||||||
|
}
|
||||||
|
|
||||||
|
style_template = self._create_professional_pptx_template(user_prompt, style_schema)
|
||||||
|
# Use our own _get_ai_styles_with_pptx_colors method to ensure proper color conversion
|
||||||
|
styles = await self._get_ai_styles_with_pptx_colors(ai_service, style_template, self._get_default_pptx_styles())
|
||||||
|
|
||||||
|
# Validate PowerPoint-specific requirements
|
||||||
|
return self._validate_pptx_styles_readability(styles)
|
||||||
|
|
||||||
|
    def _create_professional_pptx_template(self, user_prompt: str, style_schema: Dict[str, Any]) -> str:
        """Create a professional PowerPoint-specific AI style template for corporate-quality slides."""
        import json
        schema_json = json.dumps(style_schema, indent=4)

        return f"""Customize the JSON below for professional PowerPoint slides.

User Request: {user_prompt or "Create professional corporate slides"}

Rules:
- Use professional colors (blues, grays, deep greens)
- Large, readable font sizes
- High contrast
- Sophisticated color palettes

Return ONLY this JSON with your changes:

{schema_json}

JSON ONLY. NO OTHER TEXT."""

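    # Illustrative note (not in the original source): the template above asks the model to
    # echo the schema with only value changes, so a well-formed reply would mirror the
    # style_schema passed in, for example
    #   {"title": {"font_size": 52, "color": "#14213D", "bold": true, "align": "center"}, ...}
    # Anything wrapped around that JSON object (markdown fences, explanatory text) is
    # stripped out again by _get_ai_styles_with_pptx_colors below.
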
async def _get_ai_styles_with_pptx_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Get AI styles with proper PowerPoint color conversion."""
|
||||||
|
if not ai_service:
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
try:
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||||
|
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationType.GENERAL
|
||||||
|
|
||||||
|
request = AiCallRequest(prompt=style_template, context="", options=request_options)
|
||||||
|
|
||||||
|
# Check if AI service is properly configured
|
||||||
|
if not hasattr(ai_service, 'aiObjects') or not ai_service.aiObjects:
|
||||||
|
self.logger.warning("AI service not properly configured, using defaults")
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
response = await ai_service.aiObjects.call(request)
|
||||||
|
|
||||||
|
# Check if response is valid
|
||||||
|
if not response:
|
||||||
|
self.logger.warning("AI service returned no response, using defaults")
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Clean and parse JSON
|
||||||
|
result = response.content.strip() if response and response.content else ""
|
||||||
|
|
||||||
|
# Check if result is empty
|
||||||
|
if not result:
|
||||||
|
self.logger.warning("AI styling returned empty response, using defaults")
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
# Log the raw response for debugging
|
||||||
|
self.logger.debug(f"AI styling raw response: {result[:200]}...")
|
||||||
|
|
||||||
|
# Extract JSON from various formats
|
||||||
|
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(1).strip()
|
||||||
|
elif result.startswith('```json'):
|
||||||
|
result = re.sub(r'^```json\s*', '', result)
|
||||||
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
elif result.startswith('```'):
|
||||||
|
result = re.sub(r'^```\s*', '', result)
|
||||||
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
|
||||||
|
# Try to extract JSON from explanatory text
|
||||||
|
json_patterns = [
|
||||||
|
r'\{[^{}]*"title"[^{}]*\}', # Simple JSON object
|
||||||
|
r'\{.*?"title".*?\}', # JSON with title field
|
||||||
|
r'\{.*?"font_size".*?\}', # JSON with font_size field
|
||||||
|
]
|
||||||
|
|
||||||
|
for pattern in json_patterns:
|
||||||
|
json_match = re.search(pattern, result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(0)
|
||||||
|
break
|
||||||
|
|
||||||
|
# Additional cleanup - remove any leading/trailing whitespace and newlines
|
||||||
|
result = result.strip()
|
||||||
|
|
||||||
|
# Check if result is still empty after cleanup
|
||||||
|
if not result:
|
||||||
|
self.logger.warning("AI styling returned empty content after cleanup, using defaults")
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
# Try to parse JSON
|
||||||
|
try:
|
||||||
|
styles = json.loads(result)
|
||||||
|
self.logger.debug(f"Successfully parsed AI styles: {list(styles.keys())}")
|
||||||
|
except json.JSONDecodeError as json_error:
|
||||||
|
self.logger.warning(f"AI styling returned invalid JSON: {json_error}")
|
||||||
|
self.logger.warning(f"Raw content that failed to parse: {result[:100]}...")
|
||||||
|
# Try to extract just the JSON part if it's embedded in text
|
||||||
|
json_start = result.find('{')
|
||||||
|
json_end = result.rfind('}')
|
||||||
|
if json_start != -1 and json_end != -1 and json_end > json_start:
|
||||||
|
json_part = result[json_start:json_end+1]
|
||||||
|
try:
|
||||||
|
styles = json.loads(json_part)
|
||||||
|
self.logger.info("Successfully extracted JSON from explanatory text")
|
||||||
|
self.logger.debug(f"Extracted AI styles: {list(styles.keys())}")
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
self.logger.warning("Could not extract valid JSON from response, using defaults")
|
||||||
|
return default_styles
|
||||||
|
else:
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
# Convert colors to PowerPoint RGB format
|
||||||
|
styles = self._convert_colors_format(styles)
|
||||||
|
|
||||||
|
return styles
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
    def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Convert hex colors to RGB format for PowerPoint compatibility."""
        try:
            for style_name, style_config in styles.items():
                if isinstance(style_config, dict):
                    for prop, value in style_config.items():
                        if isinstance(value, str) and value.startswith('#'):
                            # Convert hex to RGB tuple for PowerPoint
                            hex_color = value.lstrip('#')
                            if len(hex_color) == 6:
                                r = int(hex_color[0:2], 16)
                                g = int(hex_color[2:4], 16)
                                b = int(hex_color[4:6], 16)
                                styles[style_name][prop] = (r, g, b)
                            elif len(hex_color) == 8:  # aRGB format
                                r = int(hex_color[2:4], 16)
                                g = int(hex_color[4:6], 16)
                                b = int(hex_color[6:8], 16)
                                styles[style_name][prop] = (r, g, b)
            return styles
        except Exception as e:
            self.logger.warning(f"Color conversion failed: {str(e)}")
            return styles

    def _get_safe_color(self, color_value, default=(0, 0, 0)) -> tuple:
        """Get a safe RGB color tuple for PowerPoint."""
        if isinstance(color_value, tuple) and len(color_value) == 3:
            return color_value
        elif isinstance(color_value, str) and color_value.startswith('#'):
            hex_color = color_value.lstrip('#')
            if len(hex_color) == 6:
                r = int(hex_color[0:2], 16)
                g = int(hex_color[2:4], 16)
                b = int(hex_color[4:6], 16)
                return (r, g, b)
            elif len(hex_color) == 8:  # aRGB format
                r = int(hex_color[2:4], 16)
                g = int(hex_color[4:6], 16)
                b = int(hex_color[6:8], 16)
                return (r, g, b)
        return default

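    # Illustrative note (not in the original source): both helpers above normalise colors
    # to RGB tuples, e.g.
    #   self._get_safe_color("#1B365D")   -> (27, 54, 93)
    #   self._get_safe_color("#FF1B365D") -> (27, 54, 93)   # leading alpha byte dropped
    #   self._get_safe_color(None)        -> (0, 0, 0)      # falls back to the default
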
    def _validate_pptx_styles_readability(self, styles: Dict[str, Any]) -> Dict[str, Any]:
        """Validate and fix readability issues in AI-generated styles."""
        try:
            # Ensure minimum font sizes for PowerPoint readability
            min_font_sizes = {
                "title": 36,
                "heading": 24,
                "subheading": 20,
                "paragraph": 14,
                "bullet_list": 14,
                "table_header": 12,
                "table_cell": 12
            }

            for style_name, min_size in min_font_sizes.items():
                if style_name in styles:
                    current_size = styles[style_name].get("font_size", 12)
                    if current_size < min_size:
                        styles[style_name]["font_size"] = min_size

            return styles

        except Exception as e:
            self.logger.warning(f"Style validation failed: {str(e)}")
            return self._get_default_pptx_styles()

    def _get_default_pptx_styles(self) -> Dict[str, Any]:
        """Default PowerPoint styles with corporate professional color scheme."""
        return {
            "title": {"font_size": 52, "color": (27, 54, 93), "bold": True, "align": "center"},
            "heading": {"font_size": 36, "color": (44, 95, 45), "bold": True, "align": "left"},
            "subheading": {"font_size": 28, "color": (74, 144, 226), "bold": True, "align": "left"},
            "paragraph": {"font_size": 20, "color": (47, 47, 47), "bold": False, "align": "left"},
            "bullet_list": {"font_size": 20, "color": (47, 47, 47), "indent": 20},
            "table_header": {"font_size": 18, "color": (255, 255, 255), "bold": True, "background": (27, 54, 93)},
            "table_cell": {"font_size": 16, "color": (47, 47, 47), "bold": False, "background": (248, 249, 250)},
            "slide_size": "16:9",
            "content_per_slide": "concise",
            "design_theme": "corporate",
            "color_scheme": "professional",
            "background_style": "clean",
            "accent_colors": [(27, 54, 93), (44, 95, 45), (74, 144, 226), (107, 114, 128)],
            "professional_grade": True,
            "executive_ready": True
        }

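    # Illustrative note (not in the original source): _validate_pptx_styles_readability
    # only raises font sizes, never lowers them. For example, an AI response containing
    #   {"paragraph": {"font_size": 10, ...}}
    # comes back with font_size 14 (the minimum), while the default of 20 is left unchanged.
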
async def _parse_json_to_slides(self, json_content: Dict[str, Any], title: str, styles: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Parse JSON content into slide data structure.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
json_content: JSON content to parse
|
||||||
|
title: Presentation title
|
||||||
|
styles: AI-generated styles
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of slide data dictionaries
|
||||||
|
"""
|
||||||
|
slides = []
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Validate JSON structure
|
||||||
|
if not isinstance(json_content, dict):
|
||||||
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
|
if "sections" not in json_content:
|
||||||
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
|
document_title = json_content.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
|
# Create title slide
|
||||||
|
slides.append({
|
||||||
|
"title": document_title,
|
||||||
|
"content": "Generated by PowerOn AI System\n\n" + self._format_timestamp()
|
||||||
|
})
|
||||||
|
|
||||||
|
# Process sections into slides based on content and user intent
|
||||||
|
sections = json_content.get("sections", [])
|
||||||
|
slides.extend(self._create_slides_from_sections(sections, styles))
|
||||||
|
|
||||||
|
# If no content slides were created, create a default content slide
|
||||||
|
if len(slides) == 1: # Only title slide
|
||||||
|
slides.append({
|
||||||
|
"title": "Content Overview",
|
||||||
|
"content": "No structured content found in the source documents.\n\nPlease check the source documents and try again."
|
||||||
|
})
|
||||||
|
|
||||||
|
return slides
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error parsing JSON to slides: {str(e)}")
|
||||||
|
# Return minimal fallback slides
|
||||||
|
return [
|
||||||
|
{
|
||||||
|
"title": title,
|
||||||
|
"content": "Error parsing content for presentation"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
|
||||||
|
def _create_slide_from_section(self, section: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Create a slide from a JSON section."""
|
||||||
|
try:
|
||||||
|
# Get section title from data or use default
|
||||||
|
section_title = "Untitled Section"
|
||||||
|
if section.get("content_type") == "heading":
|
||||||
|
# Extract text from elements array
|
||||||
|
for element in section.get("elements", []):
|
||||||
|
if isinstance(element, dict) and "text" in element:
|
||||||
|
section_title = element.get("text", "Untitled Section")
|
||||||
|
break
|
||||||
|
elif section.get("title"):
|
||||||
|
section_title = section.get("title")
|
||||||
|
|
||||||
|
content_type = section.get("content_type", "paragraph")
|
||||||
|
elements = section.get("elements", [])
|
||||||
|
|
||||||
|
# Build slide content based on section type
|
||||||
|
content_parts = []
|
||||||
|
|
||||||
|
if content_type == "table":
|
||||||
|
content_parts.append(self._format_table_for_slide(elements))
|
||||||
|
elif content_type == "list":
|
||||||
|
content_parts.append(self._format_list_for_slide(elements))
|
||||||
|
elif content_type == "heading":
|
||||||
|
content_parts.append(self._format_heading_for_slide(elements))
|
||||||
|
elif content_type == "paragraph":
|
||||||
|
content_parts.append(self._format_paragraph_for_slide(elements))
|
||||||
|
elif content_type == "code":
|
||||||
|
content_parts.append(self._format_code_for_slide(elements))
|
||||||
|
else:
|
||||||
|
content_parts.append(self._format_paragraph_for_slide(elements))
|
||||||
|
|
||||||
|
# Combine content parts
|
||||||
|
slide_content = "\n\n".join(filter(None, content_parts))
|
||||||
|
|
||||||
|
return {
|
||||||
|
"title": section_title,
|
||||||
|
"content": slide_content
|
||||||
|
}
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error creating slide from section: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
def _format_table_for_slide(self, elements: List[Dict[str, Any]]) -> str:
|
||||||
|
"""Format table data for slide presentation."""
|
||||||
|
try:
|
||||||
|
# Extract table data from elements array
|
||||||
|
headers = []
|
||||||
|
rows = []
|
||||||
|
for element in elements:
|
||||||
|
if isinstance(element, dict) and "headers" in element and "rows" in element:
|
||||||
|
headers = element.get("headers", [])
|
||||||
|
rows = element.get("rows", [])
|
||||||
|
break
|
||||||
|
|
||||||
|
if not headers:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Create table representation
|
||||||
|
table_lines = []
|
||||||
|
|
||||||
|
# Add headers
|
||||||
|
header_line = " | ".join(str(h) for h in headers)
|
||||||
|
table_lines.append(header_line)
|
||||||
|
|
||||||
|
# Add separator
|
||||||
|
separator = "-" * len(header_line)
|
||||||
|
table_lines.append(separator)
|
||||||
|
|
||||||
|
# Add data rows (limit based on content density)
|
||||||
|
max_rows = 5 # Default limit
|
||||||
|
for row in rows[:max_rows]:
|
||||||
|
row_line = " | ".join(str(cell) for cell in row)
|
||||||
|
table_lines.append(row_line)
|
||||||
|
|
||||||
|
if len(rows) > max_rows:
|
||||||
|
table_lines.append(f"... and {len(rows) - max_rows} more rows")
|
||||||
|
|
||||||
|
return "\n".join(table_lines)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error formatting table for slide: {str(e)}")
|
||||||
|
return ""
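# Illustrative note (not in the original source): a table element such as
#   {"headers": ["Region", "Revenue"], "rows": [["EMEA", "1.2M"], ["APAC", "0.9M"]]}
# is rendered by the method above as pipe-separated lines:
#   Region | Revenue
#   ----------------
#   EMEA | 1.2M
#   APAC | 0.9M
# with at most five data rows shown and a "... and N more rows" suffix beyond that.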
|
||||||
|
|
||||||
|
def _format_list_for_slide(self, list_data: Dict[str, Any]) -> str:
|
||||||
|
"""Format list data for slide presentation."""
|
||||||
|
try:
|
||||||
|
items = list_data.get("items", [])
|
||||||
|
|
||||||
|
if not items:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# Create list representation
|
||||||
|
list_lines = []
|
||||||
|
|
||||||
|
for item in items:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
text = item.get("text", "")
|
||||||
|
list_lines.append(f"• {text}")
|
||||||
|
|
||||||
|
# Add subitems (limit to 3 for readability)
|
||||||
|
subitems = item.get("subitems", [])[:3]
|
||||||
|
for subitem in subitems:
|
||||||
|
if isinstance(subitem, dict):
|
||||||
|
list_lines.append(f" - {subitem.get('text', '')}")
|
||||||
|
else:
|
||||||
|
list_lines.append(f" - {subitem}")
|
||||||
|
else:
|
||||||
|
list_lines.append(f"• {str(item)}")
|
||||||
|
|
||||||
|
return "\n".join(list_lines)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error formatting list for slide: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _format_heading_for_slide(self, heading_data: Dict[str, Any]) -> str:
|
||||||
|
"""Format heading data for slide presentation."""
|
||||||
|
try:
|
||||||
|
text = heading_data.get("text", "")
|
||||||
|
level = heading_data.get("level", 1)
|
||||||
|
|
||||||
|
if text:
|
||||||
|
return f"{'#' * level} {text}"
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error formatting heading for slide: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _format_paragraph_for_slide(self, paragraph_data: Dict[str, Any]) -> str:
|
||||||
|
"""Format paragraph data for slide presentation."""
|
||||||
|
try:
|
||||||
|
text = paragraph_data.get("text", "")
|
||||||
|
|
||||||
|
if text:
|
||||||
|
# Limit paragraph length based on content density
|
||||||
|
max_length = 200 # Default limit
|
||||||
|
if len(text) > max_length:
|
||||||
|
text = text[:max_length] + "..."
|
||||||
|
|
||||||
|
return text
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error formatting paragraph for slide: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _format_code_for_slide(self, code_data: Dict[str, Any]) -> str:
|
||||||
|
"""Format code data for slide presentation."""
|
||||||
|
try:
|
||||||
|
code = code_data.get("code", "")
|
||||||
|
language = code_data.get("language", "")
|
||||||
|
|
||||||
|
if code:
|
||||||
|
# Limit code length based on content density
|
||||||
|
max_length = 100 # Default limit
|
||||||
|
if len(code) > max_length:
|
||||||
|
code = code[:max_length] + "..."
|
||||||
|
|
||||||
|
if language:
|
||||||
|
return f"Code ({language}):\n{code}"
|
||||||
|
else:
|
||||||
|
return f"Code:\n{code}"
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error formatting code for slide: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _get_slide_layout_index(self, slide_data: Dict[str, Any], styles: Dict[str, Any]) -> int:
|
||||||
|
"""Determine the best professional slide layout based on content."""
|
||||||
|
try:
|
||||||
|
content = slide_data.get("content", "")
|
||||||
|
title = slide_data.get("title", "")
|
||||||
|
|
||||||
|
# Check if it's a title slide (first slide)
|
||||||
|
if not content or "Generated by PowerOn AI System" in content:
|
||||||
|
return 0 # Title slide layout
|
||||||
|
|
||||||
|
# Professional layout selection based on content
|
||||||
|
if "|" in content and "-" in content:
|
||||||
|
# Has both tables and lists - use content with caption for professional look
|
||||||
|
return 2
|
||||||
|
elif "|" in content:
|
||||||
|
# Has tables - use content layout for clean table presentation
|
||||||
|
return 1
|
||||||
|
elif content.count("•") > 2:
|
||||||
|
# Has many bullet points - use content layout for better readability
|
||||||
|
return 1
|
||||||
|
elif len(content) > 200:
|
||||||
|
# Long content - use content layout for better text flow
|
||||||
|
return 1
|
||||||
|
elif title and len(title) > 20:
|
||||||
|
# Long title - use title and content layout
|
||||||
|
return 1
|
||||||
|
else:
|
||||||
|
# Default to title and content layout for professional appearance
|
||||||
|
return 1
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error determining slide layout: {str(e)}")
|
||||||
|
return 1 # Default to title and content layout
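# Illustrative note (not in the original source): layout selection is heuristic. A slide
# whose content contains "Generated by PowerOn AI System" maps to layout 0 (title slide);
# content containing both "|" and "-" maps to layout 2 (content with caption); a pipe
# table without dashes, three or more "•" bullets, or more than 200 characters maps to
# layout 1, which is also the default for everything else.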
|
||||||
|
|
||||||
|
def _create_slides_from_sections(self, sections: List[Dict[str, Any]], styles: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
|
"""Create slides from sections based on content density and user intent."""
|
||||||
|
try:
|
||||||
|
slides = []
|
||||||
|
content_per_slide = styles.get("content_per_slide", "concise")
|
||||||
|
|
||||||
|
# Group sections by type and create slides
|
||||||
|
current_slide_content = []
|
||||||
|
current_slide_title = "Content Overview"
|
||||||
|
|
||||||
|
for section in sections:
|
||||||
|
section_type = section.get("content_type", "paragraph")
|
||||||
|
elements = section.get("elements", [])
|
||||||
|
|
||||||
|
if section_type == "heading":
|
||||||
|
# If we have accumulated content, create a slide
|
||||||
|
if current_slide_content:
|
||||||
|
slides.append({
|
||||||
|
"title": current_slide_title,
|
||||||
|
"content": "\n\n".join(current_slide_content)
|
||||||
|
})
|
||||||
|
current_slide_content = []
|
||||||
|
|
||||||
|
# Start new slide with heading as title
|
||||||
|
for element in elements:
|
||||||
|
if isinstance(element, dict) and "text" in element:
|
||||||
|
current_slide_title = element.get("text", "Untitled Section")
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
# Add content to current slide
|
||||||
|
formatted_content = self._format_section_content(section)
|
||||||
|
if formatted_content:
|
||||||
|
current_slide_content.append(formatted_content)
|
||||||
|
|
||||||
|
# Add final slide if there's content
|
||||||
|
if current_slide_content:
|
||||||
|
slides.append({
|
||||||
|
"title": current_slide_title,
|
||||||
|
"content": "\n\n".join(current_slide_content)
|
||||||
|
})
|
||||||
|
|
||||||
|
return slides
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error creating slides from sections: {str(e)}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
    def _format_section_content(self, section: Dict[str, Any]) -> str:
        """Format section content for slide presentation."""
        try:
            content_type = section.get("content_type", "paragraph")
            elements = section.get("elements", [])

            # Process each element in the section
            content_parts = []
            for element in elements:
                if content_type == "table":
                    # The table formatter expects a list of table elements
                    content_parts.append(self._format_table_for_slide([element]))
                elif content_type == "list":
                    content_parts.append(self._format_list_for_slide(element))
                elif content_type == "heading":
                    content_parts.append(self._format_heading_for_slide(element))
                elif content_type == "paragraph":
                    content_parts.append(self._format_paragraph_for_slide(element))
                elif content_type == "code":
                    content_parts.append(self._format_code_for_slide(element))
                else:
                    content_parts.append(self._format_paragraph_for_slide(element))

            return "\n\n".join(filter(None, content_parts))

        except Exception as e:
            self.logger.warning(f"Error formatting section content: {str(e)}")
            return ""

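    # Illustrative note (not in the original source): a section like
    #   {"content_type": "list", "elements": [{"items": [{"text": "Fast"}, {"text": "Safe"}]}]}
    # is expected to come back from the method above as bullet text:
    #   • Fast
    #   • Safe
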
    def _format_timestamp(self) -> str:
        """Format current timestamp for presentation generation."""
        from datetime import datetime, UTC
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")

modules/services/serviceGeneration/renderers/rendererText.py (new file, 256 lines)
@@ -0,0 +1,256 @@
"""
|
||||||
|
Text renderer for report generation.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
|
||||||
|
class RendererText(BaseRenderer):
|
||||||
|
"""Renders content to plain text format with format-specific extraction."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported text formats (excluding formats with dedicated renderers)."""
|
||||||
|
return [
|
||||||
|
'txt', 'text', 'plain',
|
||||||
|
# Programming languages
|
||||||
|
'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
|
||||||
|
'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
|
||||||
|
'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
|
||||||
|
'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
|
||||||
|
'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
|
||||||
|
# Web technologies (excluding html/htm which have dedicated renderer)
|
||||||
|
'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
|
||||||
|
# Data formats (excluding csv, md/markdown which have dedicated renderers)
|
||||||
|
'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
|
||||||
|
# Configuration files
|
||||||
|
'env', 'properties', 'conf', 'config', 'rc',
|
||||||
|
'gitattributes', 'editorconfig', 'eslintrc',
|
||||||
|
# Documentation
|
||||||
|
'readme', 'changelog', 'license', 'authors',
|
||||||
|
'contributing', 'todo', 'notes', 'docs'
|
||||||
|
]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return [
|
||||||
|
'ascii', 'utf8', 'utf-8', 'code', 'source',
|
||||||
|
'script', 'program', 'file', 'document',
|
||||||
|
'raw', 'unformatted', 'plaintext'
|
||||||
|
]
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for text renderer."""
|
||||||
|
return 90
|
||||||
|
|
||||||
|
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||||
|
"""Render extracted JSON content to plain text format."""
|
||||||
|
try:
|
||||||
|
# Generate text from JSON structure
|
||||||
|
text_content = self._generate_text_from_json(extracted_content, title)
|
||||||
|
|
||||||
|
return text_content, "text/plain"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering text: {str(e)}")
|
||||||
|
# Return minimal text fallback
|
||||||
|
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
|
||||||
|
|
||||||
|
def _generate_text_from_json(self, json_content: Dict[str, Any], title: str) -> str:
|
||||||
|
"""Generate text content from structured JSON document."""
|
||||||
|
try:
|
||||||
|
# Validate JSON structure
|
||||||
|
if not isinstance(json_content, dict):
|
||||||
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
|
if "sections" not in json_content:
|
||||||
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
|
document_title = json_content.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
|
# Build text content
|
||||||
|
text_parts = []
|
||||||
|
|
||||||
|
# Document title
|
||||||
|
text_parts.append(document_title)
|
||||||
|
text_parts.append("=" * len(document_title))
|
||||||
|
text_parts.append("")
|
||||||
|
|
||||||
|
# Process each section
|
||||||
|
sections = json_content.get("sections", [])
|
||||||
|
for section in sections:
|
||||||
|
section_text = self._render_json_section(section)
|
||||||
|
if section_text:
|
||||||
|
text_parts.append(section_text)
|
||||||
|
text_parts.append("") # Add spacing between sections
|
||||||
|
|
||||||
|
# Add generation info
|
||||||
|
text_parts.append("")
|
||||||
|
text_parts.append(f"Generated: {self._format_timestamp()}")
|
||||||
|
|
||||||
|
return '\n'.join(text_parts)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error generating text from JSON: {str(e)}")
|
||||||
|
raise Exception(f"Text generation failed: {str(e)}")
|
||||||
|
|
||||||
|
def _render_json_section(self, section: Dict[str, Any]) -> str:
|
||||||
|
"""Render a single JSON section to text."""
|
||||||
|
try:
|
||||||
|
section_type = self._get_section_type(section)
|
||||||
|
section_data = self._get_section_data(section)
|
||||||
|
|
||||||
|
if section_type == "table":
|
||||||
|
# Process the section data to extract table structure
|
||||||
|
processed_data = self._process_section_by_type(section)
|
||||||
|
return self._render_json_table(processed_data)
|
||||||
|
elif section_type == "bullet_list":
|
||||||
|
# Process the section data to extract bullet list structure
|
||||||
|
processed_data = self._process_section_by_type(section)
|
||||||
|
return self._render_json_bullet_list(processed_data)
|
||||||
|
elif section_type == "heading":
|
||||||
|
# Render each heading element in the elements array
|
||||||
|
# section_data is already the elements array from _get_section_data
|
||||||
|
rendered_elements = []
|
||||||
|
for element in section_data:
|
||||||
|
rendered_elements.append(self._render_json_heading(element))
|
||||||
|
return "\n".join(rendered_elements)
|
||||||
|
elif section_type == "paragraph":
|
||||||
|
# Render each paragraph element in the elements array
|
||||||
|
# section_data is already the elements array from _get_section_data
|
||||||
|
rendered_elements = []
|
||||||
|
for element in section_data:
|
||||||
|
rendered_elements.append(self._render_json_paragraph(element))
|
||||||
|
return "\n".join(rendered_elements)
|
||||||
|
elif section_type == "code_block":
|
||||||
|
# Process the section data to extract code block structure
|
||||||
|
processed_data = self._process_section_by_type(section)
|
||||||
|
return self._render_json_code_block(processed_data)
|
||||||
|
elif section_type == "image":
|
||||||
|
# Process the section data to extract image structure
|
||||||
|
processed_data = self._process_section_by_type(section)
|
||||||
|
return self._render_json_image(processed_data)
|
||||||
|
else:
|
||||||
|
# Fallback to paragraph for unknown types - render each element
|
||||||
|
# section_data is already the elements array from _get_section_data
|
||||||
|
rendered_elements = []
|
||||||
|
for element in section_data:
|
||||||
|
rendered_elements.append(self._render_json_paragraph(element))
|
||||||
|
return "\n".join(rendered_elements)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering section {self._get_section_id(section)}: {str(e)}")
|
||||||
|
return f"[Error rendering section: {str(e)}]"
|
||||||
|
|
||||||
|
def _render_json_table(self, table_data: Dict[str, Any]) -> str:
|
||||||
|
"""Render a JSON table to text."""
|
||||||
|
try:
|
||||||
|
headers = table_data.get("headers", [])
|
||||||
|
rows = table_data.get("rows", [])
|
||||||
|
|
||||||
|
if not headers or not rows:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
text_parts = []
|
||||||
|
|
||||||
|
# Create table header
|
||||||
|
header_line = " | ".join(str(header) for header in headers)
|
||||||
|
text_parts.append(header_line)
|
||||||
|
|
||||||
|
# Add separator line
|
||||||
|
separator_line = " | ".join("-" * len(str(header)) for header in headers)
|
||||||
|
text_parts.append(separator_line)
|
||||||
|
|
||||||
|
# Add data rows
|
||||||
|
for row in rows:
|
||||||
|
row_line = " | ".join(str(cell_data) for cell_data in row)
|
||||||
|
text_parts.append(row_line)
|
||||||
|
|
||||||
|
return '\n'.join(text_parts)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering table: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _render_json_bullet_list(self, list_data: Dict[str, Any]) -> str:
|
||||||
|
"""Render a JSON bullet list to text."""
|
||||||
|
try:
|
||||||
|
items = list_data.get("items", [])
|
||||||
|
|
||||||
|
if not items:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
text_parts = []
|
||||||
|
for item in items:
|
||||||
|
if isinstance(item, str):
|
||||||
|
text_parts.append(f"- {item}")
|
||||||
|
elif isinstance(item, dict) and "text" in item:
|
||||||
|
text_parts.append(f"- {item['text']}")
|
||||||
|
|
||||||
|
return '\n'.join(text_parts)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering bullet list: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _render_json_heading(self, heading_data: Dict[str, Any]) -> str:
|
||||||
|
"""Render a JSON heading to text."""
|
||||||
|
try:
|
||||||
|
level = heading_data.get("level", 1)
|
||||||
|
text = heading_data.get("text", "")
|
||||||
|
|
||||||
|
if text:
|
||||||
|
level = max(1, min(6, level))
|
||||||
|
if level == 1:
|
||||||
|
return f"{text}\n{'=' * len(text)}"
|
||||||
|
elif level == 2:
|
||||||
|
return f"{text}\n{'-' * len(text)}"
|
||||||
|
else:
|
||||||
|
return f"{'#' * level} {text}"
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering heading: {str(e)}")
|
||||||
|
return ""
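# Illustrative note (not in the original source): heading levels map to the usual
# plain-text conventions, e.g. a level 1 "Summary" becomes "Summary" underlined with "=",
# level 2 is underlined with "-", and level 3 and deeper falls back to a "### Summary"
# style prefix.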
|
||||||
|
|
||||||
|
def _render_json_paragraph(self, paragraph_data: Dict[str, Any]) -> str:
|
||||||
|
"""Render a JSON paragraph to text."""
|
||||||
|
try:
|
||||||
|
text = paragraph_data.get("text", "")
|
||||||
|
return text if text else ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering paragraph: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _render_json_code_block(self, code_data: Dict[str, Any]) -> str:
|
||||||
|
"""Render a JSON code block to text."""
|
||||||
|
try:
|
||||||
|
code = code_data.get("code", "")
|
||||||
|
language = code_data.get("language", "")
|
||||||
|
|
||||||
|
if code:
|
||||||
|
if language:
|
||||||
|
return f"Code ({language}):\n{code}"
|
||||||
|
else:
|
||||||
|
return code
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering code block: {str(e)}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
def _render_json_image(self, image_data: Dict[str, Any]) -> str:
|
||||||
|
"""Render a JSON image to text."""
|
||||||
|
try:
|
||||||
|
alt_text = image_data.get("altText", "Image")
|
||||||
|
return f"[Image: {alt_text}]"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Error rendering image: {str(e)}")
|
||||||
|
return f"[Image: {image_data.get('altText', 'Image')}]"
|
||||||
modules/services/serviceGeneration/renderers/rendererXlsx.py (new file, 791 lines)
@@ -0,0 +1,791 @@
"""
|
||||||
|
Excel renderer for report generation using openpyxl.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
|
from typing import Dict, Any, Tuple, List
|
||||||
|
import io
|
||||||
|
import base64
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
|
||||||
|
try:
|
||||||
|
from openpyxl import Workbook
|
||||||
|
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
|
||||||
|
from openpyxl.utils import get_column_letter
|
||||||
|
from openpyxl.worksheet.table import Table, TableStyleInfo
|
||||||
|
OPENPYXL_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
OPENPYXL_AVAILABLE = False
|
||||||
|
|
||||||
|
class RendererXlsx(BaseRenderer):
|
||||||
|
"""Renders content to Excel format using openpyxl."""
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_supported_formats(cls) -> List[str]:
|
||||||
|
"""Return supported Excel formats."""
|
||||||
|
return ['xlsx', 'xls', 'excel']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_format_aliases(cls) -> List[str]:
|
||||||
|
"""Return format aliases."""
|
||||||
|
return ['spreadsheet', 'workbook']
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def get_priority(cls) -> int:
|
||||||
|
"""Return priority for Excel renderer."""
|
||||||
|
return 110
|
||||||
|
|
||||||
|
async def render(self, extracted_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> Tuple[str, str]:
|
||||||
|
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
|
||||||
|
try:
|
||||||
|
if not OPENPYXL_AVAILABLE:
|
||||||
|
# Fallback to CSV if openpyxl not available
|
||||||
|
from .rendererCsv import RendererCsv
|
||||||
|
csv_renderer = RendererCsv()
|
||||||
|
csv_content, _ = await csv_renderer.render(extracted_content, title, user_prompt, ai_service)
|
||||||
|
return csv_content, "text/csv"
|
||||||
|
|
||||||
|
# Generate Excel using AI-analyzed styling
|
||||||
|
excel_content = await self._generate_excel_from_json(extracted_content, title, user_prompt, ai_service)
|
||||||
|
|
||||||
|
return excel_content, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error rendering Excel: {str(e)}")
|
||||||
|
# Return CSV fallback
|
||||||
|
return f"Title,Content\n{title},Error rendering Excel report: {str(e)}", "text/csv"
|
||||||
|
|
||||||
|
def _generate_excel(self, content: str, title: str) -> str:
|
||||||
|
"""Generate Excel content using openpyxl."""
|
||||||
|
try:
|
||||||
|
# Create workbook
|
||||||
|
wb = Workbook()
|
||||||
|
|
||||||
|
# Remove default sheet
|
||||||
|
wb.remove(wb.active)
|
||||||
|
|
||||||
|
# Create sheets
|
||||||
|
summary_sheet = wb.create_sheet("Summary", 0)
|
||||||
|
data_sheet = wb.create_sheet("Data", 1)
|
||||||
|
analysis_sheet = wb.create_sheet("Analysis", 2)
|
||||||
|
|
||||||
|
# Add content to sheets
|
||||||
|
self._populate_summary_sheet(summary_sheet, title)
|
||||||
|
self._populate_data_sheet(data_sheet, content)
|
||||||
|
self._populate_analysis_sheet(analysis_sheet, content)
|
||||||
|
|
||||||
|
# Save to buffer
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
wb.save(buffer)
|
||||||
|
buffer.seek(0)
|
||||||
|
|
||||||
|
# Convert to base64
|
||||||
|
excel_bytes = buffer.getvalue()
|
||||||
|
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
|
||||||
|
|
||||||
|
return excel_base64
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error generating Excel: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
def _populate_summary_sheet(self, sheet, title: str):
|
||||||
|
"""Populate the summary sheet."""
|
||||||
|
try:
|
||||||
|
# Title
|
||||||
|
sheet['A1'] = title
|
||||||
|
sheet['A1'].font = Font(size=16, bold=True)
|
||||||
|
sheet['A1'].alignment = Alignment(horizontal='center')
|
||||||
|
|
||||||
|
# Generation info
|
||||||
|
sheet['A3'] = "Generated:"
|
||||||
|
sheet['B3'] = self._format_timestamp()
|
||||||
|
sheet['A4'] = "Status:"
|
||||||
|
sheet['B4'] = "Generated Successfully"
|
||||||
|
|
||||||
|
# Key metrics placeholder
|
||||||
|
sheet['A6'] = "Key Metrics:"
|
||||||
|
sheet['A6'].font = Font(bold=True)
|
||||||
|
sheet['A7'] = "Total Items:"
|
||||||
|
sheet['B7'] = "=COUNTA(Data!A:A)-1" # Count non-empty cells in Data sheet
|
||||||
|
|
||||||
|
# Auto-adjust column widths
|
||||||
|
sheet.column_dimensions['A'].width = 20
|
||||||
|
sheet.column_dimensions['B'].width = 30
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not populate summary sheet: {str(e)}")
|
||||||
|
|
||||||
|
def _populate_data_sheet(self, sheet, content: str):
|
||||||
|
"""Populate the data sheet."""
|
||||||
|
try:
|
||||||
|
# Headers
|
||||||
|
headers = ["Item/Category", "Value/Amount", "Percentage", "Source Document", "Notes/Comments"]
|
||||||
|
for col, header in enumerate(headers, 1):
|
||||||
|
cell = sheet.cell(row=1, column=col, value=header)
|
||||||
|
cell.font = Font(bold=True)
|
||||||
|
cell.fill = PatternFill(start_color="CCCCCC", end_color="CCCCCC", fill_type="solid")
|
||||||
|
|
||||||
|
# Process content
|
||||||
|
lines = content.split('\n')
|
||||||
|
row = 2
|
||||||
|
|
||||||
|
for line in lines:
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Check for table data (lines with |)
|
||||||
|
if '|' in line:
|
||||||
|
cells = [cell.strip() for cell in line.split('|') if cell.strip()]
|
||||||
|
for col, cell_data in enumerate(cells[:5], 1): # Limit to 5 columns
|
||||||
|
sheet.cell(row=row, column=col, value=cell_data)
|
||||||
|
row += 1
|
||||||
|
else:
|
||||||
|
# Regular content
|
||||||
|
sheet.cell(row=row, column=1, value=line)
|
||||||
|
row += 1
|
||||||
|
|
||||||
|
# Auto-adjust column widths
|
||||||
|
for col in range(1, 6):
|
||||||
|
sheet.column_dimensions[get_column_letter(col)].width = 20
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not populate data sheet: {str(e)}")
|
||||||
|
|
||||||
|
def _populate_analysis_sheet(self, sheet, content: str):
|
||||||
|
"""Populate the analysis sheet."""
|
||||||
|
try:
|
||||||
|
# Title
|
||||||
|
sheet['A1'] = "Analysis & Insights"
|
||||||
|
sheet['A1'].font = Font(size=14, bold=True)
|
||||||
|
|
||||||
|
# Content analysis
|
||||||
|
lines = content.split('\n')
|
||||||
|
row = 3
|
||||||
|
|
||||||
|
sheet['A3'] = "Content Analysis:"
|
||||||
|
sheet['A3'].font = Font(bold=True)
|
||||||
|
row += 1
|
||||||
|
|
||||||
|
# Count different types of content
|
||||||
|
table_lines = sum(1 for line in lines if '|' in line)
|
||||||
|
list_lines = sum(1 for line in lines if line.startswith(('- ', '* ')))
|
||||||
|
text_lines = len(lines) - table_lines - list_lines
|
||||||
|
|
||||||
|
sheet[f'A{row}'] = f"Total Lines: {len(lines)}"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = f"Table Rows: {table_lines}"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = f"List Items: {list_lines}"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = f"Text Lines: {text_lines}"
|
||||||
|
row += 2
|
||||||
|
|
||||||
|
# Recommendations
|
||||||
|
sheet[f'A{row}'] = "Recommendations:"
|
||||||
|
sheet[f'A{row}'].font = Font(bold=True)
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = "1. Review data accuracy"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = "2. Consider additional analysis"
|
||||||
|
row += 1
|
||||||
|
sheet[f'A{row}'] = "3. Update regularly"
|
||||||
|
|
||||||
|
# Auto-adjust column width
|
||||||
|
sheet.column_dimensions['A'].width = 30
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not populate analysis sheet: {str(e)}")
|
||||||
|
|
||||||
|
async def _generate_excel_from_json(self, json_content: Dict[str, Any], title: str, user_prompt: str = None, ai_service=None) -> str:
|
||||||
|
"""Generate Excel content from structured JSON document using AI-generated styling."""
|
||||||
|
try:
|
||||||
|
# Debug output
|
||||||
|
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT TYPE: {type(json_content)}", "EXCEL_RENDERER")
|
||||||
|
self.services.utils.debugLogToFile(f"EXCEL JSON CONTENT KEYS: {list(json_content.keys()) if isinstance(json_content, dict) else 'Not a dict'}", "EXCEL_RENDERER")
|
||||||
|
|
||||||
|
# Get AI-generated styling definitions
|
||||||
|
styles = await self._get_excel_styles(user_prompt, ai_service)
|
||||||
|
|
||||||
|
# Validate JSON structure
|
||||||
|
if not isinstance(json_content, dict):
|
||||||
|
raise ValueError("JSON content must be a dictionary")
|
||||||
|
|
||||||
|
if "sections" not in json_content:
|
||||||
|
raise ValueError("JSON content must contain 'sections' field")
|
||||||
|
|
||||||
|
# Use title from JSON metadata if available, otherwise use provided title
|
||||||
|
document_title = json_content.get("metadata", {}).get("title", title)
|
||||||
|
|
||||||
|
# Create workbook
|
||||||
|
wb = Workbook()
|
||||||
|
|
||||||
|
# Create sheets based on content
|
||||||
|
sheets = self._create_excel_sheets(wb, json_content, styles)
|
||||||
|
self.services.utils.debugLogToFile(f"EXCEL SHEETS CREATED: {list(sheets.keys()) if sheets else 'None'}", "EXCEL_RENDERER")
|
||||||
|
|
||||||
|
# Populate sheets with content
|
||||||
|
self._populate_excel_sheets(sheets, json_content, styles)
|
||||||
|
|
||||||
|
# Save to buffer
|
||||||
|
buffer = io.BytesIO()
|
||||||
|
wb.save(buffer)
|
||||||
|
buffer.seek(0)
|
||||||
|
|
||||||
|
# Convert to base64
|
||||||
|
excel_bytes = buffer.getvalue()
|
||||||
|
self.services.utils.debugLogToFile(f"EXCEL BYTES LENGTH: {len(excel_bytes)}", "EXCEL_RENDERER")
|
||||||
|
try:
|
||||||
|
excel_base64 = base64.b64encode(excel_bytes).decode('utf-8')
|
||||||
|
self.services.utils.debugLogToFile(f"EXCEL BASE64 LENGTH: {len(excel_base64)}", "EXCEL_RENDERER")
|
||||||
|
except Exception as b64_error:
|
||||||
|
self.services.utils.debugLogToFile(f"BASE64 ENCODING ERROR: {b64_error}", "EXCEL_RENDERER")
|
||||||
|
raise
|
||||||
|
|
||||||
|
return excel_base64
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Error generating Excel from JSON: {str(e)}")
|
||||||
|
raise Exception(f"Excel generation failed: {str(e)}")
|
||||||
|
|
||||||
|
async def _get_excel_styles(self, user_prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||||
|
"""Get Excel styling definitions using base template AI styling."""
|
||||||
|
style_schema = {
|
||||||
|
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
||||||
|
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
||||||
|
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
||||||
|
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
|
||||||
|
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
||||||
|
}
|
||||||
|
|
||||||
|
style_template = self._create_ai_style_template("xlsx", user_prompt, style_schema)
|
||||||
|
# Use our own _get_ai_styles_with_excel_colors method to ensure proper color conversion
|
||||||
|
styles = await self._get_ai_styles_with_excel_colors(ai_service, style_template, self._get_default_excel_styles())
|
||||||
|
|
||||||
|
# Validate and fix contrast issues
|
||||||
|
return self._validate_excel_styles_contrast(styles)
|
||||||
|
|
||||||
|
async def _get_ai_styles_with_excel_colors(self, ai_service, style_template: str, default_styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Get AI styles with proper Excel color conversion."""
|
||||||
|
if not ai_service:
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
try:
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||||
|
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationType.GENERAL
|
||||||
|
|
||||||
|
request = AiCallRequest(prompt=style_template, context="", options=request_options)
|
||||||
|
response = await ai_service.aiObjects.call(request)
|
||||||
|
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Clean and parse JSON
|
||||||
|
result = response.content.strip() if response and response.content else ""
|
||||||
|
|
||||||
|
# Check if result is empty
|
||||||
|
if not result:
|
||||||
|
self.logger.warning("AI styling returned empty response, using defaults")
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
# Extract JSON from markdown if present
|
||||||
|
json_match = re.search(r'```json\s*\n(.*?)\n```', result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(1).strip()
|
||||||
|
self.services.utils.debugLogToFile(f"EXTRACTED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
|
||||||
|
elif result.startswith('```json'):
|
||||||
|
result = re.sub(r'^```json\s*', '', result)
|
||||||
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
self.services.utils.debugLogToFile(f"CLEANED JSON FROM MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
|
||||||
|
elif result.startswith('```'):
|
||||||
|
result = re.sub(r'^```\s*', '', result)
|
||||||
|
result = re.sub(r'\s*```$', '', result)
|
||||||
|
self.services.utils.debugLogToFile(f"CLEANED JSON FROM GENERIC MARKDOWN: {result[:100]}...", "EXCEL_RENDERER")
|
||||||
|
|
||||||
|
# Try to parse JSON
|
||||||
|
try:
|
||||||
|
styles = json.loads(result)
|
||||||
|
except json.JSONDecodeError as json_error:
|
||||||
|
self.logger.warning(f"AI styling returned invalid JSON: {json_error}, using defaults")
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
# Convert colors to Excel aRGB format
|
||||||
|
styles = self._convert_colors_format(styles)
|
||||||
|
|
||||||
|
return styles
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"AI styling failed: {str(e)}, using defaults")
|
||||||
|
return default_styles
|
||||||
|
|
||||||
|
def _get_safe_color(self, color_value: str, default: str = "FF000000") -> str:
|
||||||
|
"""Get a safe aRGB color value for Excel (without # prefix)."""
|
||||||
|
if not isinstance(color_value, str):
|
||||||
|
return default
|
||||||
|
|
||||||
|
# Remove # prefix if present
|
||||||
|
if color_value.startswith('#'):
|
||||||
|
color_value = color_value[1:]
|
||||||
|
|
||||||
|
if len(color_value) == 6:
|
||||||
|
# Convert RRGGBB to AARRGGBB
|
||||||
|
return f"FF{color_value}"
|
||||||
|
elif len(color_value) == 8:
|
||||||
|
# Already aRGB format
|
||||||
|
return color_value
|
||||||
|
else:
|
||||||
|
# Unexpected format, return default
|
||||||
|
return default
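# Illustrative note (not in the original source): unlike the PowerPoint renderer, Excel
# colors are kept as aRGB strings for openpyxl, e.g.
#   self._get_safe_color("#1F4E79")  -> "FF1F4E79"
#   self._get_safe_color("FF2F2F2F") -> "FF2F2F2F"   # already aRGB, returned unchanged
#   self._get_safe_color(1234)       -> "FF000000"   # non-string input falls back to default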
|
||||||
|
|
||||||
|
def _convert_colors_format(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Convert hex colors to aRGB format for Excel compatibility."""
|
||||||
|
try:
|
||||||
|
self.services.utils.debugLogToFile(f"CONVERTING COLORS IN STYLES: {styles}", "EXCEL_RENDERER")
|
||||||
|
for style_name, style_config in styles.items():
|
||||||
|
if isinstance(style_config, dict):
|
||||||
|
for prop, value in style_config.items():
|
||||||
|
if isinstance(value, str) and value.startswith('#') and len(value) == 7:
|
||||||
|
# Convert #RRGGBB to #AARRGGBB (add FF alpha channel)
|
||||||
|
styles[style_name][prop] = f"FF{value[1:]}"
|
||||||
|
elif isinstance(value, str) and value.startswith('#') and len(value) == 9:
|
||||||
|
pass # Already aRGB format
|
||||||
|
elif isinstance(value, str) and value.startswith('#'):
|
||||||
|
pass # Unexpected format, keep as is
|
||||||
|
return styles
|
||||||
|
except Exception as e:
|
||||||
|
return styles
|
||||||
|
|
||||||
|
def _validate_excel_styles_contrast(self, styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Validate and fix contrast issues in AI-generated styles."""
|
||||||
|
try:
|
||||||
|
# Fix table header contrast
|
||||||
|
if "table_header" in styles:
|
||||||
|
header = styles["table_header"]
|
||||||
|
bg_color = header.get("background", "#FFFFFF")
|
||||||
|
text_color = header.get("text_color", "#000000")
|
||||||
|
|
||||||
|
# If both are white or both are dark, fix it
|
||||||
|
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
||||||
|
header["background"] = "#4F4F4F"
|
||||||
|
header["text_color"] = "#FFFFFF"
|
||||||
|
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
||||||
|
header["background"] = "#4F4F4F"
|
||||||
|
header["text_color"] = "#FFFFFF"
|
||||||
|
|
||||||
|
# Fix table cell contrast
|
||||||
|
if "table_cell" in styles:
|
||||||
|
cell = styles["table_cell"]
|
||||||
|
bg_color = cell.get("background", "#FFFFFF")
|
||||||
|
text_color = cell.get("text_color", "#000000")
|
||||||
|
|
||||||
|
# If both are white or both are dark, fix it
|
||||||
|
if bg_color.upper() == "#FFFFFF" and text_color.upper() == "#FFFFFF":
|
||||||
|
cell["background"] = "#FFFFFF"
|
||||||
|
cell["text_color"] = "#2F2F2F"
|
||||||
|
elif bg_color.upper() == "#000000" and text_color.upper() == "#000000":
|
||||||
|
cell["background"] = "#FFFFFF"
|
||||||
|
cell["text_color"] = "#2F2F2F"
|
||||||
|
|
||||||
|
return styles
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Style validation failed: {str(e)}")
|
||||||
|
return self._get_default_excel_styles()
|
||||||
|
|
||||||
|
def _get_default_excel_styles(self) -> Dict[str, Any]:
|
||||||
|
"""Default Excel styles with aRGB color format."""
|
||||||
|
return {
|
||||||
|
"title": {"font_size": 16, "color": "#FF1F4E79", "bold": True, "align": "center"},
|
||||||
|
"heading": {"font_size": 14, "color": "#FF2F2F2F", "bold": True, "align": "left"},
|
||||||
|
"table_header": {"background": "#FF4F4F4F", "text_color": "#FFFFFFFF", "bold": True, "align": "center"},
|
||||||
|
"table_cell": {"background": "#FFFFFFFF", "text_color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"bullet_list": {"font_size": 11, "color": "#FF2F2F2F", "indent": 2},
|
||||||
|
"paragraph": {"font_size": 11, "color": "#FF2F2F2F", "bold": False, "align": "left"},
|
||||||
|
"code_block": {"font": "Courier New", "font_size": 10, "color": "#FF2F2F2F", "background": "#FFF5F5F5"}
|
||||||
|
}
|
||||||
|
|
||||||
|
def _create_excel_sheets(self, wb: Workbook, json_content: Dict[str, Any], styles: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""Create Excel sheets based on content structure and user intent."""
|
||||||
|
sheets = {}
|
||||||
|
|
||||||
|
# Get sheet names from AI styles or generate based on content
|
||||||
|
sheet_names = styles.get("sheet_names", self._generate_sheet_names_from_content(json_content))
|
||||||
|
self.services.utils.debugLogToFile(f"EXCEL SHEET NAMES: {sheet_names}", "EXCEL_RENDERER")
|
||||||
|
|
||||||
|
# Create sheets
|
||||||
|
for i, sheet_name in enumerate(sheet_names):
|
||||||
|
if i == 0:
|
||||||
|
# Use the default sheet for the first sheet
|
||||||
|
sheet = wb.active
|
||||||
|
sheet.title = sheet_name
|
||||||
|
else:
|
||||||
|
# Create additional sheets
|
||||||
|
sheet = wb.create_sheet(sheet_name, i)
|
||||||
|
sheets[sheet_name.lower()] = sheet
|
||||||
|
|
||||||
|
return sheets
|
||||||
|
|
||||||
|
def _generate_sheet_names_from_content(self, json_content: Dict[str, Any]) -> List[str]:
|
||||||
|
"""Generate sheet names based on actual content structure."""
|
||||||
|
sections = json_content.get("sections", [])
|
||||||
|
|
||||||
|
# If no sections, create a single sheet
|
||||||
|
if not sections:
|
||||||
|
return ["Content"]
|
||||||
|
|
||||||
|
# Generate sheet names based on content structure
|
||||||
|
sheet_names = []
|
||||||
|
|
||||||
|
# Check if we have multiple table sections
|
||||||
|
table_sections = [s for s in sections if s.get("content_type") == "table"]
|
||||||
|
|
||||||
|
if len(table_sections) > 1:
|
||||||
|
# Create separate sheets for each table
|
||||||
|
for i, section in enumerate(table_sections, 1):
|
||||||
|
section_title = section.get("title", f"Table {i}")
|
||||||
|
sheet_names.append(section_title[:31]) # Excel sheet name limit
|
||||||
|
else:
|
||||||
|
# Single table or mixed content - create main sheet
|
||||||
|
document_title = json_content.get("metadata", {}).get("title", "Document")
|
||||||
|
sheet_names.append(document_title[:31]) # Excel sheet name limit
|
||||||
|
|
||||||
|
# Add additional sheets for other content types
|
||||||
|
content_types = set()
|
||||||
|
for section in sections:
|
||||||
|
content_type = section.get("content_type", "paragraph")
|
||||||
|
content_types.add(content_type)
|
||||||
|
|
||||||
|
if "table" in content_types and len(table_sections) == 1:
|
||||||
|
sheet_names.append("Table Data")
|
||||||
|
if "list" in content_types:
|
||||||
|
sheet_names.append("Lists")
|
||||||
|
if "paragraph" in content_types or "heading" in content_types:
|
||||||
|
sheet_names.append("Text")
|
||||||
|
|
||||||
|
# Limit to 4 sheets maximum
|
||||||
|
return sheet_names[:4]
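# Illustrative note (not in the original source): for a document titled "Quarterly Report"
# with one table section plus paragraph sections, the helper above yields
#   ["Quarterly Report", "Table Data", "Text"]
# while a document with several table sections gets one sheet per table title instead,
# always capped at four sheets and 31 characters per name (the Excel limit).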
|
||||||
|
|
||||||
|
def _populate_excel_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any]) -> None:
|
||||||
|
"""Populate Excel sheets with content from JSON based on actual sheet names."""
|
||||||
|
try:
|
||||||
|
# Get the actual sheet names that were created
|
||||||
|
sheet_names = list(sheets.keys())
|
||||||
|
|
||||||
|
if not sheet_names:
|
||||||
|
return
|
||||||
|
|
||||||
|
sections = json_content.get("sections", [])
|
||||||
|
table_sections = [s for s in sections if s.get("content_type") == "table"]
|
||||||
|
|
||||||
|
if len(table_sections) > 1:
|
||||||
|
# Multiple tables - populate each sheet with its corresponding table
|
||||||
|
for i, section in enumerate(table_sections):
|
||||||
|
if i < len(sheet_names):
|
||||||
|
sheet_name = sheet_names[i]
|
||||||
|
sheet = sheets[sheet_name]
|
||||||
|
self._populate_table_sheet(sheet, section, styles, f"Table {i+1}")
|
||||||
|
else:
|
||||||
|
# Single table or mixed content - use original logic
|
||||||
|
first_sheet_name = sheet_names[0]
|
||||||
|
self._populate_main_sheet(sheets[first_sheet_name], json_content, styles)
|
||||||
|
|
||||||
|
# If we have multiple sheets, distribute content by type
|
||||||
|
if len(sheet_names) > 1:
|
||||||
|
self._populate_content_type_sheets(sheets, json_content, styles, sheet_names[1:])
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"Could not populate Excel sheets: {str(e)}")
|
||||||
|
|
||||||
|
    def _populate_table_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], sheet_title: str):
        """Populate a sheet with a single table section."""
        try:
            # Sheet title
            sheet['A1'] = sheet_title
            sheet['A1'].font = Font(size=16, bold=True, color=self._get_safe_color(styles.get("title", {}).get("color", "FF1F4E79")))
            sheet['A1'].alignment = Alignment(horizontal="center")

            # Get table data from elements (canonical JSON format)
            elements = section.get("elements", [])
            if elements and isinstance(elements, list) and len(elements) > 0:
                table_data = elements[0]
                headers = table_data.get("headers", [])
                rows = table_data.get("rows", [])
            else:
                headers = []
                rows = []

            if not headers and not rows:
                sheet['A3'] = "No table data available"
                return

            # Add headers
            header_style = styles.get("table_header", {})
            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=3, column=col, value=header)
                if header_style.get("bold"):
                    cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
                if header_style.get("background"):
                    cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")

            # Add rows
            cell_style = styles.get("table_cell", {})
            for row_idx, row_data in enumerate(rows, 4):
                for col_idx, cell_value in enumerate(row_data, 1):
                    cell = sheet.cell(row=row_idx, column=col_idx, value=cell_value)
                    if cell_style.get("text_color"):
                        cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))

            # Auto-adjust column widths
            for col in range(1, len(headers) + 1):
                sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate table sheet: {str(e)}")

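    # Resulting layout for a single-table sheet (sketch, assuming openpyxl as
    # implied by the Font/PatternFill/get_column_letter calls above): the title
    # goes in A1, headers start on row 3, and data rows start on row 4.
    #
    #     A1: "Table 1"             (bold, centered title)
    #     A3: headers[0]   B3: headers[1]   ...
    #     A4: rows[0][0]   B4: rows[0][1]   ...
    #     every used column is given a fixed width of 20
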
    def _populate_main_sheet(self, sheet, json_content: Dict[str, Any], styles: Dict[str, Any]):
        """Populate the main sheet with document overview and all content."""
        try:
            # Document title
            document_title = json_content.get("metadata", {}).get("title", "Generated Report")
            sheet['A1'] = document_title

            # Safety check for title style
            title_style = styles.get("title", {"font_size": 16, "bold": True, "color": "#FF1F4E79", "align": "center"})
            try:
                safe_color = self._get_safe_color(title_style["color"])
                sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color=safe_color)
                sheet['A1'].alignment = Alignment(horizontal=title_style["align"])
            except Exception:
                # Try with a safe color
                sheet['A1'].font = Font(size=title_style["font_size"], bold=title_style["bold"], color="FF000000")
                sheet['A1'].alignment = Alignment(horizontal=title_style["align"])

            # Generation info
            sheet['A3'] = "Generated:"
            sheet['B3'] = self._format_timestamp()
            sheet['A4'] = "Status:"
            sheet['B4'] = "Generated Successfully"

            # Document metadata (initialize row first so it is defined even when
            # there is no metadata to list)
            metadata = json_content.get("metadata", {})
            row = 7
            if metadata:
                sheet['A6'] = "Document Information:"
                sheet['A6'].font = Font(bold=True)

                for key, value in metadata.items():
                    if key != "title":
                        sheet[f'A{row}'] = f"{key.title()}:"
                        sheet[f'B{row}'] = str(value)
                        row += 1

            # Content overview
            sections = json_content.get("sections", [])
            sheet[f'A{row + 1}'] = "Content Overview:"
            sheet[f'A{row + 1}'].font = Font(bold=True)

            row += 2
            sheet[f'A{row}'] = f"Total Sections: {len(sections)}"

            # Count different content types
            content_types = {}
            for section in sections:
                content_type = section.get("content_type", "unknown")
                content_types[content_type] = content_types.get(content_type, 0) + 1

            for content_type, count in content_types.items():
                row += 1
                sheet[f'A{row}'] = f"{content_type.title()} Sections: {count}"

            # Add all content to this sheet
            row += 2
            for section in sections:
                row = self._add_section_to_sheet(sheet, section, styles, row)
                row += 1  # Empty row between sections

            # Auto-adjust column widths
            sheet.column_dimensions['A'].width = 20
            sheet.column_dimensions['B'].width = 30

        except Exception as e:
            self.logger.warning(f"Could not populate main sheet: {str(e)}")

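    # Sketch of the resulting overview layout (row numbers assume at least one
    # non-title metadata entry; values are illustrative):
    #
    #     A1: document title       A3/B3: "Generated:" / timestamp
    #     A4/B4: "Status:" / "Generated Successfully"
    #     A6: "Document Information:" followed by one metadata key per row
    #     then "Content Overview:", per-type section counts, and finally every
    #     section appended via _add_section_to_sheet
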
    def _populate_content_type_sheets(self, sheets: Dict[str, Any], json_content: Dict[str, Any], styles: Dict[str, Any], sheet_names: List[str]):
        """Populate additional sheets based on content types."""
        try:
            sections = json_content.get("sections", [])

            for sheet_name in sheet_names:
                if sheet_name not in sheets:
                    continue

                sheet = sheets[sheet_name]
                sheet_title = sheet_name.title()
                sheet['A1'] = sheet_title
                sheet['A1'].font = Font(size=16, bold=True)

                row = 3

                # Filter sections by content type
                if sheet_name == "tables":
                    filtered_sections = [s for s in sections if s.get("content_type") == "table"]
                elif sheet_name == "lists":
                    filtered_sections = [s for s in sections if s.get("content_type") == "list"]
                elif sheet_name == "text":
                    filtered_sections = [s for s in sections if s.get("content_type") in ["paragraph", "heading"]]
                else:
                    filtered_sections = sections

                for section in filtered_sections:
                    row = self._add_section_to_sheet(sheet, section, styles, row)
                    row += 1  # Empty row between sections

                # Auto-adjust column widths
                for col in range(1, 6):
                    sheet.column_dimensions[get_column_letter(col)].width = 20

        except Exception as e:
            self.logger.warning(f"Could not populate content type sheets: {str(e)}")

    def _add_section_to_sheet(self, sheet, section: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
        """Add a section to a sheet and return the next row."""
        try:
            # Add section title
            section_title = section.get("title")
            if section_title:
                sheet[f'A{start_row}'] = f"# {section_title}"
                sheet[f'A{start_row}'].font = Font(bold=True)
                start_row += 1

            # Process section based on type
            section_type = section.get("content_type", "paragraph")

            # Handle all section types using elements array
            elements = section.get("elements", [])
            for element in elements:
                if section_type == "table":
                    start_row = self._add_table_to_excel(sheet, element, styles, start_row)
                elif section_type == "list":
                    start_row = self._add_list_to_excel(sheet, element, styles, start_row)
                elif section_type == "paragraph":
                    start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)
                elif section_type == "heading":
                    start_row = self._add_heading_to_excel(sheet, element, styles, start_row)
                else:
                    start_row = self._add_paragraph_to_excel(sheet, element, styles, start_row)

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add section to sheet: {str(e)}")
            return start_row + 1

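    # Example of a canonical section dict this dispatcher expects (hypothetical
    # values); content_type decides which _add_*_to_excel helper handles each
    # element, and unknown types fall back to the paragraph helper:
    #
    #     section = {
    #         "id": "section_3",
    #         "title": "Results",
    #         "content_type": "table",
    #         "elements": [{"headers": ["Metric", "Value"], "rows": [["Total", "42"]]}],
    #         "order": 3,
    #     }
    #     next_row = self._add_section_to_sheet(sheet, section, styles, start_row=10)
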
    def _add_table_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
        """Add a table element to Excel sheet."""
        try:
            # In canonical JSON format, table elements have headers and rows directly
            headers = element.get("headers", [])
            rows = element.get("rows", [])

            if not headers and not rows:
                return start_row

            # Add headers
            header_style = styles.get("table_header", {})
            for col, header in enumerate(headers, 1):
                cell = sheet.cell(row=start_row, column=col, value=header)
                if header_style.get("bold"):
                    cell.font = Font(bold=True, color=self._get_safe_color(header_style.get("text_color", "FF000000")))
                if header_style.get("background"):
                    cell.fill = PatternFill(start_color=self._get_safe_color(header_style["background"]), end_color=self._get_safe_color(header_style["background"]), fill_type="solid")

            start_row += 1

            # Add rows
            cell_style = styles.get("table_cell", {})
            for row_data in rows:
                for col, cell_value in enumerate(row_data, 1):
                    cell = sheet.cell(row=start_row, column=col, value=cell_value)
                    if cell_style.get("text_color"):
                        cell.font = Font(color=self._get_safe_color(cell_style["text_color"]))
                start_row += 1

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add table to Excel: {str(e)}")
            return start_row + 1

    def _add_list_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
        """Add a list element to Excel sheet."""
        try:
            list_items = element.get("items", [])

            list_style = styles.get("bullet_list", {})
            for item in list_items:
                sheet.cell(row=start_row, column=1, value=f"• {item}")
                if list_style.get("color"):
                    sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(list_style["color"]))
                start_row += 1

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add list to Excel: {str(e)}")
            return start_row + 1

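    # Sketch of the list element shape consumed above (hypothetical values);
    # each item is written to column 1 with a literal bullet prefix and the row
    # pointer advances by one per item:
    #
    #     element = {"items": ["First point", "Second point"]}
    #     next_row = self._add_list_to_excel(sheet, element, styles, start_row=5)
    #     # A5 == "• First point", A6 == "• Second point", next_row == 7
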
    def _add_paragraph_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
        """Add a paragraph element to Excel sheet."""
        try:
            text = element.get("text", "")
            if text:
                sheet.cell(row=start_row, column=1, value=text)

                paragraph_style = styles.get("paragraph", {})
                if paragraph_style.get("color"):
                    sheet.cell(row=start_row, column=1).font = Font(color=self._get_safe_color(paragraph_style["color"]))

                start_row += 1

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add paragraph to Excel: {str(e)}")
            return start_row + 1

    def _add_heading_to_excel(self, sheet, element: Dict[str, Any], styles: Dict[str, Any], start_row: int) -> int:
        """Add a heading element to Excel sheet."""
        try:
            text = element.get("text", "")
            level = element.get("level", 1)

            if text:
                sheet.cell(row=start_row, column=1, value=text)

                heading_style = styles.get("heading", {})
                font_size = heading_style.get("font_size", 14)
                if level > 1:
                    font_size = max(10, font_size - (level - 1) * 2)

                sheet.cell(row=start_row, column=1).font = Font(
                    size=font_size,
                    bold=True,
                    color=self._get_safe_color(heading_style.get("color", "FF000000"))
                )

                start_row += 1

            return start_row

        except Exception as e:
            self.logger.warning(f"Could not add heading to Excel: {str(e)}")
            return start_row + 1

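    # Worked example of the heading size rule above (base size 14, shrinking by
    # 2 per level, floored at 10):
    #
    #     level 1 -> 14
    #     level 2 -> max(10, 14 - 2)  == 12
    #     level 3 -> max(10, 14 - 4)  == 10
    #     level 6 -> max(10, 14 - 10) == 10
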
    def _format_timestamp(self) -> str:
        """Format current timestamp for document generation."""
        return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC")
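
    # Example output of the timestamp format above (illustrative value):
    #
    #     self._format_timestamp()  # e.g. "2025-01-15 09:30:00 UTC"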
@@ -1,94 +0,0 @@
"""
|
|
||||||
Text renderer for report generation.
|
|
||||||
"""
|
|
||||||
|
|
||||||
from .base_renderer import BaseRenderer
|
|
||||||
from typing import Dict, Any, Tuple, List
|
|
||||||
|
|
||||||
class TextRenderer(BaseRenderer):
|
|
||||||
"""Renders content to plain text format with format-specific extraction."""
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_supported_formats(cls) -> List[str]:
|
|
||||||
"""Return supported text formats (excluding formats with dedicated renderers)."""
|
|
||||||
return [
|
|
||||||
'txt', 'text', 'plain',
|
|
||||||
# Programming languages
|
|
||||||
'js', 'javascript', 'ts', 'typescript', 'jsx', 'tsx',
|
|
||||||
'py', 'python', 'java', 'cpp', 'c', 'h', 'hpp',
|
|
||||||
'cs', 'csharp', 'php', 'rb', 'ruby', 'go', 'rs', 'rust',
|
|
||||||
'swift', 'kt', 'kotlin', 'scala', 'r', 'm', 'objc',
|
|
||||||
'sh', 'bash', 'zsh', 'fish', 'ps1', 'bat', 'cmd',
|
|
||||||
# Web technologies (excluding html/htm which have dedicated renderer)
|
|
||||||
'css', 'scss', 'sass', 'less', 'xml', 'yaml', 'yml', 'toml', 'ini', 'cfg',
|
|
||||||
# Data formats (excluding csv, md/markdown which have dedicated renderers)
|
|
||||||
'tsv', 'log', 'rst', 'sql', 'dockerfile', 'dockerignore', 'gitignore',
|
|
||||||
# Configuration files
|
|
||||||
'env', 'properties', 'conf', 'config', 'rc',
|
|
||||||
'gitattributes', 'editorconfig', 'eslintrc',
|
|
||||||
# Documentation
|
|
||||||
'readme', 'changelog', 'license', 'authors',
|
|
||||||
'contributing', 'todo', 'notes', 'docs'
|
|
||||||
]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_format_aliases(cls) -> List[str]:
|
|
||||||
"""Return format aliases."""
|
|
||||||
return [
|
|
||||||
'ascii', 'utf8', 'utf-8', 'code', 'source',
|
|
||||||
'script', 'program', 'file', 'document',
|
|
||||||
'raw', 'unformatted', 'plaintext'
|
|
||||||
]
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def get_priority(cls) -> int:
|
|
||||||
"""Return priority for text renderer."""
|
|
||||||
return 90
|
|
||||||
|
|
||||||
def getExtractionPrompt(self, user_prompt: str, title: str) -> str:
|
|
||||||
"""Return only plain-text guidelines; global prompt is built centrally."""
|
|
||||||
return (
|
|
||||||
"TEXT FORMAT GUIDELINES:\n"
|
|
||||||
"- Output ONLY plain text (no markdown or HTML).\n"
|
|
||||||
"- Use clear headings (you may underline with === or --- when helpful).\n"
|
|
||||||
"- Use simple bullet lists with '-' and tables with '|' when needed.\n"
|
|
||||||
"- Preserve indentation for code-like content if present.\n"
|
|
||||||
"OUTPUT: Return ONLY the raw text content."
|
|
||||||
)
|
|
||||||
|
|
||||||
async def render(self, extracted_content: str, title: str) -> Tuple[str, str]:
|
|
||||||
"""Render extracted content to plain text format."""
|
|
||||||
try:
|
|
||||||
# The extracted content should already be formatted text from the AI
|
|
||||||
# Just clean it up
|
|
||||||
text_content = self._clean_text_content(extracted_content, title)
|
|
||||||
|
|
||||||
return text_content, "text/plain"
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
self.logger.error(f"Error rendering text: {str(e)}")
|
|
||||||
# Return minimal text fallback
|
|
||||||
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
|
|
||||||
|
|
||||||
def _clean_text_content(self, content: str, title: str) -> str:
|
|
||||||
"""Clean and validate text content from AI."""
|
|
||||||
content = content.strip()
|
|
||||||
|
|
||||||
# Remove markdown code blocks if present
|
|
||||||
if content.startswith("```") and content.endswith("```"):
|
|
||||||
lines = content.split('\n')
|
|
||||||
if len(lines) > 2:
|
|
||||||
content = '\n'.join(lines[1:-1]).strip()
|
|
||||||
|
|
||||||
# Remove any remaining markdown formatting
|
|
||||||
content = content.replace('**', '').replace('*', '')
|
|
||||||
content = content.replace('__', '').replace('_', '')
|
|
||||||
|
|
||||||
# Clean up any HTML-like tags that might have slipped through
|
|
||||||
import re
|
|
||||||
content = re.sub(r'<[^>]+>', '', content)
|
|
||||||
|
|
||||||
# Ensure proper line endings
|
|
||||||
content = content.replace('\r\n', '\n').replace('\r', '\n')
|
|
||||||
|
|
||||||
return content
|
|
||||||
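
    # Sketch of the cleanup performed above (hypothetical input): fenced code
    # blocks are unwrapped, basic markdown emphasis and HTML-like tags are
    # stripped, and line endings are normalized to "\n".
    #
    #     raw = "```\n**Title**\n<b>Body</b>\r\nDone\n```"
    #     cleaned = self._clean_text_content(raw, "Report")
    #     # cleaned == "Title\nBody\nDone"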
517 modules/services/serviceGeneration/subJsonSchema.py Normal file
@@ -0,0 +1,517 @@
"""
|
||||||
|
JSON Schema definitions for AI-generated document structures.
|
||||||
|
This module provides schemas that guide AI to generate structured JSON output.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
|
|
||||||
|
def get_multi_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
|
"""Get the JSON schema for multi-document generation."""
|
||||||
|
return {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["metadata", "documents"],
|
||||||
|
"properties": {
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["title", "splitStrategy"],
|
||||||
|
"properties": {
|
||||||
|
"title": {"type": "string", "description": "Document title"},
|
||||||
|
"splitStrategy": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
|
||||||
|
"description": "Strategy for splitting content into multiple files"
|
||||||
|
},
|
||||||
|
"splitCriteria": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Custom criteria for splitting (e.g., entity_id, category, etc.)"
|
||||||
|
},
|
||||||
|
"fileNamingPattern": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
|
||||||
|
},
|
||||||
|
"author": {"type": "string", "description": "Document author (optional)"},
|
||||||
|
"source_documents": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"},
|
||||||
|
"description": "List of source document IDs"
|
||||||
|
},
|
||||||
|
"extraction_method": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "ai_extraction",
|
||||||
|
"description": "Method used for extraction"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"documents": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Array of individual documents to generate",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["id", "title", "sections", "filename"],
|
||||||
|
"properties": {
|
||||||
|
"id": {"type": "string", "description": "Unique document identifier"},
|
||||||
|
"title": {"type": "string", "description": "Document title"},
|
||||||
|
"filename": {"type": "string", "description": "Generated filename"},
|
||||||
|
"sections": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Document sections containing structured content",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["id", "content_type", "elements", "order"],
|
||||||
|
"properties": {
|
||||||
|
"id": {"type": "string", "description": "Unique section identifier"},
|
||||||
|
"title": {"type": "string", "description": "Section title (optional)"},
|
||||||
|
"content_type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
|
||||||
|
"description": "Primary content type of this section"
|
||||||
|
},
|
||||||
|
"elements": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Content elements in this section",
|
||||||
|
"items": {
|
||||||
|
"oneOf": [
|
||||||
|
{"$ref": "#/definitions/table"},
|
||||||
|
{"$ref": "#/definitions/bullet_list"},
|
||||||
|
{"$ref": "#/definitions/paragraph"},
|
||||||
|
{"$ref": "#/definitions/heading"},
|
||||||
|
{"$ref": "#/definitions/code_block"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"order": {"type": "integer", "description": "Section order in document"},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Additional section metadata"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Document-specific metadata"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"definitions": {
|
||||||
|
"table": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["headers", "rows"],
|
||||||
|
"properties": {
|
||||||
|
"headers": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"},
|
||||||
|
"description": "Table column headers"
|
||||||
|
},
|
||||||
|
"rows": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"}
|
||||||
|
},
|
||||||
|
"description": "Table data rows"
|
||||||
|
},
|
||||||
|
"caption": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Table caption (optional)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bullet_list": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["items"],
|
||||||
|
"properties": {
|
||||||
|
"items": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["text"],
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "List item text"},
|
||||||
|
"subitems": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"$ref": "#/definitions/list_item"},
|
||||||
|
"description": "Nested sub-items (optional)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "List items"
|
||||||
|
},
|
||||||
|
"list_type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["bullet", "numbered", "checklist"],
|
||||||
|
"default": "bullet",
|
||||||
|
"description": "Type of list"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"list_item": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["text"],
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "List item text"},
|
||||||
|
"subitems": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"$ref": "#/definitions/list_item"},
|
||||||
|
"description": "Nested sub-items (optional)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"paragraph": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["text"],
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "Paragraph text"},
|
||||||
|
"formatting": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Text formatting (bold, italic, etc.)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"heading": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["text", "level"],
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "Heading text"},
|
||||||
|
"level": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"maximum": 6,
|
||||||
|
"description": "Heading level (1-6)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"code_block": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["code"],
|
||||||
|
"properties": {
|
||||||
|
"code": {"type": "string", "description": "Code content"},
|
||||||
|
"language": {"type": "string", "description": "Programming language (optional)"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
def get_document_subJsonSchema() -> Dict[str, Any]:
|
||||||
|
"""Get the JSON schema for structured document generation (single document)."""
|
||||||
|
return {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["metadata", "sections"],
|
||||||
|
"properties": {
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["title"],
|
||||||
|
"properties": {
|
||||||
|
"title": {"type": "string", "description": "Document title"},
|
||||||
|
"author": {"type": "string", "description": "Document author (optional)"},
|
||||||
|
"source_documents": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"},
|
||||||
|
"description": "List of source document IDs"
|
||||||
|
},
|
||||||
|
"extraction_method": {
|
||||||
|
"type": "string",
|
||||||
|
"default": "ai_extraction",
|
||||||
|
"description": "Method used for extraction"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"sections": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Document sections containing structured content",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["id", "content_type", "elements", "order"],
|
||||||
|
"properties": {
|
||||||
|
"id": {"type": "string", "description": "Unique section identifier"},
|
||||||
|
"title": {"type": "string", "description": "Section title (optional)"},
|
||||||
|
"content_type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
|
||||||
|
"description": "Primary content type of this section"
|
||||||
|
},
|
||||||
|
"elements": {
|
||||||
|
"type": "array",
|
||||||
|
"description": "Content elements in this section",
|
||||||
|
"items": {
|
||||||
|
"oneOf": [
|
||||||
|
{"$ref": "#/definitions/table"},
|
||||||
|
{"$ref": "#/definitions/bullet_list"},
|
||||||
|
{"$ref": "#/definitions/paragraph"},
|
||||||
|
{"$ref": "#/definitions/heading"},
|
||||||
|
{"$ref": "#/definitions/code_block"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"order": {"type": "integer", "description": "Section order in document"},
|
||||||
|
"metadata": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Additional section metadata"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"summary": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Document summary (optional)"
|
||||||
|
},
|
||||||
|
"tags": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"},
|
||||||
|
"description": "Document tags for categorization"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"definitions": {
|
||||||
|
"table": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["headers", "rows"],
|
||||||
|
"properties": {
|
||||||
|
"headers": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"},
|
||||||
|
"description": "Table column headers"
|
||||||
|
},
|
||||||
|
"rows": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"type": "string"}
|
||||||
|
},
|
||||||
|
"description": "Table data rows"
|
||||||
|
},
|
||||||
|
"caption": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Table caption (optional)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"bullet_list": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["items"],
|
||||||
|
"properties": {
|
||||||
|
"items": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["text"],
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "List item text"},
|
||||||
|
"subitems": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"$ref": "#/definitions/list_item"},
|
||||||
|
"description": "Nested sub-items (optional)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"description": "List items"
|
||||||
|
},
|
||||||
|
"list_type": {
|
||||||
|
"type": "string",
|
||||||
|
"enum": ["bullet", "numbered", "checklist"],
|
||||||
|
"default": "bullet",
|
||||||
|
"description": "Type of list"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"list_item": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["text"],
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "List item text"},
|
||||||
|
"subitems": {
|
||||||
|
"type": "array",
|
||||||
|
"items": {"$ref": "#/definitions/list_item"},
|
||||||
|
"description": "Nested sub-items (optional)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"paragraph": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["text"],
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "Paragraph text"},
|
||||||
|
"formatting": {
|
||||||
|
"type": "object",
|
||||||
|
"description": "Text formatting (bold, italic, etc.)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"heading": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["text", "level"],
|
||||||
|
"properties": {
|
||||||
|
"text": {"type": "string", "description": "Heading text"},
|
||||||
|
"level": {
|
||||||
|
"type": "integer",
|
||||||
|
"minimum": 1,
|
||||||
|
"maximum": 6,
|
||||||
|
"description": "Heading level (1-6)"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"code_block": {
|
||||||
|
"type": "object",
|
||||||
|
"required": ["code"],
|
||||||
|
"properties": {
|
||||||
|
"code": {"type": "string", "description": "Code content"},
|
||||||
|
"language": {"type": "string", "description": "Programming language (optional)"}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_extraction_prompt_template() -> str:
    """Get the template for AI extraction prompts that request JSON output."""
    return """
You are extracting structured content from documents. Your task is to analyze the provided content and generate a structured JSON document.

IMPORTANT: You must respond with valid JSON only. No additional text, explanations, or formatting outside the JSON structure.

JSON Schema Requirements:
- Extract the actual data from the source documents
- If content is a table, extract it as a table with headers and rows
- If content is a list, extract it as a structured list with items
- If content is text, extract it as paragraphs or headings
- Preserve the original structure and data - do not summarize or interpret
- Use the exact JSON schema provided

Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification

Return only the JSON structure following the schema. Do not include any text before or after the JSON.
"""

def get_generation_prompt_template() -> str:
    """Get the template for AI generation prompts that work with JSON input."""
    return """
You are generating a document from structured JSON data. Your task is to create a well-formatted document based on the provided structured content.

IMPORTANT: You must respond with valid JSON only, following the document schema.

Generation Guidelines:
- Use the provided JSON structure as the foundation
- Enhance the content with proper formatting and organization
- Ensure logical flow and readability
- Maintain the original data integrity
- Add appropriate headings and sections
- Organize content in a logical sequence

Content Enhancement:
- Tables: Ensure proper headers and data alignment
- Lists: Use appropriate list types (bullet, numbered, checklist)
- Headings: Use appropriate heading levels for hierarchy
- Paragraphs: Ensure proper text flow and formatting
- Code: Preserve code blocks with proper language identification

Return only the enhanced JSON structure following the schema. Do not include any text before or after the JSON.
"""

def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]:
    """Automatically select appropriate schema based on prompt analysis."""
    if prompt_analysis and prompt_analysis.get("is_multi_file", False):
        return get_multi_document_subJsonSchema()
    else:
        return get_document_subJsonSchema()

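# Usage sketch (hypothetical analysis dict): the multi-document schema is only
# selected when the prompt analysis explicitly flags a multi-file request.
#
#     get_adaptive_json_schema({"is_multi_file": True})   # multi-document schema
#     get_adaptive_json_schema({"is_multi_file": False})  # single-document schema
#     get_adaptive_json_schema(None)                      # single-document schema
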
def validate_json_document(json_data: Dict[str, Any]) -> bool:
    """Validate that the JSON data follows the document schema."""
    try:
        # Basic validation - check required fields
        if not isinstance(json_data, dict):
            return False

        # Check if it's multi-document or single-document structure
        if "documents" in json_data:
            # Multi-document structure
            if "metadata" not in json_data:
                return False

            metadata = json_data["metadata"]
            if not isinstance(metadata, dict) or "title" not in metadata or "splitStrategy" not in metadata:
                return False

            documents = json_data["documents"]
            if not isinstance(documents, list):
                return False

            # Validate each document
            for doc in documents:
                if not isinstance(doc, dict):
                    return False

                required_fields = ["id", "title", "sections", "filename"]
                for field in required_fields:
                    if field not in doc:
                        return False

                # Validate sections in each document
                sections = doc.get("sections", [])
                if not isinstance(sections, list):
                    return False

                for section in sections:
                    if not isinstance(section, dict):
                        return False

                    section_required = ["id", "content_type", "elements", "order"]
                    for field in section_required:
                        if field not in section:
                            return False

                    # Validate content_type
                    valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
                    if section["content_type"] not in valid_types:
                        return False

                    # Validate elements
                    if not isinstance(section["elements"], list):
                        return False

        elif "sections" in json_data:
            # Single-document structure (existing validation)
            if "metadata" not in json_data:
                return False

            metadata = json_data["metadata"]
            if not isinstance(metadata, dict) or "title" not in metadata:
                return False

            sections = json_data["sections"]
            if not isinstance(sections, list):
                return False

            # Validate each section
            for i, section in enumerate(sections):
                if not isinstance(section, dict):
                    return False

                required_fields = ["id", "content_type", "elements", "order"]
                for field in required_fields:
                    if field not in section:
                        return False

                # Validate content_type
                valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
                if section["content_type"] not in valid_types:
                    return False

                # Validate elements
                if not isinstance(section["elements"], list):
                    return False
        else:
            return False

        return True

    except Exception:
        return False
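

# Illustrative self-check (not part of the original module): builds a minimal
# single-document payload and runs it through validate_json_document. The
# values are hypothetical; only the required fields from the schema are used.
if __name__ == "__main__":
    _sample = {
        "metadata": {"title": "Example Document"},
        "sections": [
            {
                "id": "section_1",
                "content_type": "paragraph",
                "elements": [{"text": "Hello world"}],
                "order": 1,
            }
        ],
    }
    assert validate_json_document(_sample) is True
    assert validate_json_document({"metadata": {"title": "x"}}) is False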
738 modules/services/serviceGeneration/subPromptBuilder.py Normal file
@@ -0,0 +1,738 @@
"""
|
||||||
|
Prompt builder for AI document generation and extraction.
|
||||||
|
This module builds prompts for AI services to extract and generate documents.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any, Optional, List, TYPE_CHECKING
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||||
|
|
||||||
|
# Type hint for renderer parameter
|
||||||
|
if TYPE_CHECKING:
|
||||||
|
from .renderers.rendererBaseTemplate import BaseRenderer
|
||||||
|
_RendererLike = BaseRenderer
|
||||||
|
else:
|
||||||
|
_RendererLike = Any
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
async def buildAdaptiveExtractionPrompt(
|
||||||
|
outputFormat: str,
|
||||||
|
userPrompt: str,
|
||||||
|
title: str,
|
||||||
|
promptAnalysis: Dict[str, Any],
|
||||||
|
aiService=None,
|
||||||
|
services=None
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Build adaptive extraction prompt based on AI analysis.
|
||||||
|
Uses multi-file or single-file approach based on analysis.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Multi-file example data instead of schema
|
||||||
|
multi_file_example = {
|
||||||
|
"metadata": {
|
||||||
|
"title": "Multi-Document Example",
|
||||||
|
"splitStrategy": "by_section",
|
||||||
|
"source_documents": ["doc_001"],
|
||||||
|
"extraction_method": "ai_extraction"
|
||||||
|
},
|
||||||
|
"documents": [
|
||||||
|
{
|
||||||
|
"id": "doc_section_1",
|
||||||
|
"title": "Section 1 Title",
|
||||||
|
"filename": "section_1.xlsx",
|
||||||
|
"sections": [
|
||||||
|
{
|
||||||
|
"id": "section_1",
|
||||||
|
"content_type": "heading",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"level": 1,
|
||||||
|
"text": "1. SECTION TITLE"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_2",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"text": "This is the actual content that should be extracted from the document."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_3",
|
||||||
|
"content_type": "table",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"headers": ["Column 1", "Column 2"],
|
||||||
|
"rows": [["Value 1", "Value 2"]]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 3
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
# Single-file example data instead of schema
|
||||||
|
single_file_example = {
|
||||||
|
"metadata": {
|
||||||
|
"title": "Single Document Example",
|
||||||
|
"source_documents": ["doc_001"],
|
||||||
|
"extraction_method": "ai_extraction"
|
||||||
|
},
|
||||||
|
"sections": [
|
||||||
|
{
|
||||||
|
"id": "section_1",
|
||||||
|
"content_type": "heading",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"level": 1,
|
||||||
|
"text": "1. SECTION TITLE"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 1
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_2",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"text": "This is the actual content that should be extracted from the document."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_3",
|
||||||
|
"content_type": "table",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"headers": ["Column 1", "Column 2"],
|
||||||
|
"rows": [["Value 1", "Value 2"]]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 3
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
if promptAnalysis.get("is_multi_file", False):
|
||||||
|
# Multi-file prompt
|
||||||
|
adaptive_prompt = f"""
|
||||||
|
{userPrompt}
|
||||||
|
|
||||||
|
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||||
|
|
||||||
|
TASK: Extract the actual content from the document and organize it into separate sections, where each section will become a separate file.
|
||||||
|
|
||||||
|
REQUIREMENTS:
|
||||||
|
1. Analyze the document content provided in the context below
|
||||||
|
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
|
||||||
|
3. Create one JSON document entry for each section found
|
||||||
|
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
|
||||||
|
5. Generate appropriate filenames for each section
|
||||||
|
|
||||||
|
CRITICAL: You MUST return a JSON structure with a "documents" array, NOT a "sections" array.
|
||||||
|
|
||||||
|
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||||
|
{json.dumps(multi_file_example, indent=2)}
|
||||||
|
|
||||||
|
IMPORTANT: The JSON must have a "documents" key containing an array of document objects. Each document object must have:
|
||||||
|
- "id": unique identifier
|
||||||
|
- "title": section title from the document
|
||||||
|
- "filename": appropriate filename for the section
|
||||||
|
- "sections": array of content sections
|
||||||
|
|
||||||
|
DO NOT return a JSON with "sections" at the root level. Return a JSON with "documents" at the root level.
|
||||||
|
|
||||||
|
INSTRUCTIONS:
|
||||||
|
- Replace "REPLACE_WITH_ACTUAL_*" placeholders with real content from the document
|
||||||
|
- Use actual section titles, headings, and text from the document
|
||||||
|
- Create meaningful filenames based on section content
|
||||||
|
- Ensure each section contains the complete content for that part of the document
|
||||||
|
- Do not use generic placeholder text like "Section 1", "Section 2"
|
||||||
|
- Extract real headings, paragraphs, lists, and other content elements
|
||||||
|
- CRITICAL: Return JSON with "documents" array, not "sections" array
|
||||||
|
|
||||||
|
CONTEXT (Document Content):
|
||||||
|
|
||||||
|
Content Types to Extract:
|
||||||
|
1. Tables: Extract all rows and columns with proper headers
|
||||||
|
2. Lists: Extract all items with proper nesting
|
||||||
|
3. Headings: Extract with appropriate levels
|
||||||
|
4. Paragraphs: Extract as structured text
|
||||||
|
5. Code: Extract code blocks with language identification
|
||||||
|
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||||
|
|
||||||
|
Image Analysis Requirements:
|
||||||
|
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||||
|
- Describe everything you see in the image
|
||||||
|
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||||
|
- If the image is too small, corrupted, or unclear, explain this
|
||||||
|
- Always provide feedback - never return empty responses
|
||||||
|
|
||||||
|
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||||
|
|
||||||
|
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||||
|
""".strip()
|
||||||
|
else:
|
||||||
|
# Single-file prompt - use example data instead of schema
|
||||||
|
adaptive_prompt = f"""
|
||||||
|
{userPrompt}
|
||||||
|
|
||||||
|
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||||
|
|
||||||
|
TASK: Extract the actual content from the document and organize it into structured sections.
|
||||||
|
|
||||||
|
REQUIREMENTS:
|
||||||
|
1. Analyze the document content provided in the context below
|
||||||
|
2. Extract all content and organize it into logical sections
|
||||||
|
3. Create structured JSON with sections containing the extracted content
|
||||||
|
4. Preserve the original structure and data
|
||||||
|
|
||||||
|
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||||
|
{json.dumps(single_file_example, indent=2)}
|
||||||
|
|
||||||
|
INSTRUCTIONS:
|
||||||
|
- Replace example data with actual content from the document
|
||||||
|
- Use actual headings, paragraphs, and text from the document
|
||||||
|
- Ensure all content is properly structured
|
||||||
|
- Do not use generic placeholder text
|
||||||
|
- Extract real content from the documents
|
||||||
|
|
||||||
|
CONTEXT (Document Content):
|
||||||
|
|
||||||
|
Content Types to Extract:
|
||||||
|
1. Tables: Extract all rows and columns with proper headers
|
||||||
|
2. Lists: Extract all items with proper nesting
|
||||||
|
3. Headings: Extract with appropriate levels
|
||||||
|
4. Paragraphs: Extract as structured text
|
||||||
|
5. Code: Extract code blocks with language identification
|
||||||
|
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||||
|
|
||||||
|
Image Analysis Requirements:
|
||||||
|
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||||
|
- Describe everything you see in the image
|
||||||
|
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||||
|
- If the image is too small, corrupted, or unclear, explain this
|
||||||
|
- Always provide feedback - never return empty responses
|
||||||
|
|
||||||
|
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||||
|
|
||||||
|
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||||
|
""".strip()
|
||||||
|
|
||||||
|
return adaptive_prompt
|
||||||
|
|
||||||
|
async def buildGenericExtractionPrompt(
|
||||||
|
outputFormat: str,
|
||||||
|
userPrompt: str,
|
||||||
|
title: str,
|
||||||
|
aiService=None,
|
||||||
|
services=None
|
||||||
|
) -> str:
|
||||||
|
"""Build generic extraction prompt that works for both single and multi-file."""
|
||||||
|
|
||||||
|
# Use AI to determine the best approach
|
||||||
|
if aiService:
|
||||||
|
try:
|
||||||
|
analysis_prompt = f"""
|
||||||
|
Analyze this user request and determine the best JSON structure for document extraction.
|
||||||
|
|
||||||
|
User request: "{userPrompt}"
|
||||||
|
|
||||||
|
Respond with JSON only:
|
||||||
|
{{
|
||||||
|
"requires_multi_file": true/false,
|
||||||
|
"recommended_schema": "single_document|multi_document",
|
||||||
|
"split_approach": "description of how to organize content",
|
||||||
|
"file_naming": "suggested naming pattern"
|
||||||
|
}}
|
||||||
|
|
||||||
|
Consider the user's intent and the most logical way to organize the extracted content.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationType.GENERAL
|
||||||
|
|
||||||
|
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
||||||
|
response = await aiService.aiObjects.call(request)
|
||||||
|
|
||||||
|
if response and response.content:
|
||||||
|
import re
|
||||||
|
|
||||||
|
result = response.content.strip()
|
||||||
|
json_match = re.search(r'\{.*\}', result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(0)
|
||||||
|
|
||||||
|
analysis = json.loads(result)
|
||||||
|
|
||||||
|
# Use analysis to build appropriate prompt
|
||||||
|
return await buildAdaptiveExtractionPrompt(
|
||||||
|
outputFormat, userPrompt, title, analysis, aiService, services
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
services.utils.debugLogToFile(f"Generic prompt analysis failed: {str(e)}", "PROMPT_BUILDER")
|
||||||
|
|
||||||
|
# Fallback to single-file prompt
|
||||||
|
example_data = {
|
||||||
|
"metadata": {
|
||||||
|
"title": "Example Document",
|
||||||
|
"author": "AI Assistant",
|
||||||
|
"source_documents": ["document_001"],
|
||||||
|
"extraction_method": "ai_extraction"
|
||||||
|
},
|
||||||
|
"sections": [
|
||||||
|
{
|
||||||
|
"id": "section_001",
|
||||||
|
"content_type": "heading",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"level": 1,
|
||||||
|
"text": "1. SECTION TITLE"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 1,
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"summary": "",
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
|
||||||
|
return f"""
|
||||||
|
{userPrompt}
|
||||||
|
|
||||||
|
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||||
|
|
||||||
|
TASK: Extract the actual content from the document and organize it into structured sections.
|
||||||
|
|
||||||
|
REQUIREMENTS:
|
||||||
|
1. Analyze the document content provided in the context below
|
||||||
|
2. Extract all content and organize it into logical sections
|
||||||
|
3. Create structured JSON with sections containing the extracted content
|
||||||
|
4. Preserve the original structure and data
|
||||||
|
|
||||||
|
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||||
|
{json.dumps(example_data, indent=2)}
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Preserve all original data - do not summarize or interpret
|
||||||
|
- Use the exact JSON format shown above
|
||||||
|
- Maintain data integrity and structure
|
||||||
|
|
||||||
|
Content Types to Extract:
|
||||||
|
1. Tables: Extract all rows and columns with proper headers
|
||||||
|
2. Lists: Extract all items with proper nesting
|
||||||
|
3. Headings: Extract with appropriate levels
|
||||||
|
4. Paragraphs: Extract as structured text
|
||||||
|
5. Code: Extract code blocks with language identification
|
||||||
|
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||||
|
|
||||||
|
Image Analysis Requirements:
|
||||||
|
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||||
|
- Describe everything you see in the image
|
||||||
|
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||||
|
- If the image is too small, corrupted, or unclear, explain this
|
||||||
|
- Always provide feedback - never return empty responses
|
||||||
|
|
||||||
|
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||||
|
|
||||||
|
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||||
|
|
||||||
|
DO NOT return a schema description - return actual extracted content in the JSON format shown above.
|
||||||
|
"""
|
||||||
|
|
||||||
|
async def buildExtractionPrompt(
|
||||||
|
outputFormat: str,
|
||||||
|
renderer: _RendererLike,
|
||||||
|
userPrompt: str,
|
||||||
|
title: str,
|
||||||
|
aiService=None,
|
||||||
|
services=None
|
||||||
|
) -> str:
|
||||||
|
"""
|
||||||
|
Build the final extraction prompt by combining:
|
||||||
|
- Parsed extraction intent from user prompt (using AI)
|
||||||
|
- Generic cross-format instructions (filename header + real-data policy)
|
||||||
|
- Format-specific guidelines snippet provided by the renderer
|
||||||
|
|
||||||
|
The AI must place a single filename header at the very top:
|
||||||
|
FILENAME: <safe-file-name-with-extension>
|
||||||
|
followed by a blank line and then ONLY the document content according to the target format.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Parse user prompt to separate extraction intent from generation format using AI
|
||||||
|
extractionIntent = await _parseExtractionIntent(userPrompt, outputFormat, aiService, services)
|
||||||
|
|
||||||
|
# Import JSON schema for structured output
|
||||||
|
from .subJsonSchema import get_document_subJsonSchema
|
||||||
|
jsonSchema = get_document_subJsonSchema()
|
||||||
|
|
||||||
|
# Generic block for JSON extraction - use mixed example data showing different content types
|
||||||
|
example_data = {
|
||||||
|
"metadata": {
|
||||||
|
"title": "Example Document",
|
||||||
|
"author": "AI Assistant",
|
||||||
|
"source_documents": ["document_001"],
|
||||||
|
"extraction_method": "ai_extraction"
|
||||||
|
},
|
||||||
|
"sections": [
|
||||||
|
{
|
||||||
|
"id": "section_001",
|
||||||
|
"content_type": "heading",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"level": 1,
|
||||||
|
"text": "1. INTRODUCTION"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 1,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_002",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"text": "This is a sample paragraph with actual content that should be extracted from the document."
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 2,
|
||||||
|
"metadata": {}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": "section_003",
|
||||||
|
"content_type": "table",
|
||||||
|
"elements": [
|
||||||
|
{
|
||||||
|
"headers": ["Column 1", "Column 2", "Column 3"],
|
||||||
|
"rows": [
|
||||||
|
["Value 1", "Value 2", "Value 3"],
|
||||||
|
["Value 4", "Value 5", "Value 6"]
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"order": 3,
|
||||||
|
"metadata": {}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"summary": "",
|
||||||
|
"tags": []
|
||||||
|
}
|
||||||
|
|
||||||
|
genericIntro = f"""
|
||||||
|
{extractionIntent}
|
||||||
|
|
||||||
|
You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.
|
||||||
|
|
||||||
|
TASK: Extract the actual content from the document and organize it into structured sections.
|
||||||
|
|
||||||
|
REQUIREMENTS:
|
||||||
|
1. Analyze the document content provided in the context below
|
||||||
|
2. Extract all content and organize it into logical sections
|
||||||
|
3. Create structured JSON with sections containing the extracted content
|
||||||
|
4. Preserve the original structure and data
|
||||||
|
|
||||||
|
OUTPUT FORMAT: Return only valid JSON in this exact structure:
|
||||||
|
{json.dumps(example_data, indent=2)}
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Preserve all original data - do not summarize or interpret
|
||||||
|
- Use the exact JSON format shown above
|
||||||
|
- Maintain data integrity and structure
|
||||||
|
|
||||||
|
Content Types to Extract:
|
||||||
|
1. Tables: Extract all rows and columns with proper headers
|
||||||
|
2. Lists: Extract all items with proper nesting
|
||||||
|
3. Headings: Extract with appropriate levels
|
||||||
|
4. Paragraphs: Extract as structured text
|
||||||
|
5. Code: Extract code blocks with language identification
|
||||||
|
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements
|
||||||
|
|
||||||
|
Image Analysis Requirements:
|
||||||
|
- If you cannot analyze an image for any reason, explain why in the JSON response
|
||||||
|
- Describe everything you see in the image
|
||||||
|
- Include all text content, tables, logos, graphics, layout, and visual elements
|
||||||
|
- If the image is too small, corrupted, or unclear, explain this
|
||||||
|
- Always provide feedback - never return empty responses
|
||||||
|
|
||||||
|
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||||
|
|
||||||
|
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||||
|
|
||||||
|
DO NOT return a schema description - return actual extracted content in the JSON format shown above.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Get format-specific guidelines from renderer
|
||||||
|
formatGuidelines = ""
|
||||||
|
try:
|
||||||
|
if hasattr(renderer, 'getExtractionGuidelines'):
|
||||||
|
formatGuidelines = renderer.getExtractionGuidelines()
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Combine all parts
|
||||||
|
finalPrompt = f"{genericIntro}\n\n{formatGuidelines}".strip()
|
||||||
|
|
||||||
|
# Save extraction prompt to debug file - only if debug enabled
|
||||||
|
try:
|
||||||
|
debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
||||||
|
if debug_enabled:
|
||||||
|
import os
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
|
||||||
|
debug_root = "./test-chat/ai"
|
||||||
|
os.makedirs(debug_root, exist_ok=True)
|
||||||
|
with open(os.path.join(debug_root, f"{ts}_extraction_prompt.txt"), "w", encoding="utf-8") as f:
|
||||||
|
f.write(finalPrompt)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return finalPrompt
|
||||||
|
|
||||||
|
|
||||||
|
async def buildGenerationPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None,
    services=None
) -> str:
    """
    Use AI to build the generation prompt based on user intent and format requirements.
    Focus on what's important for the user and how to structure the content.
    """
    if not aiService:
        # Fallback if no AI service available
        return f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content."

    try:
        # Protect userPrompt from injection
        safeUserPrompt = userPrompt.replace('"', '\\"').replace("'", "\\'").replace('\n', ' ').replace('\r', ' ')

        # Debug output
        services.utils.debugLogToFile(f"GENERATION PROMPT REQUEST: buildGenerationPrompt called with outputFormat='{outputFormat}', title='{title}'", "PROMPT_BUILDER")

        # AI call to generate the appropriate generation prompt
        generationPromptRequest = f"""
You are creating instructions for an AI to generate JSON content in the CANONICAL FORMAT that will be converted to a {outputFormat} document.

User request: "{safeUserPrompt}"
Document title: "{title}"
Target format: {outputFormat}

Write clear, detailed instructions that tell the AI how to generate JSON content using the CANONICAL JSON FORMAT. Focus on:

1. What content is most important for the user
2. How to structure and organize the content using the canonical JSON format with 'sections'
3. Specific formatting requirements for the target format
4. Language requirements to preserve
5. How to ensure the JSON content meets the user's needs

CRITICAL: The AI MUST generate content using the CANONICAL JSON FORMAT with this exact structure:
{{
  "metadata": {{
    "title": "Document Title"
  }},
  "sections": [
    {{
      "id": "section_1",
      "content_type": "heading",
      "elements": [
        {{
          "level": 1,
          "text": "1. SECTION TITLE"
        }}
      ],
      "order": 1
    }},
    {{
      "id": "section_2",
      "content_type": "paragraph",
      "elements": [
        {{
          "text": "This is the actual content that should be extracted from the document."
        }}
      ],
      "order": 2
    }},
    {{
      "id": "section_3",
      "content_type": "table",
      "elements": [
        {{
          "headers": ["Column 1", "Column 2", "Column 3"],
          "rows": [
            ["Value 1", "Value 2", "Value 3"],
            ["Value 4", "Value 5", "Value 6"]
          ]
        }}
      ],
      "order": 3
    }}
  ]
}}

The AI should NOT create format-specific structures like "sheets" or "columns" - only use the canonical format with "sections" and "elements".

Write the instructions as plain text, not JSON. Start with "Generate JSON content that..." and provide clear, actionable instructions for creating structured JSON data in the canonical format.
"""

        # Call AI service to generate the prompt
        services.utils.debugLogToFile("GENERATION PROMPT REQUEST: Calling AI for generation prompt...", "PROMPT_BUILDER")

        # Import and set proper options for AI call
        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL

        request = AiCallRequest(prompt=generationPromptRequest, context="", options=request_options)
        response = await aiService.aiObjects.call(request)
        result = response.content if response else ""

        # Replace the placeholder that the AI created with actual format rules
        if result:
            formatRules = _getFormatRules(outputFormat)
            result = result.replace("PLACEHOLDER_FOR_FORMAT_RULES", formatRules)

        # Debug output
        services.utils.debugLogToFile("GENERATION PROMPT: Generated successfully", "PROMPT_BUILDER")

        # Save full generation prompt and AI response to debug file - only if debug enabled
        try:
            debug_enabled = services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
            if debug_enabled:
                import os
                from datetime import datetime, UTC
                ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
                debug_root = "./test-chat/ai"
                os.makedirs(debug_root, exist_ok=True)
                with open(os.path.join(debug_root, f"{ts}_generation_prompt.txt"), "w", encoding="utf-8") as f:
                    f.write(f"GENERATION PROMPT REQUEST:\n{generationPromptRequest}\n\n")
                    f.write(f"GENERATION PROMPT AI RESPONSE:\n{response.content if response else 'No response'}\n\n")
                    f.write(f"GENERATION PROMPT FINAL:\n{result if result else 'None'}\n")
        except Exception:
            pass

        return result if result else f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content."

    except Exception as e:
        # Fallback on any error - preserve user prompt for language instructions
        services.utils.debugLogToFile(f"DEBUG: AI generation prompt failed: {str(e)}", "PROMPT_BUILDER")
        return f"Generate a comprehensive {outputFormat} document titled '{title}' based on the extracted content. User requirements: {userPrompt}"

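A usage sketch, assuming an aiService/services wiring like the one used elsewhere in this file; without an aiService the static fallback string is returned:

# Sketch only - the surrounding service objects are assumptions, not part of this diff.
prompt = await buildGenerationPrompt(
    outputFormat="xlsx",
    userPrompt="Summarise the invoices by month",
    title="Invoice Summary",
    aiService=aiService,
    services=services,
)
# With aiService=None the call degrades to:
# "Generate a comprehensive xlsx document titled 'Invoice Summary' based on the extracted content."
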
def _getFormatRules(outputFormat: str) -> str:
    """
    Get format-specific rules for the generation prompt.
    """
    format_rules = {
        "xlsx": """
XLSX Format Rules:
- Create tables with clear headers and organized data
- Use appropriate column widths and formatting
- Include summary information if relevant
- Ensure data is properly structured for spreadsheet analysis
""",
        "pdf": """
PDF Format Rules:
- Create professional document layout
- Use appropriate headings and sections
- Include proper spacing and formatting
- Ensure content is well-organized and readable
""",
        "docx": """
DOCX Format Rules:
- Create professional document layout
- Use appropriate headings and sections
- Include proper spacing and formatting
- Ensure content is well-organized and readable
""",
        "html": """
HTML Format Rules:
- Create clean, semantic HTML structure
- Use appropriate tags for content organization
- Include proper styling classes
- Ensure content is accessible and well-formatted
""",
        "json": """
JSON Format Rules:
- Create well-structured JSON data
- Use appropriate nesting and organization
- Include metadata and context information
- Ensure data is properly formatted and valid
""",
        "csv": """
CSV Format Rules:
- Create clear, organized tabular data
- Use appropriate headers and data types
- Ensure proper CSV formatting
- Include all relevant data in structured format
""",
        "txt": """
TXT Format Rules:
- Create clean, readable text format
- Use appropriate spacing and organization
- Include clear headings and sections
- Ensure content is well-structured and easy to read
"""
    }

    return format_rules.get(outputFormat.lower(), f"""
{outputFormat.upper()} Format Rules:
- Create well-structured content appropriate for {outputFormat}
- Use appropriate formatting and organization
- Ensure content is clear and professional
- Include all relevant information in proper format
""")

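A minimal usage sketch; "pptx" is a hypothetical key not present in format_rules, so the generic fallback template is returned:

xlsx_rules = _getFormatRules("XLSX")   # matched via .lower() -> the xlsx rules block
pptx_rules = _getFormatRules("pptx")   # no entry -> generic "PPTX Format Rules: ..." fallback
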
async def _parseExtractionIntent(userPrompt: str, outputFormat: str, aiService=None, services=None) -> str:
    """
    Parse user prompt to extract the core extraction intent.
    """
    if not aiService:
        return f"Extract content from the provided documents and create a {outputFormat} report."

    try:
        analysis_prompt = f"""
Analyze this user request and extract the core extraction intent:

User request: "{userPrompt}"
Target format: {outputFormat}

Extract the main intent and requirements for document processing. Focus on:
1. What content needs to be extracted
2. How it should be organized
3. Any specific requirements or preferences

Respond with a clear, concise statement of the extraction intent.
"""

        from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
        request_options = AiCallOptions()
        request_options.operationType = OperationType.GENERAL

        request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
        response = await aiService.aiObjects.call(request)

        if response and response.content:
            return response.content.strip()
        else:
            return f"Extract content from the provided documents and create a {outputFormat} report."

    except Exception as e:
        services.utils.debugLogToFile(f"Extraction intent analysis failed: {str(e)}", "PROMPT_BUILDER")
        return f"Extract content from the provided documents and create a {outputFormat} report."

@@ -32,7 +32,7 @@ class NeutralizationService:
            serviceCenter: Service center instance for accessing other services
            NamesToParse: List of names to parse and replace (case-insensitive)
        """
-        self.serviceCenter = serviceCenter
+        self.services = serviceCenter
        self.interfaceDbApp = serviceCenter.interfaceDbApp

        # Initialize anonymization processors

@@ -0,0 +1,264 @@
import json
import os
from typing import Any, Dict, List, Set
from datetime import datetime, UTC


class NormalizationService:
    """
    Produces a single canonical table in merged JSON using an AI-provided header mapping
    and deterministic, in-code value normalization. No language heuristics in code.
    """

    def __init__(self, services):
        self.services = services

    # Public API
    def discoverStructures(self, mergedJson: Dict[str, Any]) -> Dict[str, Any]:
        headers: Set[str] = set()
        samples: Dict[str, List[str]] = {}

        sections = mergedJson.get("sections", []) if isinstance(mergedJson, dict) else []
        for section in sections:
            if not isinstance(section, dict):
                continue

            # Use only the fundamental agreed JSON structure: content_type/elements
            if section.get("content_type") != "table":
                continue

            # Extract table data from elements array
            hdrs = []
            rows = []
            for element in section.get("elements", []):
                if isinstance(element, dict) and "headers" in element and "rows" in element:
                    hdrs = element.get("headers") or []
                    rows = element.get("rows") or []
                    break

            if not hdrs or not rows:
                continue

            for h in hdrs:
                if not isinstance(h, str):
                    continue
                headers.add(h)

            # collect small value samples by column index
            for row in rows[:5]:
                if not isinstance(row, list):
                    continue
                for i, value in enumerate(row):
                    headerName = hdrs[i] if i < len(hdrs) else f"col_{i}"
                    if headerName not in samples:
                        samples[headerName] = []
                    if len(samples[headerName]) < 5:
                        samples[headerName].append(str(value))

        return {
            "tableHeaders": sorted(list(headers)),
            "headerSamples": samples,
        }

    async def requestHeaderMapping(self, inventory: Dict[str, Any], cacheKey: str, canonicalSpec: Dict[str, Any] | None = None, mergePrompt: str | None = None) -> Dict[str, Any]:

        # Allow caller to specify any canonical schema. If none provided, default to discovered headers.
        if canonicalSpec is None:
            canonicalSpec = {
                "canonicalHeaders": inventory.get("tableHeaders", []),
                "constraints": {}
            }

        # Protect merge prompt context by wrapping in single quotes and escaping internal quotes
        protectedMerge = None
        if mergePrompt:
            try:
                protectedMerge = str(mergePrompt).replace("'", "\\'")
            except Exception:
                protectedMerge = str(mergePrompt)

        prompt = (
            "You are a mapping generator. Return ONLY JSON.\n\n"
            "Given discovered headers and sample values, map them to the canonical headers.\n"
            "Do not invent fields. Use null if no mapping. Provide normalization policy.\n\n"
            f"CANONICAL_SPEC:\n{json.dumps(canonicalSpec, ensure_ascii=False, indent=2)}\n\n"
            f"HEADERS_DISCOVERED:\n{json.dumps(inventory, ensure_ascii=False, indent=2)}\n\n"
            + (f"MERGE_PROMPT_CONTEXT (protected):\n'{protectedMerge}'\n\n" if protectedMerge is not None else "") +
            "REPLY JSON SHAPE:\n(Example)\n"
            "{\n \"mappings\": {\"<sourceHeader>\": \"<Canonical>|null\"},\n"
            " \"normalizationPolicy\": {\n \"TotalAmount\": {\"decimalSeparator\": \",\"|\".\"},\n"
            " \"Currency\": {\"stripSymbols\": true},\n"
            " \"Date\": {\"formats\": [\"DD.MM.YYYY\",\"YYYY-MM-DD\"]}\n }\n}\n"
        )

        response = await self.services.ai.callAi(prompt=prompt)
        if not response:
            return {"mapping": {}, "normalizationPolicy": {}}

        # Extract JSON from response more safely
        start_idx = response.find('{')
        end_idx = response.rfind('}')
        if start_idx == -1 or end_idx == -1 or start_idx >= end_idx:
            return {"mapping": {}, "normalizationPolicy": {}}

        js = response[start_idx:end_idx + 1]
        try:
            mapping = json.loads(js)
        except json.JSONDecodeError:
            return {"mapping": {}, "normalizationPolicy": {}}

        # Normalize key naming from AI: prefer single key "mapping"
        if "mapping" not in mapping and "mappings" in mapping and isinstance(mapping["mappings"], dict):
            mapping["mapping"] = mapping["mappings"]
            try:
                del mapping["mappings"]
            except Exception:
                pass

        # Ensure canonicalHeaders present in mapping for downstream use
        if "canonicalHeaders" not in mapping:
            mapping["canonicalHeaders"] = canonicalSpec.get("canonicalHeaders", [])

        # debug artifact
        self._writeDebugArtifact("mapping.json", mapping)
        return mapping

    def applyMapping(self, mergedJson: Dict[str, Any], mappingSpec: Dict[str, Any]) -> Dict[str, Any]:
        mappings = (mappingSpec or {}).get("mapping", {})
        policy = (mappingSpec or {}).get("normalizationPolicy", {})

        # Prefer headers provided by mapping (generic across domains)
        canonicalHeaders = (mappingSpec or {}).get("canonicalHeaders") or []
        if not canonicalHeaders:
            # Fallback to union of mapped targets
            canonicalHeaders = sorted(list({t for t in mappings.values() if t}))

        rows: List[List[str]] = []
        sections = mergedJson.get("sections", []) if isinstance(mergedJson, dict) else []
        for section in sections:
            # Use only the fundamental agreed JSON structure: content_type/elements
            if section.get("content_type") != "table":
                continue

            # Extract table data from elements array
            sourceHeaders = []
            sourceRows = []
            for element in section.get("elements", []):
                if isinstance(element, dict) and "headers" in element and "rows" in element:
                    sourceHeaders = element.get("headers") or []
                    sourceRows = element.get("rows") or []
                    break

            if not sourceHeaders or not sourceRows:
                continue

            # Build index map: canonical -> source index or None
            indexMap: Dict[str, int] = {}
            for ci, ch in enumerate(canonicalHeaders):
                srcIndex = None
                for si, sh in enumerate(sourceHeaders):
                    # Prefer explicit mapping target; fallback to identity when names match
                    target = mappings.get(sh)
                    if target is None and sh == ch:
                        target = ch
                    if target == ch:
                        srcIndex = si
                        break
                indexMap[ch] = srcIndex

            # Transform rows
            for r in sourceRows:
                canonicalRow: List[str] = []
                for ch in canonicalHeaders:
                    idx = indexMap.get(ch)
                    try:
                        value = r[idx] if (idx is not None and idx < len(r)) else ""
                    except (IndexError, KeyError) as e:
                        # Handle corrupted data gracefully
                        value = ""
                    canonicalRow.append(self._normalizeValue(ch, value, policy))
                # consider as row if at least one non-empty meaningful field
                if any(v.strip() for v in canonicalRow):
                    rows.append(canonicalRow)

        canonical = {
            "metadata": {
                "title": mergedJson.get("metadata", {}).get("title", "Merged Document"),
                "source_documents": mergedJson.get("metadata", {}).get("source_documents", [])
            },
            "sections": [
                {
                    "id": "canonical_table_1",
                    "content_type": "table",
                    "elements": [
                        {
                            "headers": canonicalHeaders,
                            "rows": rows
                        }
                    ],
                    "order": 1
                }
            ]
        }

        # debug artifact
        self._writeDebugArtifact("canonical_merged.json", canonical)
        return canonical

    def validateCanonical(self, canonicalJson: Dict[str, Any]) -> Dict[str, Any]:
        rows = []
        try:
            sections = canonicalJson.get("sections", [])
            for s in sections:
                if s.get("content_type") == "table":
                    # Extract rows from elements array
                    for element in s.get("elements", []):
                        if isinstance(element, dict) and "rows" in element:
                            rows.extend(element.get("rows", []))
        except Exception:
            rows = []
        report = {
            "rowCount": len(rows),
            "success": len(rows) > 0
        }
        self._writeDebugArtifact("normalization_report.json", report)
        return report

    # Internal helpers
    def _normalizeValue(self, canonicalHeader: str, value: Any, policy: Dict[str, Any]) -> str:
        if value is None:
            return ""
        text = str(value).strip()
        # Generic normalization guided by policy; avoid domain specifics
        if canonicalHeader in (policy.get("numericFields", []) or []):
            dec = ((policy.get(canonicalHeader) or {}).get("decimalSeparator")
                   or (policy.get("numeric") or {}).get("decimalSeparator")
                   or ".")
            if dec == ",":
                text = text.replace(".", "").replace(",", ".") if "," in text else text
            text = ''.join(ch for ch in text if ch.isdigit() or ch in ['.', '-', '+'])
        elif (policy.get("text") or {}).get("stripSymbols") and canonicalHeader in (policy.get("text", {}).get("applyTo", []) or []):
            text = ''.join(ch for ch in text if ch.isalpha())
            text = text.upper()
        return text

    def _writeDebugArtifact(self, fileName: str, obj: Any) -> None:
        try:
            debugEnabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
            if not debugEnabled:
                return
            root = "./test-chat/ai"
            os.makedirs(root, exist_ok=True)
            # Prefix timestamp for files that are frequently overwritten
            ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
            if fileName in ("mapping.json", "canonical_merged.json"):
                outName = f"{ts}_{fileName}"
            else:
                outName = fileName
            path = os.path.join(root, outName)
            with open(path, "w", encoding="utf-8") as f:
                if isinstance(obj, (dict, list)):
                    f.write(json.dumps(obj, ensure_ascii=False, indent=2))
                else:
                    f.write(str(obj))
        except Exception:
            pass

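A minimal end-to-end sketch of how this service is presumably driven; the merged_json input and the services wiring are assumptions for illustration only:

# Sketch only - wiring and input are hypothetical, not part of this diff.
normalizer = NormalizationService(services)
inventory = normalizer.discoverStructures(merged_json)              # collect headers + value samples
mapping = await normalizer.requestHeaderMapping(inventory, cacheKey="merge-1")
canonical = normalizer.applyMapping(merged_json, mapping)           # single canonical table section
report = normalizer.validateCanonical(canonical)                    # {"rowCount": ..., "success": ...}
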
@@ -21,7 +21,7 @@ class SharepointService:

        Use setAccessTokenFromConnection() method to configure the access token before making API calls.
        """
-        self.serviceCenter = serviceCenter
+        self.services = serviceCenter
        self.access_token = None
        self.base_url = "https://graph.microsoft.com/v1.0"

@@ -16,7 +16,7 @@ class TicketService:
        Args:
            serviceCenter: Service center instance for accessing other services
        """
-        self.serviceCenter = serviceCenter
+        self.services = serviceCenter

    async def _createTicketInterfaceByType(
        self,

@@ -4,6 +4,7 @@ Provides centralized access to configuration, events, and other utilities.
 """

 import logging
+import os
 from typing import Any, Optional, Dict, Callable
 from modules.shared.configuration import APP_CONFIG
 from modules.shared.eventManagement import eventManager

@@ -139,4 +140,43 @@ class UtilsService:
             return TokenManager().getFreshToken(connectionId)
         except Exception as e:
             logger.error(f"Error getting fresh token for connection {connectionId}: {str(e)}")
             return None
+
+    def debugLogToFile(self, message: str, context: str = "DEBUG"):
+        """
+        Log debug message to file if debug logging is enabled.
+
+        Args:
+            message: Debug message to log
+            context: Context identifier for the debug message
+        """
+        try:
+            # Check if debug logging is enabled
+            debug_enabled = self.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
+            if not debug_enabled:
+                return
+
+            # Get debug directory
+            debug_dir = self.configGet("APP_DEBUG_CHAT_WORKFLOW_DIR", "./test-chat")
+            if not os.path.isabs(debug_dir):
+                # If relative path, make it relative to the gateway directory
+                gateway_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
+                debug_dir = os.path.join(gateway_dir, debug_dir)
+
+            # Ensure debug directory exists
+            os.makedirs(debug_dir, exist_ok=True)
+
+            # Create debug file path
+            debug_file = os.path.join(debug_dir, "debug_workflow.log")
+
+            # Format the debug entry
+            timestamp = self.getUtcTimestamp()
+            debug_entry = f"[{timestamp}] [{context}] {message}\n"
+
+            # Write to debug file
+            with open(debug_file, "a", encoding="utf-8") as f:
+                f.write(debug_entry)
+
+        except Exception as e:
+            # Don't log debug errors to avoid recursion
+            pass

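A usage sketch of the new helper, assuming the UtilsService instance is exposed as services.utils as elsewhere in this branch; the config keys shown are the ones the method actually reads:

# Sketch only: appends to <APP_DEBUG_CHAT_WORKFLOW_DIR>/debug_workflow.log,
# and only when APP_DEBUG_CHAT_WORKFLOW_ENABLED is truthy.
services.utils.debugLogToFile("Planning started", "TASK_PLANNER")
services.utils.debugLogToFile("Planning finished")  # context defaults to "DEBUG"
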
@@ -16,7 +16,7 @@ class WorkflowService:
     """Service class containing methods for document processing, chat operations, and workflow management"""

     def __init__(self, serviceCenter):
-        self.serviceCenter = serviceCenter
+        self.services = serviceCenter
         self.user = serviceCenter.user
         self.workflow = serviceCenter.workflow
         self.interfaceDbChat = serviceCenter.interfaceDbChat

@@ -78,11 +78,15 @@ class WorkflowService:
     def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]:
         """Get ChatDocuments from a list of document references using all three formats."""
         try:
-            # Get the current workflow from services (same pattern as setWorkflowContext)
-            workflow = self.services.currentWorkflow
-            if not workflow:
-                logger.error("No workflow available for document list resolution")
-                return []
+            workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow
+            # Reload workflow from database to ensure we have all messages
+            if hasattr(workflow, 'id'):
+                try:
+                    workflow = self.getWorkflow(workflow.id)
+                    logger.debug(f"Reloaded workflow {workflow.id} with {len(workflow.messages)} messages")
+                except Exception as e:
+                    logger.warning(f"Could not reload workflow from database: {str(e)}")

             all_documents = []
             for doc_ref in documentList:

@@ -125,7 +129,9 @@ class WorkflowService:
                         break

                 if not message_found:
-                    logger.warning(f"Message with ID {message_id} not found in workflow. Available message IDs: {[str(msg.id) for msg in workflow.messages]}")
+                    available_ids = [str(msg.id) for msg in workflow.messages]
+                    logger.error(f"Message with ID {message_id} not found in workflow. Available message IDs: {available_ids}")
+                    raise ValueError(f"Document reference not found: docList:{message_id}:{label}")
                 elif len(parts) >= 2:
                     # Format: docList:<label> - find message by documentsLabel
                     label = parts[1]

@@ -154,7 +160,8 @@ class WorkflowService:
                     else:
                         logger.debug(f"Found docList reference {doc_ref} but message has no documents")
                 else:
-                    logger.debug(f"No messages found with documentsLabel: {label}")
+                    logger.error(f"No messages found with documentsLabel: {label}")
+                    raise ValueError(f"Document reference not found: docList:{label}")
             else:
                 # Direct label reference (round1_task2_action3_contextinfo)
                 # Search for messages with matching documentsLabel to find the actual documents

@@ -198,30 +205,8 @@ class WorkflowService:
                     else:
                         logger.debug(f"No documents found in newest message {newest_message.id}")
                 else:
-                    logger.debug(f"No messages found with documentsLabel: {doc_ref}")
-                    # Fallback: also check if any message has this documentsLabel as a prefix
-                    logger.debug(f"Trying fallback search for messages with documentsLabel containing: {doc_ref}")
-                    fallback_messages = []
-                    for message in workflow.messages:
-                        msg_documents_label = getattr(message, 'documentsLabel', '')
-                        if msg_documents_label and msg_documents_label.startswith(doc_ref):
-                            fallback_messages.append(message)
-                            logger.debug(f"Found fallback message {message.id} with documentsLabel: {msg_documents_label}")
-
-                    if fallback_messages:
-                        # Sort by publishedAt descending (newest first)
-                        fallback_messages.sort(key=lambda msg: getattr(msg, 'publishedAt', 0), reverse=True)
-                        newest_fallback = fallback_messages[0]
-
-                        logger.debug(f"Using fallback message {newest_fallback.id} with documentsLabel: {getattr(newest_fallback, 'documentsLabel', 'unknown')}")
-                        if newest_fallback.documents:
-                            doc_names = [doc.fileName for doc in newest_fallback.documents if hasattr(doc, 'fileName')]
-                            logger.debug(f"Added {len(newest_fallback.documents)} documents from fallback message {newest_fallback.id}: {doc_names}")
-                            all_documents.extend(newest_fallback.documents)
-                        else:
-                            logger.debug(f"No documents found in fallback message {newest_fallback.id}")
-                    else:
-                        logger.debug(f"No fallback messages found either")
+                    logger.error(f"No messages found with documentsLabel: {doc_ref}")
+                    raise ValueError(f"Document reference not found: {doc_ref}")

             logger.debug(f"Resolved {len(all_documents)} documents from document list: {documentList}")
             return all_documents

@@ -260,7 +245,8 @@ class WorkflowService:
             token_status = f"error: {str(e)}"

         # Build enhanced reference with state information
-        base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}:{connection.id}"
+        # Format: connection:msft:<username> (without UUID)
+        base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}"
         state_info = f" [status:{connection.status.value}, token:{token_status}]"

         logger.debug(f"getConnectionReferenceFromUserConnection: Built reference: {base_ref + state_info}")

@@ -283,26 +269,25 @@ class WorkflowService:
             return None

     def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
-        """Get UserConnection from reference string (handles both old and enhanced formats)"""
+        """Get UserConnection from reference string (handles new format without UUID)"""
         try:
-            # Parse reference format: connection:{authority}:{username}:{id} [status:..., token:...]
+            # Parse reference format: connection:{authority}:{username} [status:..., token:...]
             # Remove state information if present
             base_reference = connectionReference.split(' [')[0]

             parts = base_reference.split(':')
-            if len(parts) != 4 or parts[0] != "connection":
+            if len(parts) != 3 or parts[0] != "connection":
                 return None

             authority = parts[1]
             username = parts[2]
-            conn_id = parts[3]

             # Get user connections through AppObjects interface
             user_connections = self.interfaceDbApp.getUserConnections(self.user.id)

-            # Find matching connection
+            # Find matching connection by authority and username (no UUID needed)
             for conn in user_connections:
-                if str(conn.id) == conn_id and conn.authority.value == authority and conn.externalUsername == username:
+                if conn.authority.value == authority and conn.externalUsername == username:
                     return conn
             return None

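A small sketch of the new UUID-less reference round-trip; the concrete values are illustrative only:

# Sketch only - e.g. "connection:msft:alice@contoso.com [status:active, token:ok]"
base_reference = "connection:msft:alice@contoso.com [status:active, token:ok]".split(' [')[0]
parts = base_reference.split(':')           # ["connection", "msft", "alice@contoso.com"]
authority, username = parts[1], parts[2]    # matched against conn.authority.value / conn.externalUsername
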
@@ -437,11 +422,7 @@ class WorkflowService:
     def setWorkflowContext(self, round_number: int = None, task_number: int = None, action_number: int = None):
         """Set current workflow context for document generation and routing"""
         try:
-            # Get the current workflow from services
-            workflow = self.services.currentWorkflow
-            if not workflow:
-                logger.error("No workflow available for context setting")
-                return
+            workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow

             # Prepare update data
             update_data = {}

@@ -548,10 +529,7 @@ class WorkflowService:
     def getDocumentCount(self) -> str:
         """Get document count for task planning (matching old handlingTasks.py logic)"""
         try:
-            # Get the current workflow from services
-            workflow = self.services.currentWorkflow
-            if not workflow:
-                return "No documents available"
+            workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow

             # Count documents from all messages in the workflow (like old system)
             total_docs = 0

@@ -570,10 +548,7 @@ class WorkflowService:
     def getWorkflowHistoryContext(self) -> str:
         """Get workflow history context for task planning (matching old handlingTasks.py logic)"""
         try:
-            # Get the current workflow from services
-            workflow = self.services.currentWorkflow
-            if not workflow:
-                return "No previous round context available"
+            workflow = getattr(self.serviceCenter, 'currentWorkflow', None) or self.workflow

             # Check if there are any previous rounds by looking for "first" messages
             has_previous_rounds = False

@@ -622,15 +597,26 @@ class WorkflowService:
         if not workflow or not hasattr(workflow, 'messages'):
             return "No documents available"

+        # Use the provided workflow object directly to avoid database reload issues
+        # that can cause filename truncation. The workflow object should already be up-to-date.
+        logger.debug(f"Using provided workflow object for getAvailableDocuments (ID: {workflow.id if hasattr(workflow, 'id') else 'unknown'})")
+
+        # Debug: Check document filenames in the workflow object
+        if hasattr(workflow, 'messages') and workflow.messages:
+            for message in workflow.messages:
+                if hasattr(message, 'documents') and message.documents:
+                    for doc in message.documents:
+                        logger.debug(f"Workflow document {doc.id}: fileName='{doc.fileName}' (length: {len(doc.fileName)})")
+
         # Get document reference list using the exact same logic as old system
         document_list = self._getDocumentReferenceList(workflow)

-        # Build technical context string for AI action planning (exact copy of old system)
-        context = "AVAILABLE DOCUMENTS:\n\n"
+        # Build index string for AI action planning
+        context = ""

-        # Process chat exchanges (current round) - exact copy of old system
+        # Process current round exchanges first
         if document_list["chat"]:
-            context += "CURRENT ROUND DOCUMENTS:\n"
+            context += "\nCurrent round documents:\n"
             for exchange in document_list["chat"]:
                 # Generate docList reference for the exchange (using message ID and label)
                 # Find the message that corresponds to this exchange

@@ -656,9 +642,9 @@ class WorkflowService:
                     context += f"  - docItem:{doc_ref}\n"
             context += "\n"

-        # Process history exchanges (previous rounds) - exact copy of old system
+        # Process previous rounds after
         if document_list["history"]:
-            context += "WORKFLOW HISTORY DOCUMENTS:\n"
+            context += "\nPast rounds documents:\n"
             for exchange in document_list["history"]:
                 # Generate docList reference for the exchange (using message ID and label)
                 # Find the message that corresponds to this exchange

@@ -685,7 +671,7 @@ class WorkflowService:
             context += "\n"

         if not document_list["chat"] and not document_list["history"]:
-            context += "NO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"
+            context += "\nNO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n"

         return context

@@ -713,39 +699,23 @@ class WorkflowService:
         for message in reversed(workflow.messages):
             is_first = message.status == "first" if hasattr(message, 'status') else False

-            # Build a DocumentExchange if message has documents
+            # Build a DocumentExchange if message has documents and an explicit documentsLabel
             doc_exchange = None
             if message.documents:
-                if message.actionId and message.documentsLabel:
-                    # Validate that we use the same label as in the message
+                existing_label = getattr(message, 'documentsLabel', None)
+                if existing_label:
+                    # Validate and use the message's actual documentsLabel
                     validated_label = self._validateDocumentLabelConsistency(message)

-                    # Use the message's actual documentsLabel
                     doc_refs = []
                     for doc in message.documents:
                         doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
                         doc_refs.append(doc_ref)

                     doc_exchange = {
                         'documentsLabel': validated_label,
                         'documents': doc_refs
                     }
-                else:
-                    # Generate new labels for documents without explicit labels
-                    doc_refs = []
-                    for doc in message.documents:
-                        doc_ref = self._getDocumentReferenceFromChatDocument(doc, message)
-                        doc_refs.append(doc_ref)
-
-                    if doc_refs:
-                        # Create a label based on message context
-                        context_prefix = self._generateWorkflowContextPrefix(message)
-                        context_label = f"{context_prefix}_context"
-
-                        doc_exchange = {
-                            'documentsLabel': context_label,
-                            'documents': doc_refs
-                        }
+                # IMPORTANT: Never synthesize new labels here. If a message lacks
+                # a documentsLabel, we skip adding an exchange for it.

             # Append to appropriate container based on boundary
             if doc_exchange:

@@ -773,12 +743,22 @@ class WorkflowService:
         """Update file attributes (fileName, fileSize, mimeType) for documents"""
         for doc in documents:
             try:
+                # Debug: Log original filename before refresh
+                original_filename = doc.fileName
+                logger.debug(f"Before refresh - Document {doc.id}: fileName='{original_filename}' (length: {len(original_filename)})")
+
                 # Use the proper WorkflowService method to get file info
                 file_info = self.getFileInfo(doc.fileId)
                 if file_info:
+                    db_filename = file_info.get("fileName", doc.fileName)
+                    logger.debug(f"Database filename for {doc.id}: '{db_filename}' (length: {len(db_filename)})")
+
                     doc.fileName = file_info.get("fileName", doc.fileName)
                     doc.fileSize = file_info.get("size", doc.fileSize)
                     doc.mimeType = file_info.get("mimeType", doc.mimeType)
+
+                    # Debug: Log final filename after refresh
+                    logger.debug(f"After refresh - Document {doc.id}: fileName='{doc.fileName}' (length: {len(doc.fileName)})")
                 else:
                     logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
             except Exception as e:

@@ -794,6 +774,8 @@ class WorkflowService:
     def _getDocumentReferenceFromChatDocument(self, document, message) -> str:
         """Get document reference using document ID and filename."""
         try:
+            # Debug logging to track filename truncation
+            logger.debug(f"Creating document reference for {document.id}: fileName='{document.fileName}' (length: {len(document.fileName)})")
             # Use document ID and filename for simple reference
             return f"docItem:{document.id}:{document.fileName}"
         except Exception as e:

@@ -844,14 +826,14 @@ class WorkflowService:
         """Get connection reference list (matching old handlingTasks.py logic)"""
         try:
             # Get connections from the database using the same logic as the old system
-            if hasattr(self.serviceCenter, 'interfaceDbApp') and hasattr(self.serviceCenter, 'user'):
-                userId = self.serviceCenter.user.id
-                connections = self.serviceCenter.interfaceDbApp.getUserConnections(userId)
+            if hasattr(self.services, 'interfaceDbApp') and hasattr(self.services, 'user'):
+                userId = self.services.user.id
+                connections = self.services.interfaceDbApp.getUserConnections(userId)
                 if connections:
                     # Format connections as reference strings using the same pattern as the old system
                     connectionRefs = []
                     for conn in connections:
-                        # Create reference string in format: connection:{authority}:{username}:{id} [status:..., token:...]
+                        # Create reference string in format: connection:{authority}:{username} [status:..., token:...]
                         # This matches the format expected by getUserConnectionFromConnectionReference()
                         ref = self.getConnectionReferenceFromUserConnection(conn)
                         connectionRefs.append(ref)

@@ -42,9 +42,7 @@ class MethodDocument(MethodBase):
         - operationType (str, optional): extract_content | analyze_document | summarize_content. Default: extract_content.
         - processDocumentsIndividually (bool, optional): Process each document separately. Default: True.
         - chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
-        - mergeStrategy (dict, optional): Merge strategy for chunked content.
-        - expectedDocumentFormats (list, optional): Desired output format specs.
-        - includeMetadata (bool, optional): Include file metadata. Default: True.
+        - outputMimeType (str, optional): MIME type for output file. Options: "text/plain" (default), "application/json", "text/csv", "text/html". Default: "text/plain".
         """
         try:
             documentList = parameters.get("documentList")

@@ -54,13 +52,7 @@ class MethodDocument(MethodBase):
             operationType = parameters.get("operationType", "extract_content")
             processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
             chunkAllowed = parameters.get("chunkAllowed", True)
-            mergeStrategy = parameters.get("mergeStrategy", {
-                "groupBy": "typeGroup",
-                "orderBy": "id",
-                "mergeType": "concatenate"
-            })
-            expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
-            includeMetadata = parameters.get("includeMetadata", True)
+            outputMimeType = parameters.get("outputMimeType", "text/plain")

             if not documentList:
                 return ActionResult.isFailure(

@@ -87,19 +79,16 @@ class MethodDocument(MethodBase):
                 compressContext=not chunkAllowed
             )

-            # Add format instructions to prompt if expected formats are provided
+            # Add format instructions to prompt based on MIME type
             enhanced_prompt = prompt
-            if expectedDocumentFormats:
-                format_instructions = []
-                for fmt in expectedDocumentFormats:
-                    extension = fmt.get("extension", ".txt")
-                    mime_type = fmt.get("mimeType", "text/plain")
-                    description = fmt.get("description", "")
-                    format_instructions.append(f"- {extension} ({mime_type}): {description}")
-
-                if format_instructions:
-                    enhanced_prompt += f"\n\nPlease format the output as: {', '.join([fmt.get('extension', '.txt') for fmt in expectedDocumentFormats])}"
-                    enhanced_prompt += f"\nExpected formats:\n" + "\n".join(format_instructions)
+            mime_type_mapping = {
+                "text/plain": (".txt", "Plain text format"),
+                "application/json": (".json", "Structured JSON format"),
+                "text/csv": (".csv", "Table format"),
+                "text/html": (".html", "HTML format")
+            }
+            extension, description = mime_type_mapping.get(outputMimeType, (".txt", "Plain text format"))
+            enhanced_prompt += f"\n\nPlease format the output as {extension} ({outputMimeType}): {description}"

             # Use enhanced AI service for extraction
             ai_response = await self.services.ai.callAi(

@@ -125,8 +114,16 @@ class MethodDocument(MethodBase):
             for i, chatDocument in enumerate(chatDocuments):
                 # Use the AI response directly - it already contains processed content
                 final_content = ai_response
-                final_mime_type = "text/plain"
-                final_extension = ".txt"
+
+                # Determine output format based on MIME type
+                mime_type_mapping = {
+                    "text/plain": ".txt",
+                    "application/json": ".json",
+                    "text/csv": ".csv",
+                    "text/html": ".html"
+                }
+                final_extension = mime_type_mapping.get(outputMimeType, ".txt")
+                final_mime_type = outputMimeType

                 # Create meaningful output fileName with workflow context
                 original_fileName = chatDocument.fileName

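A quick sketch of the mapping behaviour; "image/png" is only a hypothetical unmapped type used to show the fallback branch:

mime_type_mapping = {"text/plain": ".txt", "application/json": ".json", "text/csv": ".csv", "text/html": ".html"}
print(mime_type_mapping.get("application/json", ".txt"))   # ".json"
print(mime_type_mapping.get("image/png", ".txt"))           # ".txt" - unmapped types fall back, final_mime_type keeps the caller's value
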
@@ -156,9 +153,6 @@ class MethodDocument(MethodBase):
                 error=str(e)
             )
-
-
-

    @action
    async def generate(self, parameters: Dict[str, Any]) -> ActionResult:
        """

@@ -175,8 +169,6 @@ class MethodDocument(MethodBase):
         - operationType (str, optional): generate_report | analyze_documents. Default: generate_report.
         - processDocumentsIndividually (bool, optional): Process per document. Default: True.
         - chunkAllowed (bool, optional): Allow chunking for large inputs. Default: True.
-        - mergeStrategy (dict, optional): Merging rules for multi-part generation.
-        - includeMetadata (bool, optional): Include file metadata. Default: True.
         """
         try:
             documentList = parameters.get("documentList")

@@ -188,12 +180,6 @@ class MethodDocument(MethodBase):
             operationType = parameters.get("operationType", "generate_report")
             processDocumentsIndividually = parameters.get("processDocumentsIndividually", True)
             chunkAllowed = parameters.get("chunkAllowed", True)
-            mergeStrategy = parameters.get("mergeStrategy", {
-                "groupBy": "typeGroup",
-                "orderBy": "id",
-                "mergeType": "concatenate"
-            })
-            includeMetadata = parameters.get("includeMetadata", True)

             if not documentList:
                 return ActionResult.isFailure(

@@ -31,14 +31,14 @@ class MethodAi(MethodBase):
     async def process(self, parameters: Dict[str, Any]) -> ActionResult:
         """
         GENERAL:
-        - Purpose: AI-based analysis and content generation with optional document context.
-        - Input requirements: aiPrompt (required); optional documentList, resultType, processingMode, includeMetadata, operationType, priority, maxCost, maxProcessingTime, requiredTags.
-        - Output format: Single or multiple documents in requested format.
+        - Purpose: Process a user prompt with any number of optional input documents to produce one or many output documents of the SAME format.
+        - Input requirements: aiPrompt (required); optional documentList.
+        - Output format: Exactly one file format per call; to produce multiple output formats, make separate calls.

         Parameters:
         - aiPrompt (str, required): Instruction for the AI.
         - documentList (list, optional): Document reference(s) for context.
-        - resultType (str, optional): Output extension (txt, json, md, csv, xml, html, pdf, docx, xlsx, png). Default: txt.
+        - resultType (str, optional): Output file extension - only one extension allowed (e.g. txt, json, md, csv, xml, html, pdf, docx, xlsx, png, ...). Default: txt.
         - processingMode (str, optional): basic | advanced | detailed. Default: basic.
         - includeMetadata (bool, optional): Include metadata when available. Default: True.
         - operationType (str, optional): general | generate_plan | analyse_content | generate_content | web_research | image_analysis | image_generation. Default: general.

@ -169,12 +169,12 @@ class MethodAi(MethodBase):
|
||||||
Parameters:
|
Parameters:
|
||||||
- user_prompt (str, required): Research question or topic.
|
- user_prompt (str, required): Research question or topic.
|
||||||
- urls (list, optional): Specific URLs to crawl.
|
- urls (list, optional): Specific URLs to crawl.
|
||||||
- max_results (int, optional): Max search results. Default: 10.
|
- max_results (int, optional): Max search results. Default: 5.
|
||||||
- max_pages (int, optional): Max pages to crawl per site. Default: 10.
|
- max_pages (int, optional): Max pages to crawl per site. Default: 5.
|
||||||
- search_depth (str, optional): basic | advanced. Default: basic.
|
- search_depth (str, optional): basic | advanced. Default: basic.
|
||||||
- extract_depth (str, optional): basic | advanced. Default: advanced.
|
- extract_depth (str, optional): basic | advanced. Default: advanced.
|
||||||
- pages_search_depth (int, optional): Crawl depth level. Default: 2.
|
- pages_search_depth (int, optional): Crawl depth level. Default: 2.
|
||||||
- country (str, optional): Country code for bias.
|
- country (str, optional): Full English country name (ISO-3166); country codes can be mapped to names via pycountry or i18n-iso-countries (see the sketch after this parameter list).
|
||||||
- time_range (str, optional): d | w | m | y.
|
- time_range (str, optional): d | w | m | y.
|
||||||
- topic (str, optional): general | news | academic.
|
- topic (str, optional): general | news | academic.
|
||||||
- language (str, optional): Language code (e.g., de, en, fr).
|
- language (str, optional): Language code (e.g., de, en, fr).
|
||||||
|
|
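A hedged sketch tying the parameters above together: an example web_research parameter set (values invented) plus a small helper that maps an ISO-3166 alpha-2 code to the full English country name, assuming the pycountry package mentioned in the country parameter is available.

import pycountry  # assumption: installed in the environment

def country_name_from_code(code: str) -> str:
    """Map an ISO-3166 alpha-2 code (e.g. 'de') to its English name (e.g. 'Germany')."""
    match = pycountry.countries.get(alpha_2=code.upper())
    return match.name if match else code

example_parameters = {
    "user_prompt": "Current EU rules on AI transparency",
    "max_results": 5,
    "max_pages": 5,
    "search_depth": "basic",
    "extract_depth": "advanced",
    "pages_search_depth": 2,
    "country": country_name_from_code("de"),  # -> "Germany"
    "time_range": "m",
    "topic": "news",
    "language": "de",
}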
@@ -182,8 +182,8 @@ class MethodAi(MethodBase):
|
||||||
try:
|
try:
|
||||||
user_prompt = parameters.get("user_prompt")
|
user_prompt = parameters.get("user_prompt")
|
||||||
urls = parameters.get("urls")
|
urls = parameters.get("urls")
|
||||||
max_results = parameters.get("max_results", 10)
|
max_results = parameters.get("max_results", 5)
|
||||||
max_pages = parameters.get("max_pages", 10)
|
max_pages = parameters.get("max_pages", 5)
|
||||||
search_depth = parameters.get("search_depth", "basic")
|
search_depth = parameters.get("search_depth", "basic")
|
||||||
extract_depth = parameters.get("extract_depth", "advanced")
|
extract_depth = parameters.get("extract_depth", "advanced")
|
||||||
pages_search_depth = parameters.get("pages_search_depth", 2)
|
pages_search_depth = parameters.get("pages_search_depth", 2)
|
||||||
|
|
|
||||||
|
|
@@ -154,7 +154,13 @@ class MethodOutlook(MethodBase):
|
||||||
if not query or not query.strip():
|
if not query or not query.strip():
|
||||||
# No query specified, just get emails from folder
|
# No query specified, just get emails from folder
|
||||||
if folder and folder.lower() != "all":
|
if folder and folder.lower() != "all":
|
||||||
params["$filter"] = f"parentFolderId eq '{folder}'"
|
# Use folder name directly for well-known folders, or get folder ID
|
||||||
|
if folder.lower() in ["inbox", "drafts", "sentitems", "deleteditems"]:
|
||||||
|
params["$filter"] = f"parentFolderId eq '{folder}'"
|
||||||
|
else:
|
||||||
|
# For custom folders, we need to get the folder ID first
|
||||||
|
# This will be handled by the calling method
|
||||||
|
params["$filter"] = f"parentFolderId eq '{folder}'"
|
||||||
# Add orderby for basic queries
|
# Add orderby for basic queries
|
||||||
params["$orderby"] = "receivedDateTime desc"
|
params["$orderby"] = "receivedDateTime desc"
|
||||||
return params
|
return params
|
||||||
|
|
@@ -191,11 +197,21 @@ class MethodOutlook(MethodBase):
|
||||||
|
|
||||||
|
|
||||||
# Use only subject search to keep filter simple
|
# Use only subject search to keep filter simple
|
||||||
params["$filter"] = f"contains(subject,'{clean_query}')"
|
# Handle wildcard queries specially
|
||||||
|
if clean_query == "*" or clean_query == "":
|
||||||
# Add folder filter if specified
|
# For wildcard or empty query, don't use contains filter
|
||||||
if folder and folder.lower() != "all":
|
# Just use folder filter if specified
|
||||||
params["$filter"] = f"{params['$filter']} and parentFolderId eq '{folder}'"
|
if folder and folder.lower() != "all":
|
||||||
|
params["$filter"] = f"parentFolderId eq '{folder}'"
|
||||||
|
else:
|
||||||
|
# No filter needed for wildcard search across all folders
|
||||||
|
pass
|
||||||
|
else:
|
||||||
|
params["$filter"] = f"contains(subject,'{clean_query}')"
|
||||||
|
|
||||||
|
# Add folder filter if specified
|
||||||
|
if folder and folder.lower() != "all":
|
||||||
|
params["$filter"] = f"{params['$filter']} and parentFolderId eq '{folder}'"
|
||||||
|
|
||||||
# Add orderby for basic queries
|
# Add orderby for basic queries
|
||||||
params["$orderby"] = "receivedDateTime desc"
|
params["$orderby"] = "receivedDateTime desc"
|
||||||
|
|
@@ -235,6 +251,10 @@ class MethodOutlook(MethodBase):
|
||||||
if '@' in filter_text and '.' in filter_text and ' ' not in filter_text and not filter_text.startswith('from:'):
|
if '@' in filter_text and '.' in filter_text and ' ' not in filter_text and not filter_text.startswith('from:'):
|
||||||
return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}
|
return {"$filter": f"from/fromAddress/address eq '{filter_text}'"}
|
||||||
|
|
||||||
|
# Handle OData filter conditions (contains 'eq', 'ne', 'gt', 'lt', etc.)
|
||||||
|
if any(op in filter_text.lower() for op in [' eq ', ' ne ', ' gt ', ' lt ', ' ge ', ' le ', ' and ', ' or ']):
|
||||||
|
return {"$filter": filter_text}
|
||||||
|
|
||||||
# Handle text content - search in subject
|
# Handle text content - search in subject
|
||||||
return {"$filter": f"contains(subject,'{filter_text}')"}
|
return {"$filter": f"contains(subject,'{filter_text}')"}
|
||||||
|
|
||||||
|
|
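A minimal sketch, not part of the diff, of how a $filter built by the helper above might be sent to Microsoft Graph; it assumes the synchronous requests library and an already-acquired bearer token, purely for illustration.

import requests

def list_messages_by_subject(access_token: str, filter_text: str, top: int = 10) -> dict:
    # Build the same subject-contains filter the helper above returns
    params = {
        "$filter": f"contains(subject,'{filter_text}')",
        "$top": top,
    }
    response = requests.get(
        "https://graph.microsoft.com/v1.0/me/messages",
        headers={"Authorization": f"Bearer {access_token}"},
        params=params,
    )
    response.raise_for_status()
    return response.json()  # {"value": [...], "@odata.nextLink": ...}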
@@ -300,26 +320,31 @@ class MethodOutlook(MethodBase):
|
||||||
"""
|
"""
|
||||||
GENERAL:
|
GENERAL:
|
||||||
- Purpose: Read emails and metadata from a mailbox folder.
|
- Purpose: Read emails and metadata from a mailbox folder.
|
||||||
- Input requirements: connectionReference (required); optional folder, limit, filter, expectedDocumentFormats.
|
- Input requirements: connectionReference (required); optional folder, limit, filter, outputMimeType.
|
||||||
- Output format: JSON with emails and metadata.
|
- Output format: JSON with emails and metadata.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
- folder (str, optional): Folder to read from. Default: Inbox.
|
- folder (str, optional): Folder to read from. Default: Inbox.
|
||||||
- limit (int, optional): Maximum items to return. Default: 10.
|
- limit (int, optional): Maximum items to return. Must be > 0; values <= 0 fall back to 1000. Default: 10.
|
||||||
- filter (str, optional): Sender, query operators, or subject text.
|
- filter (str, optional): Sender, query operators, or subject text.
|
||||||
- expectedDocumentFormats (list, optional): Output format preferences.
|
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
folder = parameters.get("folder", "Inbox")
|
folder = parameters.get("folder", "Inbox")
|
||||||
limit = parameters.get("limit", 10)
|
limit = parameters.get("limit", 10)
|
||||||
filter = parameters.get("filter")
|
filter = parameters.get("filter")
|
||||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
outputMimeType = parameters.get("outputMimeType", "application/json")
|
||||||
|
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
return ActionResult.isFailure(error="Connection reference is required")
|
return ActionResult.isFailure(error="Connection reference is required")
|
||||||
|
|
||||||
|
# Validate limit parameter
|
||||||
|
if limit <= 0:
|
||||||
|
limit = 1000
|
||||||
|
logger.warning(f"Invalid limit value ({limit}), using default value 1000")
|
||||||
|
|
||||||
# Validate filter parameter if provided
|
# Validate filter parameter if provided
|
||||||
if filter:
|
if filter:
|
||||||
# Remove any potentially dangerous characters that could break the filter
|
# Remove any potentially dangerous characters that could break the filter
|
||||||
|
|
@@ -343,8 +368,16 @@ class MethodOutlook(MethodBase):
|
||||||
"Content-Type": "application/json"
|
"Content-Type": "application/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
# Build the API request
|
# Get the folder ID for the specified folder
|
||||||
api_url = f"{graph_url}/me/mailFolders/{folder}/messages"
|
folder_id = self._getFolderId(folder, connection)
|
||||||
|
|
||||||
|
if folder_id:
|
||||||
|
# Build the API request with folder ID
|
||||||
|
api_url = f"{graph_url}/me/mailFolders/{folder_id}/messages"
|
||||||
|
else:
|
||||||
|
# Fallback: use folder name directly (for well-known folders like "Inbox")
|
||||||
|
api_url = f"{graph_url}/me/mailFolders/{folder}/messages"
|
||||||
|
logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
|
||||||
params = {
|
params = {
|
||||||
"$top": limit,
|
"$top": limit,
|
||||||
"$orderby": "receivedDateTime desc"
|
"$orderby": "receivedDateTime desc"
|
||||||
|
|
@@ -380,7 +413,11 @@ class MethodOutlook(MethodBase):
|
||||||
"count": len(emails_data.get("value", [])),
|
"count": len(emails_data.get("value", [])),
|
||||||
"folder": folder,
|
"folder": folder,
|
||||||
"filter": filter,
|
"filter": filter,
|
||||||
"apiResponse": emails_data
|
"apiMetadata": {
|
||||||
|
"@odata.context": emails_data.get("@odata.context"),
|
||||||
|
"@odata.count": emails_data.get("@odata.count"),
|
||||||
|
"@odata.nextLink": emails_data.get("@odata.nextLink")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
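Since apiResponse is replaced by the trimmed apiMetadata block, the @odata.nextLink it keeps is what a caller would use to page through further results. A hedged sketch of that pattern (requests is assumed for illustration, not necessarily the project's actual HTTP client):

import requests

def fetch_all_pages(first_url: str, headers: dict, params=None) -> list:
    items, url = [], first_url
    while url:
        response = requests.get(url, headers=headers, params=params)
        response.raise_for_status()
        data = response.json()
        items.extend(data.get("value", []))
        url = data.get("@odata.nextLink")  # absolute URL of the next page, or None
        params = None  # nextLink already encodes the original query parameters
    return items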
@@ -405,18 +442,15 @@ class MethodOutlook(MethodBase):
|
||||||
logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
|
logger.error(f"Error reading emails from Microsoft Graph API: {str(e)}")
|
||||||
return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}")
|
return ActionResult.isFailure(error=f"Failed to read emails: {str(e)}")
|
||||||
|
|
||||||
# Determine output format based on expected formats
|
# Determine output format based on MIME type
|
||||||
output_extension = ".json" # Default
|
mime_type_mapping = {
|
||||||
output_mime_type = "application/json" # Default
|
"application/json": ".json",
|
||||||
|
"text/plain": ".txt",
|
||||||
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
"text/csv": ".csv"
|
||||||
# Use the first expected format
|
}
|
||||||
expected_format = expectedDocumentFormats[0]
|
output_extension = mime_type_mapping.get(outputMimeType, ".json")
|
||||||
output_extension = expected_format.get("extension", ".json")
|
output_mime_type = outputMimeType
|
||||||
output_mime_type = expected_format.get("mimeType", "application/json")
|
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
|
||||||
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
|
||||||
else:
|
|
||||||
logger.info("No expected format specified, using default .json format")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
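The mapping above only picks a file extension; how the email payload is actually rendered for each outputMimeType is not shown in this hunk. A minimal sketch of one plausible serialization, assuming a flat list of email dicts with receivedDateTime and subject keys:

import csv
import io
import json

def serialize_emails(emails: list, output_mime_type: str) -> str:
    if output_mime_type == "text/csv":
        buffer = io.StringIO()
        writer = csv.DictWriter(buffer, fieldnames=["receivedDateTime", "subject"], extrasaction="ignore")
        writer.writeheader()
        writer.writerows(emails)
        return buffer.getvalue()
    if output_mime_type == "text/plain":
        return "\n".join(f"{e.get('receivedDateTime')}  {e.get('subject')}" for e in emails)
    return json.dumps(emails, indent=2)  # default: application/json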
@@ -454,27 +488,32 @@ class MethodOutlook(MethodBase):
|
||||||
"""
|
"""
|
||||||
GENERAL:
|
GENERAL:
|
||||||
- Purpose: Search emails by query and return matching items with metadata.
|
- Purpose: Search emails by query and return matching items with metadata.
|
||||||
- Input requirements: connectionReference (required); query (required); optional folder, limit, expectedDocumentFormats.
|
- Input requirements: connectionReference (required); query (required); optional folder, limit, outputMimeType.
|
||||||
- Output format: JSON with search results and metadata.
|
- Output format: JSON with search results and metadata.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
- query (str, required): Search expression.
|
- query (str, required): Search expression.
|
||||||
- folder (str, optional): Folder scope or All. Default: All.
|
- folder (str, optional): Folder scope or All. Default: All.
|
||||||
- limit (int, optional): Maximum items to return. Default: 20.
|
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
|
||||||
- expectedDocumentFormats (list, optional): Output format preferences.
|
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
query = parameters.get("query")
|
query = parameters.get("query")
|
||||||
folder = parameters.get("folder", "All")
|
folder = parameters.get("folder", "All")
|
||||||
limit = parameters.get("limit", 20)
|
limit = parameters.get("limit", 1000)
|
||||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
outputMimeType = parameters.get("outputMimeType", "application/json")
|
||||||
|
|
||||||
# Validate parameters
|
# Validate parameters
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
return ActionResult.isFailure(error="Connection reference is required")
|
return ActionResult.isFailure(error="Connection reference is required")
|
||||||
|
|
||||||
|
# Validate limit parameter
|
||||||
|
if limit <= 0:
|
||||||
|
limit = 1000
|
||||||
|
logger.warning(f"Invalid limit value ({limit}), using default value 1000")
|
||||||
|
|
||||||
if not query or not query.strip():
|
if not query or not query.strip():
|
||||||
return ActionResult.isFailure(error="Search query is required and cannot be empty")
|
return ActionResult.isFailure(error="Search query is required and cannot be empty")
|
||||||
|
|
||||||
|
|
@@ -488,12 +527,15 @@ class MethodOutlook(MethodBase):
|
||||||
# Validate limit
|
# Validate limit
|
||||||
try:
|
try:
|
||||||
limit = int(limit)
|
limit = int(limit)
|
||||||
if limit <= 0 or limit > 1000: # Microsoft Graph API has limits
|
if limit <= 0:
|
||||||
limit = 20
|
limit = 1000
|
||||||
logger.warning(f"Limit {limit} is out of range, using default value 20")
|
logger.warning(f"Invalid limit value (<=0), using default value 1000")
|
||||||
|
elif limit > 1000: # Microsoft Graph API has limits
|
||||||
|
limit = 1000
|
||||||
|
logger.warning(f"Limit {limit} exceeds maximum (1000), using 1000")
|
||||||
except (ValueError, TypeError):
|
except (ValueError, TypeError):
|
||||||
limit = 20
|
limit = 1000
|
||||||
logger.warning(f"Invalid limit value, using default value 20")
|
logger.warning(f"Invalid limit value, using default value 1000")
|
||||||
|
|
||||||
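The limit handling above is spread over two checks; as a compact illustration (the helper name is invented and not part of the original code), the same rules can be expressed in one function that also logs the value that was actually rejected:

import logging

logger = logging.getLogger(__name__)

def normalize_limit(raw, default: int = 1000, maximum: int = 1000) -> int:
    try:
        value = int(raw)
    except (ValueError, TypeError):
        logger.warning(f"Invalid limit value ({raw!r}), using default value {default}")
        return default
    if value <= 0:
        logger.warning(f"Invalid limit value ({value}), using default value {default}")
        return default
    if value > maximum:
        logger.warning(f"Limit {value} exceeds maximum ({maximum}), using {maximum}")
        return maximum
    return value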
# Get Microsoft connection
|
# Get Microsoft connection
|
||||||
connection = self._getMicrosoftConnection(connectionReference)
|
connection = self._getMicrosoftConnection(connectionReference)
|
||||||
|
|
@@ -509,9 +551,18 @@ class MethodOutlook(MethodBase):
|
||||||
"Content-Type": "application/json"
|
"Content-Type": "application/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Get the folder ID for the specified folder if needed
|
||||||
|
folder_id = None
|
||||||
|
if folder and folder.lower() != "all":
|
||||||
|
folder_id = self._getFolderId(folder, connection)
|
||||||
|
if folder_id:
|
||||||
|
logger.debug(f"Found folder ID for '{folder}': {folder_id}")
|
||||||
|
else:
|
||||||
|
logger.warning(f"Could not find folder ID for '{folder}', using folder name directly")
|
||||||
|
|
||||||
# Build the search API request
|
# Build the search API request
|
||||||
api_url = f"{graph_url}/me/messages"
|
api_url = f"{graph_url}/me/messages"
|
||||||
params = self._buildSearchParameters(query, folder, limit)
|
params = self._buildSearchParameters(query, folder_id or folder, limit)
|
||||||
|
|
||||||
# Log search parameters for debugging
|
# Log search parameters for debugging
|
||||||
logger.debug(f"Search query: '{query}'")
|
logger.debug(f"Search query: '{query}'")
|
||||||
|
|
@@ -605,7 +656,11 @@ class MethodOutlook(MethodBase):
|
||||||
"count": len(emails),
|
"count": len(emails),
|
||||||
"folder": folder,
|
"folder": folder,
|
||||||
"limit": limit,
|
"limit": limit,
|
||||||
"apiResponse": search_data,
|
"apiMetadata": {
|
||||||
|
"@odata.context": search_data.get("@odata.context"),
|
||||||
|
"@odata.count": search_data.get("@odata.count"),
|
||||||
|
"@odata.nextLink": search_data.get("@odata.nextLink")
|
||||||
|
},
|
||||||
"searchParams": params
|
"searchParams": params
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@@ -618,18 +673,15 @@ class MethodOutlook(MethodBase):
|
||||||
logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}")
|
logger.error(f"Error searching emails via Microsoft Graph API: {str(e)}")
|
||||||
return ActionResult.isFailure(error=f"Failed to search emails: {str(e)}")
|
return ActionResult.isFailure(error=f"Failed to search emails: {str(e)}")
|
||||||
|
|
||||||
# Determine output format based on expected formats
|
# Determine output format based on MIME type
|
||||||
output_extension = ".json" # Default
|
mime_type_mapping = {
|
||||||
output_mime_type = "application/json" # Default
|
"application/json": ".json",
|
||||||
|
"text/plain": ".txt",
|
||||||
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
"text/csv": ".csv"
|
||||||
# Use the first expected format
|
}
|
||||||
expected_format = expectedDocumentFormats[0]
|
output_extension = mime_type_mapping.get(outputMimeType, ".json")
|
||||||
output_extension = expected_format.get("extension", ".json")
|
output_mime_type = outputMimeType
|
||||||
output_mime_type = expected_format.get("mimeType", "application/json")
|
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
|
||||||
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
|
||||||
else:
|
|
||||||
logger.info("No expected format specified, using default .json format")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -664,20 +716,20 @@ class MethodOutlook(MethodBase):
|
||||||
"""
|
"""
|
||||||
GENERAL:
|
GENERAL:
|
||||||
- Purpose: List draft emails from a folder.
|
- Purpose: List draft emails from a folder.
|
||||||
- Input requirements: connectionReference (required); optional folder, limit, expectedDocumentFormats.
|
- Input requirements: connectionReference (required); optional folder, limit, outputMimeType.
|
||||||
- Output format: JSON with draft items and metadata.
|
- Output format: JSON with draft items and metadata.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
- folder (str, optional): Drafts folder to list. Default: Drafts.
|
- folder (str, optional): Drafts folder to list. Default: Drafts.
|
||||||
- limit (int, optional): Maximum items to return. Default: 20.
|
- limit (int, optional): Maximum items to return. Must be > 0. Default: 1000.
|
||||||
- expectedDocumentFormats (list, optional): Output format preferences.
|
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
folder = parameters.get("folder", "Drafts")
|
folder = parameters.get("folder", "Drafts")
|
||||||
limit = parameters.get("limit", 20)
|
limit = parameters.get("limit", 1000)
|
||||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
outputMimeType = parameters.get("outputMimeType", "application/json")
|
||||||
|
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
return ActionResult.isFailure(error="Connection reference is required")
|
return ActionResult.isFailure(error="Connection reference is required")
|
||||||
|
|
@@ -745,18 +797,15 @@ class MethodOutlook(MethodBase):
|
||||||
logger.error(f"Error listing drafts via Microsoft Graph API: {str(e)}")
|
logger.error(f"Error listing drafts via Microsoft Graph API: {str(e)}")
|
||||||
return ActionResult.isFailure(error=f"Failed to list drafts: {str(e)}")
|
return ActionResult.isFailure(error=f"Failed to list drafts: {str(e)}")
|
||||||
|
|
||||||
# Determine output format based on expected formats
|
# Determine output format based on MIME type
|
||||||
output_extension = ".json" # Default
|
mime_type_mapping = {
|
||||||
output_mime_type = "application/json" # Default
|
"application/json": ".json",
|
||||||
|
"text/plain": ".txt",
|
||||||
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
"text/csv": ".csv"
|
||||||
# Use the first expected format
|
}
|
||||||
expected_format = expectedDocumentFormats[0]
|
output_extension = mime_type_mapping.get(outputMimeType, ".json")
|
||||||
output_extension = expected_format.get("extension", ".json")
|
output_mime_type = outputMimeType
|
||||||
output_mime_type = expected_format.get("mimeType", "application/json")
|
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
|
||||||
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
|
||||||
else:
|
|
||||||
logger.info("No expected format specified, using default .json format")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -790,18 +839,18 @@ class MethodOutlook(MethodBase):
|
||||||
"""
|
"""
|
||||||
GENERAL:
|
GENERAL:
|
||||||
- Purpose: Find draft emails across folders.
|
- Purpose: Find draft emails across folders.
|
||||||
- Input requirements: connectionReference (required); optional limit, expectedDocumentFormats.
|
- Input requirements: connectionReference (required); optional limit, outputMimeType.
|
||||||
- Output format: JSON with drafts and metadata.
|
- Output format: JSON with drafts and metadata.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
- limit (int, optional): Maximum items to return. Default: 50.
|
- limit (int, optional): Maximum items to return. Default: 50.
|
||||||
- expectedDocumentFormats (list, optional): Output format preferences.
|
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
limit = parameters.get("limit", 50)
|
limit = parameters.get("limit", 50)
|
||||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
outputMimeType = parameters.get("outputMimeType", "application/json")
|
||||||
|
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
return ActionResult.isFailure(error="Connection reference is required")
|
return ActionResult.isFailure(error="Connection reference is required")
|
||||||
|
|
@@ -859,18 +908,15 @@ class MethodOutlook(MethodBase):
|
||||||
logger.error(f"Error finding drafts via Microsoft Graph API: {str(e)}")
|
logger.error(f"Error finding drafts via Microsoft Graph API: {str(e)}")
|
||||||
return ActionResult.isFailure(error=f"Failed to find drafts: {str(e)}")
|
return ActionResult.isFailure(error=f"Failed to find drafts: {str(e)}")
|
||||||
|
|
||||||
# Determine output format based on expected formats
|
# Determine output format based on MIME type
|
||||||
output_extension = ".json" # Default
|
mime_type_mapping = {
|
||||||
output_mime_type = "application/json" # Default
|
"application/json": ".json",
|
||||||
|
"text/plain": ".txt",
|
||||||
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
"text/csv": ".csv"
|
||||||
# Use the first expected format
|
}
|
||||||
expected_format = expectedDocumentFormats[0]
|
output_extension = mime_type_mapping.get(outputMimeType, ".json")
|
||||||
output_extension = expected_format.get("extension", ".json")
|
output_mime_type = outputMimeType
|
||||||
output_mime_type = expected_format.get("mimeType", "application/json")
|
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
|
||||||
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
|
||||||
else:
|
|
||||||
logger.info("No expected format specified, using default .json format")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
@@ -930,18 +976,18 @@ class MethodOutlook(MethodBase):
|
||||||
"""
|
"""
|
||||||
GENERAL:
|
GENERAL:
|
||||||
- Purpose: Check contents of the Drafts folder.
|
- Purpose: Check contents of the Drafts folder.
|
||||||
- Input requirements: connectionReference (required); optional limit, expectedDocumentFormats.
|
- Input requirements: connectionReference (required); optional limit, outputMimeType.
|
||||||
- Output format: JSON with drafts and metadata.
|
- Output format: JSON with drafts and metadata.
|
||||||
|
|
||||||
Parameters:
|
Parameters:
|
||||||
- connectionReference (str, required): Microsoft connection label.
|
- connectionReference (str, required): Microsoft connection label.
|
||||||
- limit (int, optional): Maximum items to return. Default: 20.
|
- limit (int, optional): Maximum items to return. Default: 20.
|
||||||
- expectedDocumentFormats (list, optional): Output format preferences.
|
- outputMimeType (str, optional): MIME type for output file. Options: "application/json" (default), "text/plain", "text/csv". Default: "application/json".
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
connectionReference = parameters.get("connectionReference")
|
connectionReference = parameters.get("connectionReference")
|
||||||
limit = parameters.get("limit", 20)
|
limit = parameters.get("limit", 20)
|
||||||
expectedDocumentFormats = parameters.get("expectedDocumentFormats", [])
|
outputMimeType = parameters.get("outputMimeType", "application/json")
|
||||||
|
|
||||||
if not connectionReference:
|
if not connectionReference:
|
||||||
return ActionResult.isFailure(error="Connection reference is required")
|
return ActionResult.isFailure(error="Connection reference is required")
|
||||||
|
|
@@ -1003,18 +1049,15 @@ class MethodOutlook(MethodBase):
|
||||||
logger.error(f"Error checking Drafts folder via Microsoft Graph API: {str(e)}")
|
logger.error(f"Error checking Drafts folder via Microsoft Graph API: {str(e)}")
|
||||||
return ActionResult.isFailure(error=f"Failed to check Drafts folder: {str(e)}")
|
return ActionResult.isFailure(error=f"Failed to check Drafts folder: {str(e)}")
|
||||||
|
|
||||||
# Determine output format based on expected formats
|
# Determine output format based on MIME type
|
||||||
output_extension = ".json" # Default
|
mime_type_mapping = {
|
||||||
output_mime_type = "application/json" # Default
|
"application/json": ".json",
|
||||||
|
"text/plain": ".txt",
|
||||||
if expectedDocumentFormats and len(expectedDocumentFormats) > 0:
|
"text/csv": ".csv"
|
||||||
# Use the first expected format
|
}
|
||||||
expected_format = expectedDocumentFormats[0]
|
output_extension = mime_type_mapping.get(outputMimeType, ".json")
|
||||||
output_extension = expected_format.get("extension", ".json")
|
output_mime_type = outputMimeType
|
||||||
output_mime_type = expected_format.get("mimeType", "application/json")
|
logger.info(f"Using output format: {output_extension} ({output_mime_type})")
|
||||||
logger.info(f"Using expected format: {output_extension} ({output_mime_type})")
|
|
||||||
else:
|
|
||||||
logger.info("No expected format specified, using default .json format")
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@@ -931,7 +931,8 @@ class MethodSharepoint(MethodBase):
|
||||||
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
|
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
|
||||||
|
|
||||||
# Check if pathQuery contains search terms (words without proper path structure)
|
# Check if pathQuery contains search terms (words without proper path structure)
|
||||||
if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
|
valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']
|
||||||
|
if not any(pathQuery.startswith(prefix) for prefix in valid_path_prefixes):
|
||||||
return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
|
return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
|
||||||
|
|
||||||
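To make the accepted shapes concrete, a small usage sketch of the prefix check introduced above (the example paths reuse the syntax from the error message; the helper name is illustrative only):

valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']

def is_valid_path_query(path_query: str) -> bool:
    return any(path_query.startswith(prefix) for prefix in valid_path_prefixes)

assert is_valid_path_query('/site:KM LayerFinance/Documents/Work')   # proper SharePoint path
assert not is_valid_path_query('quarterly report 2024')              # search terms -> use findDocumentPath first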
# For pathQuery, we need to discover sites to find the specific one
|
# For pathQuery, we need to discover sites to find the specific one
|
||||||
|
|
@@ -1627,7 +1628,8 @@ class MethodSharepoint(MethodBase):
|
||||||
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
|
return ActionResult.isFailure(error="pathQuery must start with '/' and include site name with syntax /site:<Site Display Name>/... e.g. /site:KM LayerFinance/Documents/Work")
|
||||||
|
|
||||||
# Check if pathQuery contains search terms (words without proper path structure)
|
# Check if pathQuery contains search terms (words without proper path structure)
|
||||||
if not pathQuery.startswith('/site:') and not pathQuery.startswith('/Documents') and not pathQuery.startswith('/Shared Documents'):
|
valid_path_prefixes = ['/site:', '/Documents', '/documents', '/Shared Documents', '/shared documents']
|
||||||
|
if not any(pathQuery.startswith(prefix) for prefix in valid_path_prefixes):
|
||||||
return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
|
return ActionResult.isFailure(error=f"Invalid pathQuery '{pathQuery}'. This appears to be search terms, not a valid SharePoint path. Use findDocumentPath action first to search for folders, then use the returned folder path as pathQuery.")
|
||||||
|
|
||||||
# For pathQuery, we need to discover sites to find the specific one
|
# For pathQuery, we need to discover sites to find the specific one
|
||||||
|
|
|
||||||
|
|
@@ -1,9 +1,9 @@
|
||||||
# adaptive module for React mode
|
# adaptive module for React mode
|
||||||
# Provides adaptive learning capabilities
|
# Provides adaptive learning capabilities
|
||||||
|
|
||||||
from .intentAnalyzer import IntentAnalyzer, DataType, ExpectedFormat
|
from .intentAnalyzer import IntentAnalyzer
|
||||||
from .contentValidator import ContentValidator
|
from .contentValidator import ContentValidator
|
||||||
from .learningEngine import LearningEngine
|
from .learningEngine import LearningEngine
|
||||||
from .progressTracker import ProgressTracker
|
from .progressTracker import ProgressTracker
|
||||||
|
|
||||||
__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker', 'DataType', 'ExpectedFormat']
|
__all__ = ['IntentAnalyzer', 'ContentValidator', 'LearningEngine', 'ProgressTracker']
|
||||||
|
|
|
||||||
|
|
@@ -1,8 +1,9 @@
|
||||||
# contentValidator.py
|
# contentValidator.py
|
||||||
# Content validation for adaptive React mode
|
# Content validation for adaptive React mode
|
||||||
|
|
||||||
import re
|
|
||||||
import logging
|
import logging
|
||||||
|
import json
|
||||||
|
import re
|
||||||
from typing import List, Dict, Any
|
from typing import List, Dict, Any
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
@@ -10,34 +11,14 @@ logger = logging.getLogger(__name__)
|
||||||
class ContentValidator:
|
class ContentValidator:
|
||||||
"""Validates delivered content against user intent"""
|
"""Validates delivered content against user intent"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, services=None):
|
||||||
pass
|
self.services = services
|
||||||
|
|
||||||
def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
|
async def validateContent(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
"""Validates delivered content against user intent"""
|
"""Validates delivered content against user intent using AI"""
|
||||||
try:
|
try:
|
||||||
validationDetails = []
|
# Use AI for comprehensive validation
|
||||||
|
return await self._validateWithAI(documents, intent)
|
||||||
for doc in documents:
|
|
||||||
content = self._extractContent(doc)
|
|
||||||
detail = self._validateSingleDocument(content, doc, intent)
|
|
||||||
validationDetails.append(detail)
|
|
||||||
|
|
||||||
# Calculate overall success
|
|
||||||
overallSuccess = all(detail.get("successCriteriaMet", [False]) for detail in validationDetails)
|
|
||||||
|
|
||||||
# Calculate quality score
|
|
||||||
qualityScore = self._calculateQualityScore(validationDetails)
|
|
||||||
|
|
||||||
# Generate improvement suggestions
|
|
||||||
improvementSuggestions = self._generateImprovementSuggestions(validationDetails, intent)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"overallSuccess": overallSuccess,
|
|
||||||
"qualityScore": qualityScore,
|
|
||||||
"validationDetails": validationDetails,
|
|
||||||
"improvementSuggestions": improvementSuggestions
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error validating content: {str(e)}")
|
logger.error(f"Error validating content: {str(e)}")
|
||||||
|
|
@@ -56,253 +37,236 @@ class ContentValidator:
|
||||||
except Exception:
|
except Exception:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
def _validateSingleDocument(self, content: str, doc: Any, intent: Dict[str, Any]) -> Dict[str, Any]:
|
|
||||||
"""Validates a single document against intent"""
|
|
||||||
# Check data type match
|
|
||||||
dataTypeMatch = self._checkDataTypeMatch(content, intent.get("dataType", "unknown"))
|
|
||||||
|
|
||||||
# Check format match
|
|
||||||
formatMatch = self._checkFormatMatch(content, intent.get("expectedFormat", "unknown"))
|
|
||||||
|
|
||||||
# Calculate quality score
|
|
||||||
qualityScore = self._calculateDocumentQualityScore(content, intent)
|
|
||||||
|
|
||||||
# Check success criteria
|
|
||||||
successCriteriaMet = self._checkSuccessCriteria(content, intent)
|
|
||||||
|
|
||||||
# Identify specific issues
|
|
||||||
specificIssues = self._identifySpecificIssues(content, intent)
|
|
||||||
|
|
||||||
# Generate improvement suggestions
|
|
||||||
improvementSuggestions = self._generateDocumentImprovementSuggestions(content, intent)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"documentName": getattr(doc, 'documentName', 'Unknown'),
|
|
||||||
"dataTypeMatch": dataTypeMatch,
|
|
||||||
"formatMatch": formatMatch,
|
|
||||||
"qualityScore": qualityScore,
|
|
||||||
"successCriteriaMet": successCriteriaMet,
|
|
||||||
"specificIssues": specificIssues,
|
|
||||||
"improvementSuggestions": improvementSuggestions
|
|
||||||
}
|
|
||||||
|
|
||||||
def _checkDataTypeMatch(self, content: str, dataType: str) -> bool:
|
|
||||||
"""Checks if content matches the expected data type"""
|
|
||||||
if dataType == "numbers":
|
|
||||||
return self._containsNumbers(content)
|
|
||||||
elif dataType == "text":
|
|
||||||
return self._containsText(content)
|
|
||||||
elif dataType == "documents":
|
|
||||||
return self._containsDocumentContent(content)
|
|
||||||
elif dataType == "analysis":
|
|
||||||
return self._containsAnalysis(content)
|
|
||||||
elif dataType == "code":
|
|
||||||
return self._containsCode(content)
|
|
||||||
else:
|
|
||||||
return True # Unknown type, assume match
|
|
||||||
|
|
||||||
def _containsNumbers(self, content: str) -> bool:
|
|
||||||
"""Checks if content contains actual numbers (not code)"""
|
|
||||||
# Look for actual numbers in the content
|
|
||||||
numbers = re.findall(r'\b\d+\b', content)
|
|
||||||
|
|
||||||
# Check if it's code (contains function definitions, etc.)
|
|
||||||
isCode = any(keyword in content.lower() for keyword in [
|
|
||||||
'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
|
|
||||||
'return', 'print(', 'console.log', 'public ', 'private '
|
|
||||||
])
|
|
||||||
|
|
||||||
# If it's code, it doesn't contain actual numbers
|
|
||||||
if isCode:
|
|
||||||
return False
|
|
||||||
|
|
||||||
# If it has numbers and it's not code, it contains actual numbers
|
|
||||||
return len(numbers) > 0
|
|
||||||
|
|
||||||
def _containsText(self, content: str) -> bool:
|
|
||||||
"""Checks if content contains readable text"""
|
|
||||||
# Remove numbers and special characters
|
|
||||||
textContent = re.sub(r'[^\w\s]', '', content)
|
|
||||||
words = textContent.split()
|
|
||||||
|
|
||||||
# Check if there are enough words to be considered text
|
|
||||||
return len(words) > 5
|
|
||||||
|
|
||||||
def _containsDocumentContent(self, content: str) -> bool:
|
|
||||||
"""Checks if content is suitable for document creation"""
|
|
||||||
# Check for structured content
|
|
||||||
hasStructure = any(indicator in content for indicator in [
|
|
||||||
'\n', '\t', '|', '-', '*', '1.', '2.', '•', '◦'
|
|
||||||
])
|
|
||||||
|
|
||||||
# Check for meaningful content
|
|
||||||
hasMeaningfulContent = len(content.strip()) > 50
|
|
||||||
|
|
||||||
return hasStructure and hasMeaningfulContent
|
|
||||||
|
|
||||||
def _containsAnalysis(self, content: str) -> bool:
|
|
||||||
"""Checks if content contains analysis"""
|
|
||||||
analysisIndicators = [
|
|
||||||
'analysis', 'findings', 'conclusion', 'summary', 'insights',
|
|
||||||
'trends', 'patterns', 'comparison', 'evaluation', 'assessment'
|
|
||||||
]
|
|
||||||
|
|
||||||
contentLower = content.lower()
|
|
||||||
return any(indicator in contentLower for indicator in analysisIndicators)
|
|
||||||
|
|
||||||
def _containsCode(self, content: str) -> bool:
|
|
||||||
"""Checks if content contains code"""
|
|
||||||
codeIndicators = [
|
|
||||||
'def ', 'function', 'import ', 'class ', 'for ', 'while ', 'if ',
|
|
||||||
'return', 'print(', 'console.log', 'public ', 'private ', 'void ',
|
|
||||||
'int ', 'string ', 'var ', 'let ', 'const '
|
|
||||||
]
|
|
||||||
|
|
||||||
contentLower = content.lower()
|
|
||||||
return any(indicator in contentLower for indicator in codeIndicators)
|
|
||||||
|
|
||||||
def _checkFormatMatch(self, content: str, expectedFormat: str) -> bool:
|
|
||||||
"""Checks if content matches expected format"""
|
|
||||||
if expectedFormat == "raw_data":
|
|
||||||
# Raw data should be simple, not heavily formatted
|
|
||||||
return not any(indicator in content for indicator in [
|
|
||||||
'<html>', '<div>', '<table>', '## ', '### ', '**', '__'
|
|
||||||
])
|
|
||||||
elif expectedFormat == "formatted":
|
|
||||||
# Formatted content should have structure
|
|
||||||
return any(indicator in content for indicator in [
|
|
||||||
'\n', '\t', '|', '-', '*', '1.', '2.', '•'
|
|
||||||
])
|
|
||||||
elif expectedFormat == "structured":
|
|
||||||
# Structured content should have clear organization
|
|
||||||
return any(indicator in content for indicator in [
|
|
||||||
'{', '}', '[', ']', '|', '\t', ' '
|
|
||||||
])
|
|
||||||
else:
|
|
||||||
return True # Unknown format, assume match
|
|
||||||
|
|
||||||
def _checkSuccessCriteria(self, content: str, intent: Dict[str, Any]) -> List[bool]:
|
|
||||||
"""Checks if content meets success criteria"""
|
|
||||||
criteriaMet = []
|
|
||||||
successCriteria = intent.get("successCriteria", [])
|
|
||||||
|
|
||||||
for criterion in successCriteria:
|
|
||||||
if 'prime numbers' in criterion.lower():
|
|
||||||
# Check if content contains actual prime numbers, not code
|
|
||||||
hasNumbers = bool(re.search(r'\b\d+\b', content))
|
|
||||||
isNotCode = not any(keyword in content.lower() for keyword in [
|
|
||||||
'def ', 'function', 'import ', 'class '
|
|
||||||
])
|
|
||||||
criteriaMet.append(hasNumbers and isNotCode)
|
|
||||||
elif 'document' in criterion.lower():
|
|
||||||
# Check if content is suitable for document creation
|
|
||||||
hasStructure = any(indicator in content for indicator in [
|
|
||||||
'\n', '\t', '|', '-', '*', '1.', '2.'
|
|
||||||
])
|
|
||||||
criteriaMet.append(hasStructure)
|
|
||||||
elif 'format' in criterion.lower():
|
|
||||||
# Check if content is properly formatted
|
|
||||||
hasFormatting = any(indicator in content for indicator in [
|
|
||||||
'\n', '\t', '|', '-', '*', '1.', '2.', '•'
|
|
||||||
])
|
|
||||||
criteriaMet.append(hasFormatting)
|
|
||||||
else:
|
|
||||||
# Generic check - content should not be empty
|
|
||||||
criteriaMet.append(len(content.strip()) > 0)
|
|
||||||
|
|
||||||
return criteriaMet
|
|
||||||
|
|
||||||
def _calculateDocumentQualityScore(self, content: str, intent: Dict[str, Any]) -> float:
|
|
||||||
"""Calculates quality score for a single document"""
|
|
||||||
score = 0.0
|
|
||||||
|
|
||||||
# Base score for having content
|
|
||||||
if len(content.strip()) > 0:
|
|
||||||
score += 0.2
|
|
||||||
|
|
||||||
# Score for data type match
|
|
||||||
if self._checkDataTypeMatch(content, intent.get("dataType", "unknown")):
|
|
||||||
score += 0.3
|
|
||||||
|
|
||||||
# Score for format match
|
|
||||||
if self._checkFormatMatch(content, intent.get("expectedFormat", "unknown")):
|
|
||||||
score += 0.2
|
|
||||||
|
|
||||||
# Score for success criteria
|
|
||||||
successCriteriaMet = self._checkSuccessCriteria(content, intent)
|
|
||||||
if successCriteriaMet:
|
|
||||||
successRate = sum(successCriteriaMet) / len(successCriteriaMet)
|
|
||||||
score += 0.3 * successRate
|
|
||||||
|
|
||||||
return min(score, 1.0)
|
|
||||||
|
|
||||||
def _calculateQualityScore(self, validationDetails: List[Dict[str, Any]]) -> float:
|
|
||||||
"""Calculates overall quality score from validation details"""
|
|
||||||
if not validationDetails:
|
|
||||||
return 0.0
|
|
||||||
|
|
||||||
totalScore = sum(detail.get("qualityScore", 0) for detail in validationDetails)
|
|
||||||
return totalScore / len(validationDetails)
|
|
||||||
|
|
||||||
def _identifySpecificIssues(self, content: str, intent: Dict[str, Any]) -> List[str]:
|
|
||||||
"""Identifies specific issues with the content"""
|
|
||||||
issues = []
|
|
||||||
|
|
||||||
# Check for common issues
|
|
||||||
if intent.get("dataType") == "numbers" and self._containsCode(content):
|
|
||||||
issues.append("Content contains code instead of actual numbers")
|
|
||||||
|
|
||||||
if intent.get("expectedFormat") == "raw_data" and any(indicator in content for indicator in ['<html>', '## ', '**']):
|
|
||||||
issues.append("Content is formatted when raw data was requested")
|
|
||||||
|
|
||||||
if len(content.strip()) == 0:
|
|
||||||
issues.append("Content is empty")
|
|
||||||
|
|
||||||
return issues
|
|
||||||
|
|
||||||
def _generateDocumentImprovementSuggestions(self, content: str, intent: Dict[str, Any]) -> List[str]:
|
|
||||||
"""Generates improvement suggestions for a single document"""
|
|
||||||
suggestions = []
|
|
||||||
|
|
||||||
dataType = intent.get("dataType", "unknown")
|
|
||||||
expectedFormat = intent.get("expectedFormat", "unknown")
|
|
||||||
|
|
||||||
if dataType == "numbers" and self._containsCode(content):
|
|
||||||
suggestions.append("Deliver actual numbers, not code to generate them")
|
|
||||||
|
|
||||||
if expectedFormat == "raw_data" and any(indicator in content for indicator in ['<html>', '## ']):
|
|
||||||
suggestions.append("Provide raw data without formatting")
|
|
||||||
|
|
||||||
if len(content.strip()) == 0:
|
|
||||||
suggestions.append("Provide actual content")
|
|
||||||
|
|
||||||
return suggestions
|
|
||||||
|
|
||||||
def _generateImprovementSuggestions(self, validationDetails: List[Dict[str, Any]],
|
|
||||||
intent: Dict[str, Any]) -> List[str]:
|
|
||||||
"""Generates improvement suggestions based on validation results"""
|
|
||||||
suggestions = []
|
|
||||||
|
|
||||||
# Check for common issues
|
|
||||||
if not any(detail.get("dataTypeMatch", False) for detail in validationDetails):
|
|
||||||
dataType = intent.get("dataType", "unknown")
|
|
||||||
suggestions.append(f"Content should contain {dataType} data, not code or other formats")
|
|
||||||
|
|
||||||
if not any(detail.get("formatMatch", False) for detail in validationDetails):
|
|
||||||
expectedFormat = intent.get("expectedFormat", "unknown")
|
|
||||||
suggestions.append(f"Content should be in {expectedFormat} format")
|
|
||||||
|
|
||||||
# Add specific suggestions from validation details
|
|
||||||
for detail in validationDetails:
|
|
||||||
suggestions.extend(detail.get("improvementSuggestions", []))
|
|
||||||
|
|
||||||
return list(set(suggestions)) # Remove duplicates
|
|
||||||
|
|
||||||
def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
|
def _createFailedValidationResult(self, error: str) -> Dict[str, Any]:
|
||||||
"""Creates a failed validation result"""
|
"""Creates a failed validation result"""
|
||||||
return {
|
return {
|
||||||
"overallSuccess": False,
|
"overallSuccess": False,
|
||||||
"qualityScore": 0.0,
|
"qualityScore": 0.0,
|
||||||
"validationDetails": [],
|
"validationDetails": [],
|
||||||
"improvementSuggestions": [f"Validation failed: {error}"]
|
"improvementSuggestions": [f"NEXT STEP: Fix validation error - {error}. Check system logs for more details and retry the operation."]
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _isValidJsonResponse(self, response: str) -> bool:
|
||||||
|
"""Checks if response contains valid JSON structure"""
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
# Look for JSON with expected structure
|
||||||
|
json_match = re.search(r'\{[^{}]*"overallSuccess"[^{}]*\}', response, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
json.loads(json_match.group(0))
|
||||||
|
return True
|
||||||
|
return False
|
||||||
|
except:
|
||||||
|
return False
|
||||||
|
|
||||||
|
def _extractFallbackValidationResult(self, response: str) -> Dict[str, Any]:
|
||||||
|
"""Extracts validation result from malformed AI response"""
|
||||||
|
try:
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Extract key values using regex patterns
|
||||||
|
overall_success = re.search(r'"overallSuccess"\s*:\s*(true|false)', response, re.IGNORECASE)
|
||||||
|
quality_score = re.search(r'"qualityScore"\s*:\s*([0-9.]+)', response)
|
||||||
|
gap_analysis = re.search(r'"gapAnalysis"\s*:\s*"([^"]*)"', response)
|
||||||
|
|
||||||
|
# Determine overall success from context if not found
|
||||||
|
if not overall_success:
|
||||||
|
# Look for positive/negative indicators in the text
|
||||||
|
if any(word in response.lower() for word in ['success', 'complete', 'fulfilled', 'satisfied']):
|
||||||
|
overall_success = True
|
||||||
|
elif any(word in response.lower() for word in ['failed', 'incomplete', 'missing', 'error']):
|
||||||
|
overall_success = False
|
||||||
|
else:
|
||||||
|
overall_success = False
|
||||||
|
|
||||||
|
return {
|
||||||
|
"overallSuccess": overall_success if isinstance(overall_success, bool) else (overall_success.group(1).lower() == 'true' if overall_success else False),
|
||||||
|
"qualityScore": float(quality_score.group(1)) if quality_score else 0.5,
|
||||||
|
"validationDetails": [{
|
||||||
|
"documentName": "AI Validation (Fallback)",
|
||||||
|
"gapAnalysis": gap_analysis.group(1) if gap_analysis else "Unable to parse detailed analysis",
|
||||||
|
"successCriteriaMet": [False] # Conservative fallback
|
||||||
|
}],
|
||||||
|
"improvementSuggestions": ["NEXT STEP: AI response was malformed - retry the operation for better results"]
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Fallback extraction failed: {str(e)}")
|
||||||
|
return None
|
||||||
|
|
||||||
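For reference, a tiny self-contained example (the sample response is invented) of the structural-JSON extraction strategy that _isValidJsonResponse above and _validateWithAI below rely on:

import json
import re

sample_response = 'Validation summary: {"overallSuccess": true, "qualityScore": 0.42} end of analysis.'

match = re.search(r'\{[^{}]*"overallSuccess"[^{}]*\}', sample_response, re.DOTALL)
if match:
    print(json.loads(match.group(0)))  # {'overallSuccess': True, 'qualityScore': 0.42}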
|
async def _validateWithAI(self, documents: List[Any], intent: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
|
"""AI-based comprehensive validation - single main function"""
|
||||||
|
try:
|
||||||
|
if not hasattr(self, 'services') or not self.services or not hasattr(self.services, 'ai'):
|
||||||
|
return self._createFailedValidationResult("AI service not available")
|
||||||
|
|
||||||
|
# Extract content from all documents
|
||||||
|
documentContents = []
|
||||||
|
for doc in documents:
|
||||||
|
content = self._extractContent(doc)
|
||||||
|
documentContents.append({
|
||||||
|
"name": getattr(doc, 'documentName', 'Unknown'),
|
||||||
|
"content": content[:2000] # Limit content for AI processing
|
||||||
|
})
|
||||||
|
|
||||||
|
# Create comprehensive AI validation prompt
|
||||||
|
validationPrompt = f"""
|
||||||
|
You are a comprehensive task completion validator. Analyze if the delivered content fulfills the user's request.
|
||||||
|
|
||||||
|
USER REQUEST: {intent.get('primaryGoal', 'Unknown')}
|
||||||
|
EXPECTED DATA TYPE: {intent.get('dataType', 'unknown')}
|
||||||
|
EXPECTED FORMAT: {intent.get('expectedFormat', 'unknown')}
|
||||||
|
SUCCESS CRITERIA: {intent.get('successCriteria', [])}
|
||||||
|
|
||||||
|
DELIVERED CONTENT:
|
||||||
|
{json.dumps(documentContents, indent=2)}
|
||||||
|
|
||||||
|
Perform comprehensive validation:
|
||||||
|
1. Check if content matches expected data type
|
||||||
|
2. Check if content matches expected format
|
||||||
|
3. Verify success criteria are met
|
||||||
|
4. Assess overall quality and completeness
|
||||||
|
5. Identify specific gaps and issues
|
||||||
|
6. Provide actionable next steps
|
||||||
|
|
||||||
|
CRITICAL: You MUST respond with ONLY the JSON object below. NO TEXT ANALYSIS. NO EXPLANATIONS. NO OTHER CONTENT.
|
||||||
|
|
||||||
|
RESPOND WITH THIS EXACT JSON FORMAT:
|
||||||
|
|
||||||
|
{{
|
||||||
|
"overallSuccess": false,
|
||||||
|
"qualityScore": 0.5,
|
||||||
|
"dataTypeMatch": false,
|
||||||
|
"formatMatch": false,
|
||||||
|
"successCriteriaMet": [false, false],
|
||||||
|
"gapAnalysis": "Content does not match expected format and lacks required elements",
|
||||||
|
"improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"],
|
||||||
|
"validationDetails": [
|
||||||
|
{{
|
||||||
|
"documentName": "Content Validation",
|
||||||
|
"issues": ["Format mismatch", "Missing required elements"],
|
||||||
|
"suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"]
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Call AI service for validation
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationType.GENERAL
|
||||||
|
|
||||||
|
response = await self.services.ai.callAi(
|
||||||
|
prompt=validationPrompt,
|
||||||
|
documents=None,
|
||||||
|
options=request_options
|
||||||
|
)
|
||||||
|
|
||||||
|
# If first attempt fails, try with more explicit prompt
|
||||||
|
if response and not self._isValidJsonResponse(response):
|
||||||
|
logger.debug("First AI validation attempt failed, retrying with explicit JSON-only prompt")
|
||||||
|
explicitPrompt = f"""
|
||||||
|
VALIDATE AND RETURN JSON ONLY - NO TEXT ANALYSIS
|
||||||
|
|
||||||
|
Request: {intent.get('primaryGoal', 'Unknown')}
|
||||||
|
Data Type: {intent.get('dataType', 'unknown')}
|
||||||
|
Format: {intent.get('expectedFormat', 'unknown')}
|
||||||
|
Criteria: {intent.get('successCriteria', [])}
|
||||||
|
|
||||||
|
Content: {json.dumps(documentContents, indent=2)}
|
||||||
|
|
||||||
|
RESPOND WITH THIS EXACT JSON FORMAT - NO OTHER TEXT:
|
||||||
|
|
||||||
|
{{
|
||||||
|
"overallSuccess": false,
|
||||||
|
"qualityScore": 0.3,
|
||||||
|
"dataTypeMatch": false,
|
||||||
|
"formatMatch": false,
|
||||||
|
"successCriteriaMet": [false, false],
|
||||||
|
"gapAnalysis": "Content does not match expected format and lacks required elements",
|
||||||
|
"improvementSuggestions": ["NEXT STEP: Create proper content in expected format", "NEXT STEP: Ensure all success criteria are met"],
|
||||||
|
"validationDetails": [
|
||||||
|
{{
|
||||||
|
"documentName": "Content Validation",
|
||||||
|
"issues": ["Format mismatch", "Missing required elements"],
|
||||||
|
"suggestions": ["NEXT STEP: Fix format", "NEXT STEP: Add missing elements"]
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
response = await self.services.ai.callAi(
|
||||||
|
prompt=explicitPrompt,
|
||||||
|
documents=None,
|
||||||
|
options=request_options
|
||||||
|
)
|
||||||
|
|
||||||
|
if not response or not response.strip():
|
||||||
|
logger.warning("AI validation returned empty response")
|
||||||
|
return self._createFailedValidationResult("AI validation failed - empty response")
|
||||||
|
|
||||||
|
# Clean and extract JSON from response
|
||||||
|
result = response.strip()
|
||||||
|
logger.debug(f"AI validation response length: {len(result)}")
|
||||||
|
|
||||||
|
# Try to find JSON in the response with multiple strategies
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Strategy 1: Look for JSON in markdown code blocks
|
||||||
|
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(1)
|
||||||
|
logger.debug(f"Extracted JSON from markdown code block: {result[:200]}...")
|
||||||
|
else:
|
||||||
|
# Strategy 2: Look for JSON object with proper structure
|
||||||
|
json_match = re.search(r'\{[^{}]*"overallSuccess"[^{}]*\}', result, re.DOTALL)
|
||||||
|
if not json_match:
|
||||||
|
# Strategy 3: Look for any JSON object
|
||||||
|
json_match = re.search(r'\{.*\}', result, re.DOTALL)
|
||||||
|
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(0)
|
||||||
|
logger.debug(f"Extracted JSON directly: {result[:200]}...")
|
||||||
|
else:
|
||||||
|
logger.debug(f"No JSON found in AI response, trying fallback extraction: {result[:200]}...")
|
||||||
|
logger.debug(f"Full AI response: {result}")
|
||||||
|
|
||||||
|
# Try fallback extraction for text responses
|
||||||
|
fallback_result = self._extractFallbackValidationResult(result)
|
||||||
|
if fallback_result:
|
||||||
|
logger.info("Using fallback text extraction for validation")
|
||||||
|
return fallback_result
|
||||||
|
|
||||||
|
logger.warning("All AI validation attempts failed - no JSON found and fallback extraction failed")
|
||||||
|
return self._createFailedValidationResult("AI validation failed - no JSON in response")
|
||||||
|
|
||||||
|
try:
|
||||||
|
aiResult = json.loads(result)
|
||||||
|
logger.info("AI validation JSON parsed successfully")
|
||||||
|
|
||||||
|
return {
|
||||||
|
"overallSuccess": aiResult.get("overallSuccess", False),
|
||||||
|
"qualityScore": aiResult.get("qualityScore", 0.0),
|
||||||
|
"validationDetails": aiResult.get("validationDetails", [{
|
||||||
|
"documentName": "AI Validation",
|
||||||
|
"gapAnalysis": aiResult.get("gapAnalysis", ""),
|
||||||
|
"successCriteriaMet": aiResult.get("successCriteriaMet", [False])
|
||||||
|
}]),
|
||||||
|
"improvementSuggestions": aiResult.get("improvementSuggestions", [])
|
||||||
|
}
|
||||||
|
|
||||||
|
except json.JSONDecodeError as json_error:
|
||||||
|
logger.warning(f"All AI validation attempts failed - invalid JSON: {str(json_error)}")
|
||||||
|
logger.debug(f"JSON content: {result}")
|
||||||
|
|
||||||
|
# Try to extract key information from malformed response
|
||||||
|
fallbackResult = self._extractFallbackValidationResult(result)
|
||||||
|
if fallbackResult:
|
||||||
|
logger.info("Using fallback validation result from malformed JSON")
|
||||||
|
return fallbackResult
|
||||||
|
|
||||||
|
return self._createFailedValidationResult(f"AI validation failed - invalid JSON: {str(json_error)}")
|
||||||
|
|
||||||
|
return self._createFailedValidationResult("AI validation failed - no response")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"AI validation failed: {str(e)}")
|
||||||
|
return self._createFailedValidationResult(f"AI validation error: {str(e)}")
|
||||||
|
|
@@ -1,228 +1,156 @@
|
||||||
# intentAnalyzer.py
|
# intentAnalyzer.py
|
||||||
# Intent analysis for adaptive React mode
|
# Intent analysis for adaptive React mode - AI-based, language-agnostic
|
||||||
|
|
||||||
import re
|
import json
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, Any, List
|
from typing import Dict, Any, List
|
||||||
from enum import Enum
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class DataType(Enum):
    NUMBERS = "numbers"
    TEXT = "text"
    DOCUMENTS = "documents"
    ANALYSIS = "analysis"
    CODE = "code"
    UNKNOWN = "unknown"


class ExpectedFormat(Enum):
    RAW_DATA = "raw_data"
    FORMATTED = "formatted"
    STRUCTURED = "structured"
    VISUAL = "visual"
    UNKNOWN = "unknown"

class IntentAnalyzer:
|
class IntentAnalyzer:
|
||||||
"""Analyzes user intent to understand what they actually want"""
|
"""Analyzes user intent using AI - language-agnostic and generic"""
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self, services=None):
|
||||||
self.dataTypePatterns = {
|
self.services = services
|
||||||
DataType.NUMBERS: [
|
|
||||||
r'\b(numbers?|digits?|count|list|sequence)\b',
|
|
||||||
r'\b(prime|fibonacci|random|even|odd)\s+(numbers?)\b',
|
|
||||||
r'\b(calculate|compute|generate)\s+(numbers?)\b',
|
|
||||||
r'\b(first|last)\s+\d+\s+(numbers?)\b'
|
|
||||||
],
|
|
||||||
DataType.TEXT: [
|
|
||||||
r'\b(text|content|words?|sentences?|paragraphs?)\b',
|
|
||||||
r'\b(write|create|generate)\s+(text|content)\b',
|
|
||||||
r'\b(summary|description|explanation)\b',
|
|
||||||
r'\b(article|essay|report)\b'
|
|
||||||
],
|
|
||||||
DataType.DOCUMENTS: [
|
|
||||||
r'\b(document|file|report|pdf|word|excel)\b',
|
|
||||||
r'\b(create|generate|make)\s+(document|file|report)\b',
|
|
||||||
r'\b(format|structure|organize)\s+(document)\b',
|
|
||||||
r'\b(presentation|slides?)\b'
|
|
||||||
],
|
|
||||||
DataType.ANALYSIS: [
|
|
||||||
r'\b(analyze|analysis|examine|study|evaluate)\b',
|
|
||||||
r'\b(insights?|findings?|results?)\b',
|
|
||||||
r'\b(compare|contrast|evaluate)\b',
|
|
||||||
r'\b(trends?|patterns?)\b'
|
|
||||||
],
|
|
||||||
DataType.CODE: [
|
|
||||||
r'\b(code|program|script|algorithm|function)\b',
|
|
||||||
r'\b(write|create|develop)\s+(code|program|script)\b',
|
|
||||||
r'\b(implement|build|construct)\b',
|
|
||||||
r'\b(debug|fix|optimize)\s+(code)\b'
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
self.formatPatterns = {
|
|
||||||
ExpectedFormat.RAW_DATA: [
|
|
||||||
r'\b(raw|plain|simple|basic)\b',
|
|
||||||
r'\b(numbers?|data|list)\b(?!\s+(in|as|with))',
|
|
||||||
r'\b(just|only)\s+(numbers?|data)\b'
|
|
||||||
],
|
|
||||||
ExpectedFormat.FORMATTED: [
|
|
||||||
r'\b(formatted|structured|organized|presented)\b',
|
|
||||||
r'\b(table|chart|graph|visual)\b',
|
|
||||||
r'\b(pretty|nice|clean)\s+(format|presentation)\b',
|
|
||||||
r'\b(professional|polished)\b'
|
|
||||||
],
|
|
||||||
ExpectedFormat.STRUCTURED: [
|
|
||||||
r'\b(json|xml|csv|structured)\b',
|
|
||||||
r'\b(organized|categorized|grouped)\b',
|
|
||||||
r'\b(systematic|methodical)\b',
|
|
||||||
r'\b(database|spreadsheet)\b'
|
|
||||||
]
|
|
||||||
}
|
|
||||||
|
|
||||||
def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
|
async def analyzeUserIntent(self, userPrompt: str, context: Any) -> Dict[str, Any]:
|
||||||
"""Analyzes user intent from prompt and context"""
|
"""Analyzes user intent from prompt and context using AI"""
|
||||||
try:
|
try:
|
||||||
# Extract primary goal
|
# Use AI to analyze intent
|
||||||
primaryGoal = self._extractPrimaryGoal(userPrompt)
|
aiAnalysis = await self._analyzeIntentWithAI(userPrompt, context)
|
||||||
|
if aiAnalysis:
|
||||||
|
return aiAnalysis
|
||||||
|
|
||||||
# Classify data type
|
# Fallback to basic analysis if AI fails
|
||||||
dataType = self._classifyDataType(userPrompt)
|
return self._createBasicIntentAnalysis(userPrompt)
|
||||||
|
|
||||||
# Determine expected format
|
|
||||||
expectedFormat = self._determineExpectedFormat(userPrompt)
|
|
||||||
|
|
||||||
# Assess quality requirements
|
|
||||||
qualityRequirements = self._assessQualityRequirements(userPrompt, context)
|
|
||||||
|
|
||||||
# Extract success criteria
|
|
||||||
successCriteria = self._extractSuccessCriteria(userPrompt, context)
|
|
||||||
|
|
||||||
# Calculate confidence score
|
|
||||||
confidenceScore = self._calculateConfidenceScore(dataType, expectedFormat, successCriteria)
|
|
||||||
|
|
||||||
return {
|
|
||||||
"primaryGoal": primaryGoal,
|
|
||||||
"dataType": dataType.value,
|
|
||||||
"expectedFormat": expectedFormat.value,
|
|
||||||
"qualityRequirements": qualityRequirements,
|
|
||||||
"successCriteria": successCriteria,
|
|
||||||
"confidenceScore": confidenceScore
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error analyzing user intent: {str(e)}")
|
logger.error(f"Error analyzing user intent: {str(e)}")
|
||||||
return self._createDefaultIntentAnalysis(userPrompt)
|
return self._createDefaultIntentAnalysis(userPrompt)
|
||||||
|
|
||||||
def _extractPrimaryGoal(self, userPrompt: str) -> str:
|
async def _analyzeIntentWithAI(self, userPrompt: str, context: Any) -> Dict[str, Any]:
|
||||||
"""Extracts the primary goal from user prompt"""
|
"""Uses AI to analyze user intent - language-agnostic"""
|
||||||
# Simple extraction - can be enhanced
|
try:
|
||||||
return userPrompt.strip()
|
if not self.services or not hasattr(self.services, 'ai'):
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Create AI analysis prompt
|
||||||
|
analysisPrompt = f"""
|
||||||
|
You are an intent analyzer. Analyze the user's request to understand what they want delivered.
|
||||||
|
|
||||||
|
USER REQUEST: {userPrompt}
|
||||||
|
|
||||||
|
CONTEXT: {getattr(context.task_step, 'objective', '') if hasattr(context, 'task_step') and context.task_step else ''}
|
||||||
|
|
||||||
|
Analyze the user's intent and determine:
|
||||||
|
1. What type of data/content they want (numbers, text, documents, analysis, code, etc.)
|
||||||
|
2. What format they expect (raw data, formatted, structured, visual, etc.)
|
||||||
|
3. What quality requirements they have (accuracy, completeness, format)
|
||||||
|
4. What specific success criteria define completion
|
||||||
|
|
||||||
|
CRITICAL: Respond with ONLY the JSON object below. Do not include any explanatory text, analysis, or other content before or after the JSON.
|
||||||
|
|
||||||
|
{{
|
||||||
|
"primaryGoal": "The main objective the user wants to achieve",
|
||||||
|
"dataType": "numbers|text|documents|analysis|code|unknown",
|
||||||
|
"expectedFormat": "raw_data|formatted|structured|visual|unknown",
|
||||||
|
"qualityRequirements": {{
|
||||||
|
"accuracyThreshold": 0.0-1.0,
|
||||||
|
"completenessThreshold": 0.0-1.0,
|
||||||
|
"formatRequirement": "any|formatted|raw|structured"
|
||||||
|
}},
|
||||||
|
"successCriteria": ["specific criterion 1", "specific criterion 2"],
|
||||||
|
"confidenceScore": 0.0-1.0
|
||||||
|
}}
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Call AI service for analysis
|
||||||
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationType
|
||||||
|
request_options = AiCallOptions()
|
||||||
|
request_options.operationType = OperationType.GENERAL
|
||||||
|
|
||||||
|
response = await self.services.ai.callAi(
|
||||||
|
prompt=analysisPrompt,
|
||||||
|
documents=None,
|
||||||
|
options=request_options
|
||||||
|
)
|
||||||
|
|
||||||
|
# If first attempt fails, try with more explicit prompt
|
||||||
|
if response and not self._isValidJsonResponse(response):
|
||||||
|
logger.debug("First AI intent analysis attempt failed, retrying with explicit JSON-only prompt")
|
||||||
|
explicitPrompt = f"""
|
||||||
|
{analysisPrompt}
|
||||||
|
|
||||||
|
IMPORTANT: You must respond with ONLY valid JSON. No explanations, no analysis, no text before or after. Just the JSON object.
|
||||||
|
"""
|
||||||
|
response = await self.services.ai.callAi(
|
||||||
|
prompt=explicitPrompt,
|
||||||
|
documents=None,
|
||||||
|
options=request_options
|
||||||
|
)
|
||||||
|
|
||||||
|
if not response or not response.strip():
|
||||||
|
logger.warning("AI intent analysis returned empty response")
|
||||||
|
return None
|
||||||
|
|
||||||
|
# Clean and extract JSON from response
|
||||||
|
result = response.strip()
|
||||||
|
logger.debug(f"AI intent analysis response length: {len(result)}")
|
||||||
|
|
||||||
|
# Try to find JSON in the response with multiple strategies
|
||||||
|
import re
|
||||||
|
|
||||||
|
# Strategy 1: Look for JSON in markdown code blocks
|
||||||
|
json_match = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', result, re.DOTALL)
|
||||||
|
if json_match:
|
||||||
|
result = json_match.group(1)
|
||||||
|
logger.debug(f"Extracted JSON from markdown code block: {result[:200]}...")
|
||||||
|
else:
|
||||||
|
# Strategy 2: Look for JSON object with proper structure
|
||||||
|
json_match = re.search(r'\{[^{}]*"primaryGoal"[^{}]*\}', result, re.DOTALL)
|
||||||
|
if not json_match:
|
||||||
|
# Strategy 3: Look for any JSON object
|
||||||
|
json_match = re.search(r'\{.*\}', result, re.DOTALL)
|
||||||
|
|
||||||
|
if not json_match:
|
||||||
|
logger.warning(f"All AI intent analysis attempts failed - no JSON found in response: {result[:200]}...")
|
||||||
|
logger.debug(f"Full AI response: {result}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
result = json_match.group(0)
|
||||||
|
logger.debug(f"Extracted JSON directly: {result[:200]}...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
aiResult = json.loads(result)
|
||||||
|
logger.info("AI intent analysis JSON parsed successfully")
|
||||||
|
return aiResult
|
||||||
|
|
||||||
|
except json.JSONDecodeError as json_error:
|
||||||
|
logger.warning(f"All AI intent analysis attempts failed - invalid JSON: {str(json_error)}")
|
||||||
|
logger.debug(f"JSON content: {result}")
|
||||||
|
return None
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"AI intent analysis failed: {str(e)}")
|
||||||
|
return None
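
Illustrative sketch (assumed example values; the keys mirror the JSON schema embedded in analysisPrompt above) of the dict _analyzeIntentWithAI returns on success:

# Assumed example only; the actual values come from the model's JSON response.
example_intent = {
    "primaryGoal": "Generate the first 20 prime numbers as a plain list",
    "dataType": "numbers",
    "expectedFormat": "raw_data",
    "qualityRequirements": {
        "accuracyThreshold": 0.95,
        "completenessThreshold": 0.9,
        "formatRequirement": "raw"
    },
    "successCriteria": ["Contains exactly 20 prime numbers", "No surrounding prose"],
    "confidenceScore": 0.9
}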
|
||||||
|
|
||||||
def _classifyDataType(self, userPrompt: str) -> DataType:
|
def _createBasicIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
|
||||||
"""Classifies the type of data the user wants"""
|
"""Creates basic intent analysis without AI"""
|
||||||
promptLower = userPrompt.lower()
|
|
||||||
|
|
||||||
for dataType, patterns in self.dataTypePatterns.items():
|
|
||||||
for pattern in patterns:
|
|
||||||
if re.search(pattern, promptLower):
|
|
||||||
return dataType
|
|
||||||
|
|
||||||
return DataType.UNKNOWN
|
|
||||||
|
|
||||||
def _determineExpectedFormat(self, userPrompt: str) -> ExpectedFormat:
|
|
||||||
"""Determines the expected format of the output"""
|
|
||||||
promptLower = userPrompt.lower()
|
|
||||||
|
|
||||||
for formatType, patterns in self.formatPatterns.items():
|
|
||||||
for pattern in patterns:
|
|
||||||
if re.search(pattern, promptLower):
|
|
||||||
return formatType
|
|
||||||
|
|
||||||
return ExpectedFormat.UNKNOWN
|
|
||||||
|
|
||||||
def _assessQualityRequirements(self, userPrompt: str, context: Any) -> Dict[str, Any]:
|
|
||||||
"""Assesses quality requirements from prompt and context"""
|
|
||||||
promptLower = userPrompt.lower()
|
|
||||||
|
|
||||||
# Check for accuracy requirements
|
|
||||||
accuracyThreshold = 0.8
|
|
||||||
if any(word in promptLower for word in ['exact', 'precise', 'accurate', 'correct']):
|
|
||||||
accuracyThreshold = 0.95
|
|
||||||
elif any(word in promptLower for word in ['approximate', 'rough', 'estimate']):
|
|
||||||
accuracyThreshold = 0.7
|
|
||||||
|
|
||||||
# Check for completeness requirements
|
|
||||||
completenessThreshold = 0.8
|
|
||||||
if any(word in promptLower for word in ['complete', 'full', 'comprehensive', 'all']):
|
|
||||||
completenessThreshold = 0.95
|
|
||||||
elif any(word in promptLower for word in ['summary', 'brief', 'overview']):
|
|
||||||
completenessThreshold = 0.6
|
|
||||||
|
|
||||||
# Check for format requirements
|
|
||||||
formatRequirement = "any"
|
|
||||||
if any(word in promptLower for word in ['formatted', 'structured', 'organized']):
|
|
||||||
formatRequirement = "formatted"
|
|
||||||
elif any(word in promptLower for word in ['raw', 'plain', 'simple']):
|
|
||||||
formatRequirement = "raw"
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"accuracyThreshold": accuracyThreshold,
|
"primaryGoal": userPrompt.strip(),
|
||||||
"completenessThreshold": completenessThreshold,
|
"dataType": "unknown",
|
||||||
"formatRequirement": formatRequirement
|
"expectedFormat": "unknown",
|
||||||
|
"qualityRequirements": {
|
||||||
|
"accuracyThreshold": 0.8,
|
||||||
|
"completenessThreshold": 0.8,
|
||||||
|
"formatRequirement": "any"
|
||||||
|
},
|
||||||
|
"successCriteria": ["Delivers what the user requested"],
|
||||||
|
"confidenceScore": 0.5
|
||||||
}
|
}
|
||||||
|
|
||||||
def _extractSuccessCriteria(self, userPrompt: str, context: Any) -> List[str]:
|
|
||||||
"""Extracts success criteria from prompt and context"""
|
|
||||||
criteria = []
|
|
||||||
promptLower = userPrompt.lower()
|
|
||||||
|
|
||||||
# Extract explicit criteria
|
|
||||||
if 'first' in promptLower and 'numbers' in promptLower:
|
|
||||||
criteria.append("Contains the first N numbers as requested")
|
|
||||||
|
|
||||||
if 'prime' in promptLower:
|
|
||||||
criteria.append("Contains actual prime numbers, not code to generate them")
|
|
||||||
|
|
||||||
if 'document' in promptLower:
|
|
||||||
criteria.append("Creates a properly formatted document")
|
|
||||||
|
|
||||||
if 'format' in promptLower:
|
|
||||||
criteria.append("Content is properly formatted as requested")
|
|
||||||
|
|
||||||
# Add context-based criteria
|
|
||||||
if hasattr(context, 'task_step') and context.task_step:
|
|
||||||
taskObjective = context.task_step.objective.lower()
|
|
||||||
if 'word' in taskObjective:
|
|
||||||
criteria.append("Creates a Word document")
|
|
||||||
if 'excel' in taskObjective:
|
|
||||||
criteria.append("Creates an Excel spreadsheet")
|
|
||||||
|
|
||||||
return criteria if criteria else ["Delivers what the user requested"]
|
|
||||||
|
|
||||||
def _calculateConfidenceScore(self, dataType: DataType, expectedFormat: ExpectedFormat,
|
|
||||||
successCriteria: List[str]) -> float:
|
|
||||||
"""Calculates confidence score for the intent analysis"""
|
|
||||||
score = 0.0
|
|
||||||
|
|
||||||
# Data type confidence
|
|
||||||
if dataType != DataType.UNKNOWN:
|
|
||||||
score += 0.3
|
|
||||||
|
|
||||||
# Format confidence
|
|
||||||
if expectedFormat != ExpectedFormat.UNKNOWN:
|
|
||||||
score += 0.2
|
|
||||||
|
|
||||||
# Success criteria confidence
|
|
||||||
if len(successCriteria) > 0:
|
|
||||||
score += 0.3
|
|
||||||
|
|
||||||
# Additional confidence for specific patterns
|
|
||||||
if len(successCriteria) > 1:
|
|
||||||
score += 0.2
|
|
||||||
|
|
||||||
return min(score, 1.0)
|
|
||||||
|
|
||||||
def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
|
def _createDefaultIntentAnalysis(self, userPrompt: str) -> Dict[str, Any]:
|
||||||
"""Creates a default intent analysis when analysis fails"""
|
"""Creates a default intent analysis when analysis fails"""
|
||||||
return {
|
return {
|
||||||
|
|
@ -237,3 +165,16 @@ class IntentAnalyzer:
|
||||||
"successCriteria": ["Delivers what the user requested"],
|
"successCriteria": ["Delivers what the user requested"],
|
||||||
"confidenceScore": 0.1
|
"confidenceScore": 0.1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def _isValidJsonResponse(self, response: str) -> bool:
    """Checks if response contains valid JSON structure"""
    try:
        import re
        # Look for JSON with expected structure
        json_match = re.search(r'\{[^{}]*"primaryGoal"[^{}]*\}', response, re.DOTALL)
        if json_match:
            json.loads(json_match.group(0))
            return True
        return False
    except Exception:
        return False
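
A quick sketch (assumed input strings) of how this check drives the retry branch in _analyzeIntentWithAI: the explicit JSON-only prompt is sent only when the first response fails this test.

# Assumed examples, not from the source:
#   self._isValidJsonResponse('{"primaryGoal": "Summarize the attached report"}')  -> True
#   self._isValidJsonResponse('Sure! Here is my analysis of the request...')       -> False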
|
||||||
|
|
|
||||||
|
|
@ -31,8 +31,8 @@ class ReactMode(BaseMode):
|
||||||
def __init__(self, services, workflow):
|
def __init__(self, services, workflow):
|
||||||
super().__init__(services, workflow)
|
super().__init__(services, workflow)
|
||||||
# Initialize adaptive components
|
# Initialize adaptive components
|
||||||
self.intentAnalyzer = IntentAnalyzer()
|
self.intentAnalyzer = IntentAnalyzer(services)
|
||||||
self.contentValidator = ContentValidator()
|
self.contentValidator = ContentValidator(services)
|
||||||
self.learningEngine = LearningEngine()
|
self.learningEngine = LearningEngine()
|
||||||
self.progressTracker = ProgressTracker()
|
self.progressTracker = ProgressTracker()
|
||||||
self.currentIntent = None
|
self.currentIntent = None
|
||||||
|
|
@ -49,13 +49,14 @@ class ReactMode(BaseMode):
|
||||||
"""Execute task using React mode - iterative plan-act-observe-refine loop"""
|
"""Execute task using React mode - iterative plan-act-observe-refine loop"""
|
||||||
logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
|
logger.info(f"=== STARTING TASK {taskIndex or '?'}: {taskStep.objective} ===")
|
||||||
|
|
||||||
# NEW: Analyze user intent with both original prompt and task objective
|
# NEW: Analyze intents separately for proper validation vs task completion
|
||||||
# Get original user prompt from services (clean and reliable)
|
# Workflow-level intent from cleaned original user prompt
|
||||||
original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective
|
original_prompt = self.services.currentUserPrompt if self.services and hasattr(self.services, 'currentUserPrompt') else taskStep.objective
|
||||||
combined_context = f"Original request: {original_prompt}\n\nCurrent task: {taskStep.objective}"
|
self.workflowIntent = await self.intentAnalyzer.analyzeUserIntent(original_prompt, context)
|
||||||
|
# Task-level intent from current task objective (used only for task-scoped checks)
|
||||||
self.currentIntent = self.intentAnalyzer.analyzeUserIntent(combined_context, context)
|
self.taskIntent = await self.intentAnalyzer.analyzeUserIntent(taskStep.objective, context)
|
||||||
logger.info(f"Intent analysis (original + task): {self.currentIntent}")
|
logger.info(f"Intent analysis — workflow: {self.workflowIntent}")
|
||||||
|
logger.info(f"Intent analysis — task: {self.taskIntent}")
|
||||||
|
|
||||||
# NEW: Reset progress tracking for new task
|
# NEW: Reset progress tracking for new task
|
||||||
self.progressTracker.reset()
|
self.progressTracker.reset()
|
||||||
|
|
@ -99,18 +100,18 @@ class ReactMode(BaseMode):
|
||||||
# Attach deterministic label for clarity
|
# Attach deterministic label for clarity
|
||||||
observation['resultLabel'] = result.resultLabel
|
observation['resultLabel'] = result.resultLabel
|
||||||
|
|
||||||
# NEW: Add content validation
|
# NEW: Add content validation (against original cleaned user prompt / workflow intent)
|
||||||
if self.currentIntent and result.documents:
|
if getattr(self, 'workflowIntent', None) and result.documents:
|
||||||
validationResult = self.contentValidator.validateContent(result.documents, self.currentIntent)
|
validationResult = await self.contentValidator.validateContent(result.documents, self.workflowIntent)
|
||||||
observation['contentValidation'] = validationResult
|
observation['contentValidation'] = validationResult
|
||||||
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")
|
logger.info(f"Content validation: {validationResult['overallSuccess']} (quality: {validationResult['qualityScore']:.2f})")
|
||||||
|
|
||||||
# NEW: Learn from feedback
|
# NEW: Learn from feedback
|
||||||
feedback = self._collectFeedback(result, validationResult, self.currentIntent)
|
feedback = self._collectFeedback(result, validationResult, self.workflowIntent)
|
||||||
self.learningEngine.learnFromFeedback(feedback, context, self.currentIntent)
|
self.learningEngine.learnFromFeedback(feedback, context, self.workflowIntent)
|
||||||
|
|
||||||
# NEW: Update progress
|
# NEW: Update progress
|
||||||
self.progressTracker.updateProgress(result, validationResult, self.currentIntent)
|
self.progressTracker.updateProgress(result, validationResult, self.workflowIntent)
|
||||||
|
|
||||||
decision = await self._refineDecide(context, observation)
|
decision = await self._refineDecide(context, observation)
|
||||||
|
|
||||||
|
|
@ -204,6 +205,11 @@ class ReactMode(BaseMode):
|
||||||
selection = json.loads(response[jsonStart:jsonEnd])
|
selection = json.loads(response[jsonStart:jsonEnd])
|
||||||
if 'action' not in selection or not isinstance(selection['action'], str):
|
if 'action' not in selection or not isinstance(selection['action'], str):
|
||||||
raise ValueError("Selection missing 'action' as string")
|
raise ValueError("Selection missing 'action' as string")
|
||||||
|
|
||||||
|
# Validate document references - prevent AI from inventing Message IDs
|
||||||
|
if 'requiredInputDocuments' in selection:
|
||||||
|
self._validateDocumentReferences(selection['requiredInputDocuments'], context)
|
||||||
|
|
||||||
# Enforce spec: Stage 1 must NOT include 'parameters'
|
# Enforce spec: Stage 1 must NOT include 'parameters'
|
||||||
if 'parameters' in selection:
|
if 'parameters' in selection:
|
||||||
# Remove to avoid accidental carryover
|
# Remove to avoid accidental carryover
|
||||||
|
|
@ -213,6 +219,38 @@ class ReactMode(BaseMode):
|
||||||
selection['parameters'] = None
|
selection['parameters'] = None
|
||||||
return selection
|
return selection
|
||||||
|
|
||||||
|
def _validateDocumentReferences(self, document_refs: List[str], context: TaskContext) -> None:
    """Validate that document references exist in the current workflow"""
    if not document_refs:
        return

    # Get available documents from the current workflow
    try:
        available_docs = self.services.workflow.getAvailableDocuments(self.services.currentWorkflow)
        if not available_docs or available_docs == "No documents available":
            logger.warning("No documents available for validation")
            return

        # Extract all valid references from available documents
        valid_refs = []
        for line in available_docs.split('\n'):
            if 'docList:' in line or 'docItem:' in line:
                # Extract reference from line like " - docList:msg_xxx:label" or " - docItem:xxx:filename with spaces"
                ref_match = re.search(r'(docList:[^\s]+|docItem:[^\s]+(?:\s+[^\s]+)*)', line)
                if ref_match:
                    valid_refs.append(ref_match.group(1))

        # Check if all provided references are valid
        for ref in document_refs:
            if ref not in valid_refs:
                logger.error(f"Invalid document reference: {ref}")
                logger.error(f"Available references: {valid_refs}")
                raise ValueError(f"Document reference '{ref}' not found in available documents. Use only exact references from AVAILABLE_DOCUMENTS_INDEX.")

    except Exception as e:
        logger.error(f"Error validating document references: {str(e)}")
        raise ValueError(f"Failed to validate document references: {str(e)}")
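
A small sketch (assumed index lines, modeled on the inline comment above) of what the reference regex extracts; the real index text comes from getAvailableDocuments():

# Assumed example lines; real entries are produced by the workflow service.
import re

index_lines = [
    "  - docList:msg_47a7a578-e8f2-4ba8-ac66-0dbff40605e0:round8_task1_action1_results",
    "  - docItem:5d8b7aee-b546-4487-b6a8-835c86f7b186:AI_Generated_Document_20251006-104256.docx",
]
pattern = r'(docList:[^\s]+|docItem:[^\s]+(?:\s+[^\s]+)*)'
valid_refs = [m.group(1) for line in index_lines if (m := re.search(pattern, line))]
# valid_refs now holds the exact strings the model must copy into requiredInputDocuments.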
|
||||||
|
|
||||||
async def _actExecute(self, context: TaskContext, selection: Dict[str, Any], taskStep: TaskStep,
|
async def _actExecute(self, context: TaskContext, selection: Dict[str, Any], taskStep: TaskStep,
|
||||||
workflow: ChatWorkflow, stepIndex: int) -> ActionResult:
|
workflow: ChatWorkflow, stepIndex: int) -> ActionResult:
|
||||||
"""Act: request minimal parameters then execute selected action"""
|
"""Act: request minimal parameters then execute selected action"""
|
||||||
|
|
|
||||||
|
|
@ -42,35 +42,38 @@ def extractUserPrompt(context: Any) -> str:
|
||||||
Fallback to the task_step objective.
|
Fallback to the task_step objective.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Prefer services.currentUserPrompt when accessible through context
|
|
||||||
services = getattr(context, 'services', None)
|
services = getattr(context, 'services', None)
|
||||||
if services and getattr(services, 'currentUserPrompt', None):
|
|
||||||
return services.currentUserPrompt
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
if hasattr(context, 'task_step') and context.task_step:
|
# Determine raw user prompt from services or task_step
|
||||||
return context.task_step.objective or 'No request specified'
|
rawPrompt = None
|
||||||
return 'No request specified'
|
if services and getattr(services, 'currentUserPrompt', None):
|
||||||
|
rawPrompt = services.currentUserPrompt
|
||||||
|
elif hasattr(context, 'task_step') and context.task_step:
|
||||||
|
rawPrompt = context.task_step.objective or 'No request specified'
|
||||||
|
else:
|
||||||
|
rawPrompt = 'No request specified'
|
||||||
|
|
||||||
|
# Prefer values computed at workflow start by WorkflowManager analyzer
|
||||||
|
normalized = getattr(services, 'currentUserPromptNormalized', None) if services else None
|
||||||
|
if normalized:
|
||||||
|
return normalized
|
||||||
|
return rawPrompt
|
||||||
|
except Exception:
|
||||||
|
# Robust fallback behavior
|
||||||
|
if hasattr(context, 'task_step') and context.task_step:
|
||||||
|
return context.task_step.objective or 'No request specified'
|
||||||
|
return 'No request specified'
|
||||||
|
|
||||||
def extractWorkflowHistory(service: Any, context: Any) -> str:
|
def extractWorkflowHistory(service: Any, context: Any) -> str:
|
||||||
"""Extract workflow history from context. Maps to {{KEY:WORKFLOW_HISTORY}}
|
"""Extract workflow history from context. Maps to {{KEY:WORKFLOW_HISTORY}}
|
||||||
Reverse-chronological, enriched with message summaries and document labels.
|
Reverse-chronological, enriched with message summaries and document labels.
|
||||||
"""
|
"""
|
||||||
# Prefer explicit workflow on context; else fall back to services.workflow
|
|
||||||
workflow = None
|
|
||||||
try:
|
try:
|
||||||
if hasattr(context, 'workflow') and context.workflow:
|
history = getPreviousRoundContext(service, service.currentWorkflow)
|
||||||
workflow = context.workflow
|
|
||||||
elif hasattr(service, 'workflow') and service.workflow:
|
|
||||||
workflow = service.workflow
|
|
||||||
except Exception:
|
|
||||||
workflow = None
|
|
||||||
|
|
||||||
if workflow:
|
|
||||||
history = getPreviousRoundContext(service, workflow)
|
|
||||||
return history or "No previous workflow rounds available"
|
return history or "No previous workflow rounds available"
|
||||||
return "No previous workflow rounds available"
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting workflow history: {str(e)}")
|
||||||
|
return "No previous workflow rounds available"
|
||||||
|
|
||||||
def extractAvailableMethods(service: Any) -> str:
|
def extractAvailableMethods(service: Any) -> str:
|
||||||
"""Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}"""
|
"""Extract available methods for action planning. Maps to {{KEY:AVAILABLE_METHODS}}"""
|
||||||
|
|
@ -99,7 +102,15 @@ def extractAvailableMethods(service: Any) -> str:
|
||||||
|
|
||||||
def extractUserLanguage(service: Any) -> str:
|
def extractUserLanguage(service: Any) -> str:
|
||||||
"""Extract user language from service. Maps to {{KEY:USER_LANGUAGE}}"""
|
"""Extract user language from service. Maps to {{KEY:USER_LANGUAGE}}"""
|
||||||
return service.user.language if service and service.user else 'en'
|
try:
|
||||||
|
# Prefer detected language if available
|
||||||
|
if service and getattr(service, 'currentUserLanguage', None):
|
||||||
|
return service.currentUserLanguage
|
||||||
|
return service.user.language if service and service.user else 'en'
|
||||||
|
except Exception:
|
||||||
|
return 'en'
|
||||||
|
|
||||||
|
# Normalization now happens centrally in WorkflowManager._sendFirstMessage; no AI call here.
|
||||||
|
|
||||||
|
|
||||||
def _computeMessageSummary(msg) -> str:
|
def _computeMessageSummary(msg) -> str:
|
||||||
|
|
@ -371,9 +382,10 @@ def extractLatestRefinementFeedback(context: Any) -> str:
|
||||||
def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
|
def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
|
||||||
"""Summary of available documents (count only)."""
|
"""Summary of available documents (count only)."""
|
||||||
try:
|
try:
|
||||||
documents = service.workflow.getAvailableDocuments(context.workflow)
|
documents = service.workflow.getAvailableDocuments(service.currentWorkflow)
|
||||||
if documents and documents != "No documents available":
|
if documents and documents != "No documents available":
|
||||||
doc_count = documents.count("docList:") + documents.count("docItem:")
|
# Count only actual documents, not list labels
|
||||||
|
doc_count = documents.count("docItem:")
|
||||||
return f"{doc_count} documents available from previous tasks"
|
return f"{doc_count} documents available from previous tasks"
|
||||||
return "No documents available"
|
return "No documents available"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
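A quick sketch (assumed index text) of why the count switched to docItem only: list labels would otherwise inflate the reported number of documents.

# Assumed example index; counting "docItem:" yields 2, matching the actual documents,
# while the old "docList:" + "docItem:" count would have reported 3.
index_text = (
    "docList:msg_1:round1_task1_results\n"
    "  - docItem:aaa:report.docx\n"
    "  - docItem:bbb:data.csv\n"
)
doc_count = index_text.count("docItem:")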
@ -383,7 +395,7 @@ def extractAvailableDocumentsSummary(service: Any, context: Any) -> str:
|
||||||
def extractAvailableDocumentsIndex(service: Any, context: Any) -> str:
|
def extractAvailableDocumentsIndex(service: Any, context: Any) -> str:
|
||||||
"""Index of available documents with detailed references for parameter generation."""
|
"""Index of available documents with detailed references for parameter generation."""
|
||||||
try:
|
try:
|
||||||
return service.workflow.getAvailableDocuments(context.workflow)
|
return service.workflow.getAvailableDocuments(service.currentWorkflow)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error getting document index: {str(e)}")
|
logger.error(f"Error getting document index: {str(e)}")
|
||||||
return "No documents available"
|
return "No documents available"
|
||||||
|
|
|
||||||
|
|
@ -32,7 +32,7 @@ def generateReactPlanSelectionPrompt(services, context: Any) -> PromptBundle:
|
||||||
PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
|
PromptPlaceholder(label="AVAILABLE_CONNECTIONS_INDEX", content=extractAvailableConnectionsIndex(services), summaryAllowed=False),
|
||||||
]
|
]
|
||||||
|
|
||||||
template = """Select exactly one action to advance the task.
|
template = """Select exactly one next action to advance the task incrementally.
|
||||||
|
|
||||||
OBJECTIVE:
|
OBJECTIVE:
|
||||||
{{KEY:USER_PROMPT}}
|
{{KEY:USER_PROMPT}}
|
||||||
|
|
@ -52,7 +52,11 @@ AVAILABLE_DOCUMENTS_INDEX:
|
||||||
AVAILABLE_CONNECTIONS_INDEX:
|
AVAILABLE_CONNECTIONS_INDEX:
|
||||||
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
|
{{KEY:AVAILABLE_CONNECTIONS_INDEX}}
|
||||||
|
|
||||||
REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text):
|
REPLY: Return ONLY a JSON object with the following structure (no comments, no extra text). The chosen action MUST:
|
||||||
|
- be the next logical incremental step toward fulfilling the objective
|
||||||
|
- not attempt to complete the entire objective in one step
|
||||||
|
- if producing files, target exactly one output format for this step
|
||||||
|
- reference ONLY existing document IDs/labels from AVAILABLE_DOCUMENTS_INDEX
|
||||||
{{
|
{{
|
||||||
"action": "method.action_name",
|
"action": "method.action_name",
|
||||||
"actionObjective": "...",
|
"actionObjective": "...",
|
||||||
|
|
@ -64,7 +68,7 @@ REPLY: Return ONLY a JSON object with the following structure (no comments, no e
|
||||||
|
|
||||||
EXAMPLE how to assign references from AVAILABLE_DOCUMENTS_INDEX and AVAILABLE_CONNECTIONS_INDEX:
|
EXAMPLE how to assign references from AVAILABLE_DOCUMENTS_INDEX and AVAILABLE_CONNECTIONS_INDEX:
|
||||||
"requiredInputDocuments": ["docList:msg_47a7a578-e8f2-4ba8-ac66-0dbff40605e0:round8_task1_action1_results","docItem:5d8b7aee-b546-4487-b6a8-835c86f7b186:AI_Generated_Document_20251006-104256.docx"],
|
"requiredInputDocuments": ["docList:msg_47a7a578-e8f2-4ba8-ac66-0dbff40605e0:round8_task1_action1_results","docItem:5d8b7aee-b546-4487-b6a8-835c86f7b186:AI_Generated_Document_20251006-104256.docx"],
|
||||||
"requiredConnection": "connection:msft:p.motsch@valueon.ch:1ae8b8e5-128b-49b8-b1cb-7c632669eeae",
|
"requiredConnection": "connection:msft:p.motsch@valueon.ch",
|
||||||
|
|
||||||
RULES:
|
RULES:
|
||||||
1. Use EXACT action names from AVAILABLE_METHODS
|
1. Use EXACT action names from AVAILABLE_METHODS
|
||||||
|
|
@ -72,7 +76,11 @@ RULES:
|
||||||
3. parametersContext must be short and sufficient for Stage 2
|
3. parametersContext must be short and sufficient for Stage 2
|
||||||
4. Return ONLY JSON - no markdown, no explanations
|
4. Return ONLY JSON - no markdown, no explanations
|
||||||
5. For requiredInputDocuments, use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
|
5. For requiredInputDocuments, use ONLY exact references from AVAILABLE_DOCUMENTS_INDEX (docList:... or docItem:...)
|
||||||
|
- DO NOT invent or modify Message IDs
|
||||||
|
- DO NOT create new references
|
||||||
|
- Copy references EXACTLY as shown in AVAILABLE_DOCUMENTS_INDEX
|
||||||
6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX
|
6. For requiredConnection, use ONLY an exact label from AVAILABLE_CONNECTIONS_INDEX
|
||||||
|
7. Plan incrementally: if the overall intent needs multiple output formats (e.g., CSV and HTML), choose one format in this step and leave the other(s) for subsequent steps
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return PromptBundle(prompt=template, placeholders=placeholders)
|
return PromptBundle(prompt=template, placeholders=placeholders)
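
A sketch of a Stage 1 selection that satisfies these rules; the field values are assumed, the action name is hypothetical, and the part of the reply schema elided by the hunk above is not reproduced:

# Assumed example; document and connection references are copied verbatim from the EXAMPLE above.
selection_example = {
    "action": "documents.create_file",  # hypothetical action name, not taken from AVAILABLE_METHODS
    "actionObjective": "Produce a CSV file with the rows extracted from the referenced DOCX",
    "requiredInputDocuments": [
        "docItem:5d8b7aee-b546-4487-b6a8-835c86f7b186:AI_Generated_Document_20251006-104256.docx"
    ],
    "requiredConnection": "connection:msft:p.motsch@valueon.ch",
    "parametersContext": "Extract the table from the DOCX and write it as CSV; one output format for this step"
}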
|
||||||
|
|
|
||||||
|
|
@ -28,6 +28,8 @@ def generateTaskPlanningPrompt(services, context: Any) -> PromptBundle:
|
||||||
|
|
||||||
Break down user requests into logical, executable task steps.
|
Break down user requests into logical, executable task steps.
|
||||||
|
|
||||||
|
**IMPORTANT**: If the user asks for ONE complete business objective, create ONLY ONE task that accomplishes the entire objective. Do NOT split it into multiple micro-tasks.
|
||||||
|
|
||||||
## 📋 Context
|
## 📋 Context
|
||||||
|
|
||||||
### User Request
|
### User Request
|
||||||
|
|
@ -46,12 +48,20 @@ Break down user requests into logical, executable task steps.
|
||||||
- **ONE TOPIC PER TASK** - Each task should handle one complete business objective
|
- **ONE TOPIC PER TASK** - Each task should handle one complete business objective
|
||||||
- **HIGH-LEVEL FOCUS** - Plan strategic outcomes, not implementation steps
|
- **HIGH-LEVEL FOCUS** - Plan strategic outcomes, not implementation steps
|
||||||
- **AVOID MICRO-TASKS** - Don't create separate tasks for each small action
|
- **AVOID MICRO-TASKS** - Don't create separate tasks for each small action
|
||||||
|
- **CRITICAL**: If the user asks for ONE thing (like "analyse document list and produce summary"), create ONLY ONE task that does the complete job
|
||||||
|
|
||||||
### Task Grouping Examples
|
### Task Grouping Examples
|
||||||
- **Research + Analysis + Report** → ONE task: "Web research report"
|
- **Research + Analysis + Report** → ONE task: "Web research report"
|
||||||
- **Data Collection + Processing + Visualization** → ONE task: "Collect and present data"
|
- **Data Collection + Processing + Visualization** → ONE task: "Collect and present data"
|
||||||
|
- **Document splitting** (analyze + extract + create files) → ONE task: "Split document into separate files"
|
||||||
- **Different topics** (email + flowers) → SEPARATE tasks: "Send formal email..." + "Order flowers from Fleurop for delivery to 123 Main St, include card message"
|
- **Different topics** (email + flowers) → SEPARATE tasks: "Send formal email..." + "Order flowers from Fleurop for delivery to 123 Main St, include card message"
|
||||||
|
|
||||||
|
### Common Single-Task Scenarios
|
||||||
|
- **"Split document into sections"** → ONE task: "Split document into separate files"
|
||||||
|
- **"Extract data and create report"** → ONE task: "Extract data and create report"
|
||||||
|
- **"Analyze and summarize document"** → ONE task: "Analyze and summarize document"
|
||||||
|
- **"Convert file to different format"** → ONE task: "Convert file to different format"
|
||||||
|
|
||||||
### Retry Handling
|
### Retry Handling
|
||||||
- **If retry request**: Analyze previous rounds to understand what failed
|
- **If retry request**: Analyze previous rounds to understand what failed
|
||||||
- **Learn from mistakes**: Improve the plan based on previous failures
|
- **Learn from mistakes**: Improve the plan based on previous failures
|
||||||
|
|
|
||||||
|
|
@ -216,23 +216,23 @@ class WorkflowManager:
|
||||||
# Update the message with documents in database
|
# Update the message with documents in database
|
||||||
self.services.workflow.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
|
self.services.workflow.updateMessage(message.id, {"documents": [doc.to_dict() for doc in documents]})
|
||||||
|
|
||||||
# Analyze the user's input to extract intent and offload bulky context into documents
|
# Analyze the user's input to detect language, normalize request, extract intent, and offload bulky context into documents
|
||||||
try:
|
try:
|
||||||
analyzerPrompt = (
|
analyzerPrompt = (
|
||||||
"You are an input analyzer. Split the user's message into:\n"
|
"You are an input analyzer. From the user's message, perform ALL of the following in one pass:\n"
|
||||||
"1) intent: the user's core request in one concise paragraph, normalized to the user's language.\n"
|
"1) detectedLanguage: detect ISO 639-1 language code (e.g., de, en).\n"
|
||||||
"2) contextItems: supportive data to attach as separate documents if significantly larger than the intent. "
|
"2) normalizedRequest: full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details.\n"
|
||||||
"Include large literal data blocks, long lists/tables, code/JSON blocks, quoted transcripts, CSV fragments, or detailed specs. "
|
"3) intent: concise single-paragraph core request in the detected language for high-level routing.\n"
|
||||||
"Keep URLs in the intent unless they include large pasted content.\n\n"
|
"4) contextItems: supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content.\n\n"
|
||||||
"Rules:\n"
|
"Rules:\n"
|
||||||
"- If total content length (intent + data) is less than 10% of the model's max tokens, do not extract; "
|
"- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained.\n"
|
||||||
"return an empty contextItems and keep a compact, self-contained intent.\n"
|
"- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear.\n"
|
||||||
"- If content exceeds that, move bulky parts into contextItems, keeping the intent short and clear.\n"
|
"- Preserve critical references (URLs, filenames) in intent.\n"
|
||||||
"- Preserve critical references (URLs, filenames) in the intent.\n"
|
"- Normalize to the primary detected language if mixed-language.\n\n"
|
||||||
"- Normalize the intent to the detected language. If mixed-language, use the primary detected language and normalize.\n\n"
|
"Return ONLY JSON (no markdown) with this shape:\n"
|
||||||
"Output JSON only (no markdown):\n"
|
|
||||||
"{\n"
|
"{\n"
|
||||||
" \"detectedLanguage\": \"en\",\n"
|
" \"detectedLanguage\": \"de|en|fr|it|...\",\n"
|
||||||
|
" \"normalizedRequest\": \"Full explicit instruction in detected language\",\n"
|
||||||
" \"intent\": \"Concise normalized request...\",\n"
|
" \"intent\": \"Concise normalized request...\",\n"
|
||||||
" \"contextItems\": [\n"
|
" \"contextItems\": [\n"
|
||||||
" {\n"
|
" {\n"
|
||||||
|
|
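For orientation, a sketch (assumed values; the contextItems item shape is elided by the hunk boundary above) of a response that the parsing below accepts:

# Assumed example analyzer output; only keys visible in the schema above are shown.
example_analyzer_response = {
    "detectedLanguage": "de",
    "normalizedRequest": "Erstelle aus der angehängten Tabelle eine CSV-Datei und eine HTML-Übersicht, behalte alle Spalten bei.",
    "intent": "Tabelle in eine CSV-Datei und eine HTML-Übersicht umwandeln",
    "contextItems": []  # item structure omitted here because it is cut off in the hunk above
}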
@ -249,6 +249,7 @@ class WorkflowManager:
|
||||||
aiResponse = await self.services.ai.callAi(prompt=analyzerPrompt)
|
aiResponse = await self.services.ai.callAi(prompt=analyzerPrompt)
|
||||||
|
|
||||||
detectedLanguage = None
|
detectedLanguage = None
|
||||||
|
normalizedRequest = None
|
||||||
intentText = userInput.prompt
|
intentText = userInput.prompt
|
||||||
contextItems = []
|
contextItems = []
|
||||||
|
|
||||||
|
|
@ -260,6 +261,7 @@ class WorkflowManager:
|
||||||
if jsonStart != -1 and jsonEnd > jsonStart:
|
if jsonStart != -1 and jsonEnd > jsonStart:
|
||||||
parsed = json.loads(aiResponse[jsonStart:jsonEnd])
|
parsed = json.loads(aiResponse[jsonStart:jsonEnd])
|
||||||
detectedLanguage = parsed.get('detectedLanguage') or None
|
detectedLanguage = parsed.get('detectedLanguage') or None
|
||||||
|
normalizedRequest = parsed.get('normalizedRequest') or None
|
||||||
if parsed.get('intent'):
|
if parsed.get('intent'):
|
||||||
intentText = parsed.get('intent')
|
intentText = parsed.get('intent')
|
||||||
contextItems = parsed.get('contextItems') or []
|
contextItems = parsed.get('contextItems') or []
|
||||||
|
|
@ -269,7 +271,18 @@ class WorkflowManager:
|
||||||
# Update services state
|
# Update services state
|
||||||
if detectedLanguage and isinstance(detectedLanguage, str):
|
if detectedLanguage and isinstance(detectedLanguage, str):
|
||||||
self._setUserLanguage(detectedLanguage)
|
self._setUserLanguage(detectedLanguage)
|
||||||
|
try:
|
||||||
|
setattr(self.services, 'currentUserLanguage', detectedLanguage)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
self.services.currentUserPrompt = intentText or userInput.prompt
|
self.services.currentUserPrompt = intentText or userInput.prompt
|
||||||
|
try:
|
||||||
|
if normalizedRequest:
|
||||||
|
setattr(self.services, 'currentUserPromptNormalized', normalizedRequest)
|
||||||
|
if contextItems is not None:
|
||||||
|
setattr(self.services, 'currentUserContextItems', contextItems)
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
# Telemetry (sizes and counts)
|
# Telemetry (sizes and counts)
|
||||||
try:
|
try:
|
||||||
|
|
@ -329,8 +342,6 @@ class WorkflowManager:
|
||||||
if not message.documents:
|
if not message.documents:
|
||||||
message.documents = []
|
message.documents = []
|
||||||
message.documents.extend(created_docs)
|
message.documents.extend(created_docs)
|
||||||
# Ensure label is user_context for discoverability
|
|
||||||
message.documentsLabel = context_label
|
|
||||||
self.services.workflow.updateMessage(message.id, {
|
self.services.workflow.updateMessage(message.id, {
|
||||||
"documents": [d.to_dict() for d in message.documents],
|
"documents": [d.to_dict() for d in message.documents],
|
||||||
"documentsLabel": context_label
|
"documentsLabel": context_label
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,7 @@ markdown
## Web Scraping & HTTP
beautifulsoup4==4.12.2 # Required for HTML/XML parsing
requests==2.31.0
requests-oauthlib==1.3.1 # Required for Google OAuth2Session
chardet>=5.0.0 # Required for character-set detection of web content
aiohttp>=3.8.0 # Required for SharePoint operations (async HTTP)
selenium>=4.15.0 # Required for web automation and JavaScript-heavy pages

test_document_processing.py (new file, 555 lines)
@ -0,0 +1,555 @@
|
||||||
|
"""
|
||||||
|
Test script for document processing and DOCX generation.
|
||||||
|
Calls the main AI service directly to process PDF documents and generate DOCX summaries.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import sys
|
||||||
|
import os
|
||||||
|
import logging
|
||||||
|
import base64
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add the gateway module to the path
|
||||||
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'modules'))
|
||||||
|
|
||||||
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
|
from modules.datamodels.datamodelAi import EnhancedAiCallOptions
|
||||||
|
from modules.services.serviceAi.mainServiceAi import AiService
|
||||||
|
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||||
|
|
||||||
|
# Set up logging
|
||||||
|
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def process_documents_and_generate_summary():
|
||||||
|
"""Process documents using the main AI service with intelligent chunk integration."""
|
||||||
|
logger.info("🚀 Starting intelligent chunk integration test...")
|
||||||
|
|
||||||
|
# Find testdata directory
|
||||||
|
testdata_path = Path("../wiki/poweron/testdata")
|
||||||
|
if not testdata_path.exists():
|
||||||
|
# Try relative to current directory
|
||||||
|
testdata_path = Path("wiki/poweron/testdata")
|
||||||
|
if not testdata_path.exists():
|
||||||
|
# Try relative to parent directory
|
||||||
|
testdata_path = Path("../wiki/poweron/testdata")
|
||||||
|
if not testdata_path.exists():
|
||||||
|
logger.error(f"❌ Testdata path not found. Tried:")
|
||||||
|
logger.error(f" - ../wiki/poweron/testdata")
|
||||||
|
logger.error(f" - wiki/poweron/testdata")
|
||||||
|
logger.error(f" - ../wiki/poweron/testdata")
|
||||||
|
logger.info("Please ensure the testdata folder exists with PDF documents")
|
||||||
|
return False
|
||||||
|
|
||||||
|
# Find all supported document files
|
||||||
|
supported_extensions = [
|
||||||
|
# Document formats
|
||||||
|
"*.pdf", "*.docx", "*.xlsx", "*.pptx", "*.ppt",
|
||||||
|
# Image formats
|
||||||
|
"*.jpg", "*.jpeg", "*.png", "*.gif", "*.webp", "*.bmp", "*.tiff",
|
||||||
|
# Text and code files
|
||||||
|
"*.txt", "*.md", "*.log", "*.rtf", "*.tex", "*.rst", "*.adoc", "*.org", "*.pod",
|
||||||
|
"*.java", "*.js", "*.jsx", "*.ts", "*.tsx", "*.py", "*.rb", "*.go", "*.rs", "*.cpp", "*.c", "*.h", "*.hpp", "*.cc", "*.cxx",
|
||||||
|
"*.cs", "*.php", "*.swift", "*.kt", "*.scala", "*.clj", "*.hs", "*.ml", "*.fs", "*.vb", "*.dart", "*.r", "*.m", "*.pl", "*.sh",
|
||||||
|
"*.html", "*.htm", "*.css", "*.scss", "*.sass", "*.less", "*.vue", "*.svelte",
|
||||||
|
"*.config", "*.ini", "*.cfg", "*.conf", "*.properties", "*.yaml", "*.yml", "*.toml", "*.json", "*.xml",
|
||||||
|
"*.bat", "*.ps1", "*.psm1", "*.psd1", "*.vbs", "*.wsf", "*.cmd", "*.com",
|
||||||
|
"*.csv", "*.tsv", "*.tab", "*.dat", "*.data",
|
||||||
|
"*.man", "*.1", "*.2", "*.3", "*.4", "*.5", "*.6", "*.7", "*.8", "*.9", "*.n", "*.l", "*.m", "*.r", "*.t", "*.x", "*.y", "*.z",
|
||||||
|
"*.diff", "*.patch", "*.gitignore", "*.dockerignore", "*.editorconfig", "*.gitattributes",
|
||||||
|
"*.env", "*.env.local", "*.env.development", "*.env.production", "*.env.test",
|
||||||
|
"*.lock", "*.lockb", "*.lockfile", "*.pkg-lock", "*.yarn-lock"
|
||||||
|
]
|
||||||
|
document_files = []
|
||||||
|
for ext in supported_extensions:
|
||||||
|
document_files.extend(list(testdata_path.glob(ext)))
|
||||||
|
|
||||||
|
logger.info(f"Found {len(document_files)} document files in testdata:")
|
||||||
|
for doc_file in document_files:
|
||||||
|
logger.info(f" - {doc_file.name}")
|
||||||
|
|
||||||
|
if not document_files:
|
||||||
|
logger.error("❌ No supported document files found in testdata folder")
|
||||||
|
return False
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Mock the database interface to provide our file data BEFORE creating AI service
|
||||||
|
class TestDbInterface:
|
||||||
|
def __init__(self, file_data_map):
|
||||||
|
self.file_data_map = file_data_map
|
||||||
|
|
||||||
|
def getFileData(self, file_id):
|
||||||
|
logger.info(f"TestDbInterface.getFileData called with file_id: {file_id}")
|
||||||
|
data = self.file_data_map.get(file_id)
|
||||||
|
if data:
|
||||||
|
logger.info(f"✅ Found file data for {file_id}: {len(data)} bytes")
|
||||||
|
else:
|
||||||
|
logger.warning(f"❌ No file data found for {file_id}")
|
||||||
|
return data
|
||||||
|
|
||||||
|
# Create file data mapping
|
||||||
|
file_data_map = {}
|
||||||
|
for i, doc_file in enumerate(document_files):
|
||||||
|
with open(doc_file, 'rb') as f:
|
||||||
|
file_data_map[f"test_doc_{i+1}"] = f.read()
|
||||||
|
logger.info(f"📁 Loaded {doc_file.name} as test_doc_{i+1}: {len(file_data_map[f'test_doc_{i+1}'])} bytes")
|
||||||
|
|
||||||
|
# Mock the database interface BEFORE creating AI service
|
||||||
|
import modules.interfaces.interfaceDbComponentObjects as db_interface_module
|
||||||
|
original_get_interface = db_interface_module.getInterface
|
||||||
|
db_interface_module.getInterface = lambda: TestDbInterface(file_data_map)
|
||||||
|
logger.info("🔧 Database interface mocked successfully")
|
||||||
|
|
||||||
|
# Create a mock service center with utils
|
||||||
|
class MockServiceCenter:
|
||||||
|
def __init__(self):
|
||||||
|
self.utils = MockUtils()
|
||||||
|
|
||||||
|
class MockUtils:
|
||||||
|
def debugLogToFile(self, message, label):
|
||||||
|
logger.debug(f"[{label}] {message}")
|
||||||
|
print(f"DEBUG [{label}]: {message}") # Also print to console for visibility
|
||||||
|
|
||||||
|
# Only write to debug file if debug logging is enabled (matching real implementation)
|
||||||
|
debug_enabled = self.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
|
||||||
|
if debug_enabled:
|
||||||
|
try:
|
||||||
|
import os
|
||||||
|
from datetime import datetime, UTC
|
||||||
|
debug_dir = self.configGet("APP_DEBUG_CHAT_WORKFLOW_DIR", "./test-chat")
|
||||||
|
if not os.path.isabs(debug_dir):
|
||||||
|
# If relative path, make it relative to the gateway directory
|
||||||
|
gateway_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))
|
||||||
|
debug_dir = os.path.join(gateway_dir, debug_dir)
|
||||||
|
|
||||||
|
os.makedirs(debug_dir, exist_ok=True)
|
||||||
|
debug_file = os.path.join(debug_dir, "debug_workflow.log")
|
||||||
|
timestamp = datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
|
||||||
|
debug_entry = f"[{timestamp}] [{label}] {message}\n"
|
||||||
|
with open(debug_file, "a", encoding="utf-8") as f:
|
||||||
|
f.write(debug_entry)
|
||||||
|
except Exception:
|
||||||
|
pass # Don't fail on debug logging errors
|
||||||
|
|
||||||
|
def configGet(self, key, default):
|
||||||
|
# Return debug settings
|
||||||
|
if key == "APP_DEBUG_CHAT_WORKFLOW_ENABLED":
|
||||||
|
return True
|
||||||
|
elif key == "APP_DEBUG_CHAT_WORKFLOW_DIR":
|
||||||
|
return "./test-chat"
|
||||||
|
return default
|
||||||
|
|
||||||
|
mock_service_center = MockServiceCenter()
|
||||||
|
|
||||||
|
# Initialize the main AI service - let it handle everything
|
||||||
|
logger.info("🔧 Initializing main AI service...")
|
||||||
|
ai_service = await AiService.create(mock_service_center)
|
||||||
|
|
||||||
|
# Create test documents - the AI service will handle file access internally
|
||||||
|
documents = []
|
||||||
|
logger.info(f"📁 Found {len(document_files)} document files")
|
||||||
|
for i, doc_file in enumerate(document_files):
|
||||||
|
logger.info(f"📄 Processing file {i+1}/{len(document_files)}: {doc_file.name}")
|
||||||
|
# Determine MIME type based on file extension
|
||||||
|
mime_type = "application/octet-stream" # default
|
||||||
|
if doc_file.suffix.lower() == '.pdf':
|
||||||
|
mime_type = "application/pdf"
|
||||||
|
elif doc_file.suffix.lower() in ['.jpg', '.jpeg']:
|
||||||
|
mime_type = "image/jpeg"
|
||||||
|
elif doc_file.suffix.lower() == '.png':
|
||||||
|
mime_type = "image/png"
|
||||||
|
elif doc_file.suffix.lower() == '.gif':
|
||||||
|
mime_type = "image/gif"
|
||||||
|
elif doc_file.suffix.lower() == '.docx':
|
||||||
|
mime_type = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||||
|
elif doc_file.suffix.lower() == '.xlsx':
|
||||||
|
mime_type = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||||
|
elif doc_file.suffix.lower() == '.pptx':
|
||||||
|
mime_type = "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||||
|
elif doc_file.suffix.lower() == '.ppt':
|
||||||
|
mime_type = "application/vnd.ms-powerpoint"
|
||||||
|
elif doc_file.suffix.lower() == '.html':
|
||||||
|
mime_type = "text/html"
|
||||||
|
elif doc_file.suffix.lower() == '.csv':
|
||||||
|
mime_type = "text/csv"
|
||||||
|
elif doc_file.suffix.lower() == '.json':
|
||||||
|
mime_type = "application/json"
|
||||||
|
elif doc_file.suffix.lower() in ['.txt', '.md']:
|
||||||
|
mime_type = "text/plain"
|
||||||
|
|
||||||
|
chat_doc = ChatDocument(
|
||||||
|
fileId=f"test_doc_{i+1}",
|
||||||
|
messageId=f"test_message_{i+1}",
|
||||||
|
fileName=doc_file.name,
|
||||||
|
mimeType=mime_type,
|
||||||
|
fileSize=doc_file.stat().st_size,
|
||||||
|
roundNumber=1,
|
||||||
|
taskNumber=1,
|
||||||
|
actionNumber=1,
|
||||||
|
actionId=f"test_action_{i+1}"
|
||||||
|
)
|
||||||
|
documents.append(chat_doc)
|
||||||
|
logger.info(f"✅ Created ChatDocument: {chat_doc.fileName} ({chat_doc.mimeType}) - {chat_doc.fileSize} bytes")
|
||||||
|
|
||||||
|
logger.info(f"📄 Created {len(documents)} document objects")
|
||||||
|
|
||||||
|
# Create enhanced AI call options for intelligent chunked processing
|
||||||
|
ai_options = EnhancedAiCallOptions(
|
||||||
|
operationType="general",
|
||||||
|
enableParallelProcessing=True,
|
||||||
|
maxConcurrentChunks=5, # Increased for better testing
|
||||||
|
preserveChunkMetadata=True,
|
||||||
|
chunkSeparator="\n\n---\n\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Call the main AI service directly - let it handle everything including DOCX generation
|
||||||
|
logger.info("🤖 Calling main AI service with intelligent merging...")
|
||||||
|
|
||||||
|
|
||||||
|
# Run a single end-to-end test to avoid the loop issue
|
||||||
|
logger.info("🧪 Running single end-to-end test...")
|
||||||
|
|
||||||
|
userPrompt = "Analyze the document containing mails for customer use cases. Can you create one file for each email in plain text format?"
|
||||||
|
|
||||||
|
# userPrompt = "Can you create one file for each section in the document"
|
||||||
|
|
||||||
|
# userPrompt = "Analyze these documents and create a fitting image for the content"
|
||||||
|
|
||||||
|
# userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted."
|
||||||
|
|
||||||
|
# userPrompt = "Create a docx file containing a summary and the COMPLETE list from the pdf file, having one additional column with a 'x' marker for all items, which are yellow highlighted."
|
||||||
|
|
||||||
|
# userPrompt = "Create a docx file containing the combined documents in french language."
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Single AI call with DOCX generation
|
||||||
|
ai_response = await ai_service.callAi(
|
||||||
|
prompt=userPrompt,
|
||||||
|
documents=documents,
|
||||||
|
options=ai_options,
|
||||||
|
outputFormat="txt",
|
||||||
|
title="Kunden und Use Cases"
|
||||||
|
)
|
||||||
|
|
||||||
|
logger.info(f"✅ End-to-end test completed successfully")
|
||||||
|
logger.info(f"📊 Response type: {type(ai_response)}")
|
||||||
|
logger.info(f"📊 Response length: {len(str(ai_response))} characters")
|
||||||
|
|
||||||
|
# Single test result
|
||||||
|
test_results = [{
|
||||||
|
"test_name": "End-to-End DOCX Generation",
|
||||||
|
"success": True,
|
||||||
|
"response_type": type(ai_response).__name__,
|
||||||
|
"response_length": len(str(ai_response)),
|
||||||
|
"response": ai_response
|
||||||
|
}]
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"❌ End-to-end test failed: {str(e)}")
|
||||||
|
test_results = [{
|
||||||
|
"test_name": "End-to-End DOCX Generation",
|
||||||
|
"success": False,
|
||||||
|
"error": str(e),
|
||||||
|
"response": None
|
||||||
|
}]
|
||||||
|
|
||||||
|
logger.info(f"🎯 Completed 1 end-to-end test")
|
||||||
|
|
||||||
|
# Process all test results and save outputs
|
||||||
|
logger.info("📊 Processing test results...")
|
||||||
|
|
||||||
|
successful_tests = [r for r in test_results if r['success']]
|
||||||
|
failed_tests = [r for r in test_results if not r['success']]
|
||||||
|
|
||||||
|
logger.info(f"✅ Successful tests: {len(successful_tests)}")
|
||||||
|
logger.info(f"❌ Failed tests: {len(failed_tests)}")
|
||||||
|
|
||||||
|
# Display test results summary
|
||||||
|
logger.info("=" * 80)
|
||||||
|
logger.info("END-TO-END TEST RESULTS SUMMARY")
|
||||||
|
logger.info("=" * 80)
|
||||||
|
for i, result in enumerate(test_results, 1):
|
||||||
|
status = "✅ PASS" if result['success'] else "❌ FAIL"
|
||||||
|
logger.info(f"Test {i}: {result['test_name']} - {status}")
|
||||||
|
if result['success']:
|
||||||
|
logger.info(f" Response Type: {result['response_type']}")
|
||||||
|
logger.info(f" Response Length: {result['response_length']} characters")
|
||||||
|
else:
|
||||||
|
logger.info(f" Error: {result['error']}")
|
||||||
|
logger.info("=" * 80)
|
||||||
|
|
||||||
|
# Create output directory if it doesn't exist
|
||||||
|
output_dir = Path("test-chat/unittestoutput")
|
||||||
|
output_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Save all test results and generated files
|
||||||
|
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||||
|
|
||||||
|
logger.info("💾 Saving test results and generated files...")
|
||||||
|
|
||||||
|
try:
|
||||||
|
for i, result in enumerate(successful_tests, 1):
|
||||||
|
test_name = result['test_name'].replace(' ', '_').lower()
|
||||||
|
response = result['response']
|
||||||
|
|
||||||
|
logger.info(f"💾 Saving Test {i}: {result['test_name']}")
|
||||||
|
|
||||||
|
# Handle different response types
|
||||||
|
if isinstance(response, dict):
|
||||||
|
# Document generation response
|
||||||
|
if 'documents' in response and response['documents']:
|
||||||
|
logger.info(f"📄 Found {len(response['documents'])} documents in response")
|
||||||
|
|
||||||
|
for j, doc in enumerate(response['documents']):
|
||||||
|
doc_name = doc.get('documentName', f'{test_name}_document_{j+1}')
|
||||||
|
doc_data = doc.get('documentData', '')
|
||||||
|
doc_mime = doc.get('mimeType', 'application/octet-stream')
|
||||||
|
|
||||||
|
logger.info(f"📄 Document {j+1}: {doc_name}")
|
||||||
|
logger.info(f"📄 MIME Type: {doc_mime}")
|
||||||
|
logger.info(f"📄 Data length: {len(doc_data)} characters")
|
||||||
|
|
||||||
|
# Determine file extension with better MIME type detection
|
||||||
|
file_ext = '.bin' # Default fallback
|
||||||
|
|
||||||
|
if doc_mime:
|
||||||
|
if 'docx' in doc_mime.lower() or 'wordprocessingml' in doc_mime.lower():
|
||||||
|
file_ext = '.docx'
|
||||||
|
elif 'pdf' in doc_mime.lower():
|
||||||
|
file_ext = '.pdf'
|
||||||
|
elif 'txt' in doc_mime.lower() or 'plain' in doc_mime.lower():
|
||||||
|
file_ext = '.txt'
|
||||||
|
elif 'html' in doc_mime.lower():
|
||||||
|
file_ext = '.html'
|
||||||
|
elif 'json' in doc_mime.lower():
|
||||||
|
file_ext = '.json'
|
||||||
|
elif 'csv' in doc_mime.lower():
|
||||||
|
file_ext = '.csv'
|
||||||
|
elif 'xlsx' in doc_mime.lower() or 'spreadsheetml' in doc_mime.lower():
|
||||||
|
file_ext = '.xlsx'
|
||||||
|
elif 'pptx' in doc_mime.lower() or 'presentationml' in doc_mime.lower():
|
||||||
|
file_ext = '.pptx'
|
||||||
|
elif 'markdown' in doc_mime.lower() or 'md' in doc_mime.lower():
|
||||||
|
file_ext = '.md'
|
||||||
|
elif 'png' in doc_mime.lower() or 'image' in doc_mime.lower():
|
||||||
|
file_ext = '.png'
|
||||||
|
elif 'jpg' in doc_mime.lower() or 'jpeg' in doc_mime.lower():
|
||||||
|
file_ext = '.jpg'
|
||||||
|
else:
|
||||||
|
logger.warning(f"⚠️ Unknown MIME type: {doc_mime}, using .bin")
|
||||||
|
|
||||||
|
# Also check filename for hints
|
||||||
|
if doc_name and '.' in doc_name:
|
||||||
|
name_ext = '.' + doc_name.split('.')[-1].lower()
|
||||||
|
if name_ext in ['.docx', '.pdf', '.txt', '.html', '.json', '.csv', '.xlsx', '.pptx', '.md', '.png', '.jpg', '.jpeg']:
|
||||||
|
file_ext = name_ext
|
||||||
|
logger.info(f"📄 Using extension from filename: {file_ext}")
|
||||||
|
|
||||||
|
logger.info(f"📄 Final file extension: {file_ext}")
|
||||||
|
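
                            # Illustrative mapping only (example MIME types assumed, not taken from a real response):
                            #   "application/vnd.openxmlformats-officedocument.wordprocessingml.document" -> ".docx"
                            #   "text/plain" -> ".txt", "image/png" -> ".png"; unrecognised types fall back to ".bin"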

                            # Save document
                            output_path = output_dir / f"{test_name}_{timestamp}{file_ext}"

                            # Handle different content types
                            if file_ext in ['.md', '.txt', '.html', '.json', '.csv']:
                                # Text-based formats - save directly as text
                                with open(output_path, 'w', encoding='utf-8') as f:
                                    f.write(doc_data)
                                logger.info(f"✅ Document saved as text: {output_path} ({len(doc_data)} characters)")
                            elif file_ext in ['.png', '.jpg', '.jpeg']:
                                # Image formats - decode from base64
                                try:
                                    doc_bytes = base64.b64decode(doc_data)
                                    with open(output_path, 'wb') as f:
                                        f.write(doc_bytes)
                                    logger.info(f"✅ Image saved: {output_path} ({len(doc_bytes)} bytes)")
                                except Exception as e:
                                    logger.warning(f"⚠️ Failed to decode image as base64: {e}")
                                    # Save as text if base64 decoding fails
                                    with open(output_path, 'w', encoding='utf-8') as f:
                                        f.write(doc_data)
                                    logger.info(f"✅ Image saved as text (fallback): {output_path}")
                            else:
                                # Other binary formats - decode from base64
                                try:
                                    doc_bytes = base64.b64decode(doc_data)
                                    with open(output_path, 'wb') as f:
                                        f.write(doc_bytes)
                                    logger.info(f"✅ Document saved as binary: {output_path} ({len(doc_bytes)} bytes)")
                                except Exception as e:
                                    logger.warning(f"⚠️ Failed to decode document as base64: {e}")
                                    # Save as text if base64 decoding fails
                                    with open(output_path, 'w', encoding='utf-8') as f:
                                        f.write(doc_data)
                                    logger.info(f"✅ Document saved as text (fallback): {output_path}")

                    # Also save raw content as text
                    content = response.get('content', '')
                    if content:
                        text_path = output_dir / f"{test_name}_content_{timestamp}.txt"
                        with open(text_path, 'w', encoding='utf-8') as f:
                            # Handle both string and dictionary content
                            if isinstance(content, dict):
                                import json
                                f.write(json.dumps(content, indent=2, ensure_ascii=False))
                            else:
                                f.write(str(content))
                        logger.info(f"✅ Content saved: {text_path}")

                elif isinstance(response, str):
                    # Text response
                    text_path = output_dir / f"{test_name}_response_{timestamp}.txt"
                    with open(text_path, 'w', encoding='utf-8') as f:
                        f.write(response)
                    logger.info(f"✅ Text response saved: {text_path}")

                else:
                    logger.warning(f"⚠️ Unknown response type for {result['test_name']}: {type(response)}")

            # Save failed test details
            if failed_tests:
                error_path = output_dir / f"failed_tests_{timestamp}.txt"
                with open(error_path, 'w', encoding='utf-8') as f:
                    f.write("# Failed Test Details\n\n")
                    for i, result in enumerate(failed_tests, 1):
                        f.write(f"## Test {i}: {result['test_name']}\n")
                        f.write(f"**Error:** {result['error']}\n\n")
                logger.info(f"✅ Failed test details saved: {error_path}")

        except Exception as e:
            logger.error(f"❌ Error saving test results: {str(e)}")
            return False

        # Save comprehensive test report
        report_path = output_dir / f"end_to_end_test_report_{timestamp}.txt"
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write(f"# End-to-End AI Service Test Report\n")
            f.write(f"Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n\n")

            f.write(f"## Test Configuration\n")
            f.write(f"- Documents processed: {len(documents)}\n")
            f.write(f"- Processing method: Intelligent Token-Aware Merging\n")
            f.write(f"- Parallel processing: {ai_options.enableParallelProcessing}\n")
            f.write(f"- Max concurrent chunks: {ai_options.maxConcurrentChunks}\n")
            f.write(f"- Chunk metadata preserved: {ai_options.preserveChunkMetadata}\n")
            f.write(f"- Chunk separator: '{ai_options.chunkSeparator}'\n\n")

            f.write(f"## Document Inventory\n")
            for i, doc in enumerate(documents, 1):
                f.write(f"{i}. **{doc.fileName}**\n")
                f.write(f"   - MIME Type: {doc.mimeType}\n")
                f.write(f"   - File Size: {doc.fileSize:,} bytes\n")
                f.write(f"   - File ID: {doc.fileId}\n\n")

            f.write(f"## Test Results Summary\n")
            f.write(f"- Total Tests: {len(test_results)}\n")
            f.write(f"- Successful: {len(successful_tests)}\n")
            f.write(f"- Failed: {len(failed_tests)}\n")
            f.write(f"- Success Rate: {len(successful_tests)/len(test_results)*100:.1f}%\n\n")

            f.write(f"## Detailed Test Results\n")
            for i, result in enumerate(test_results, 1):
                f.write(f"### Test {i}: {result['test_name']}\n")
                f.write(f"**Status:** {'✅ PASS' if result['success'] else '❌ FAIL'}\n")

                if result['success']:
                    f.write(f"**Response Type:** {result['response_type']}\n")
                    f.write(f"**Response Length:** {result['response_length']} characters\n")

                    # Show response preview
                    response_preview = str(result['response'])[:500]
                    f.write(f"**Response Preview:**\n```\n{response_preview}...\n```\n\n")
                else:
                    f.write(f"**Error:** {result['error']}\n\n")

            f.write(f"## Technical Implementation Details\n")
            f.write(f"This test validates the complete AI service pipeline:\n\n")
            f.write(f"### Tested Components:\n")
            f.write(f"- **Document Extraction**: PDF, DOCX, images, etc.\n")
            f.write(f"- **Intelligent Chunking**: Token-aware merging\n")
            f.write(f"- **Model Selection**: Automatic AI model choice\n")
            f.write(f"- **Parallel Processing**: Concurrent chunk processing\n")
            f.write(f"- **Document Generation**: DOCX, PDF, text output\n")
            f.write(f"- **Error Handling**: Graceful failure management\n\n")

            f.write(f"### Performance Metrics:\n")
            f.write(f"- **Chunk Optimization**: Intelligent merging reduces AI calls\n")
            f.write(f"- **Processing Speed**: Parallel execution\n")
            f.write(f"- **Memory Efficiency**: Token-aware chunking\n")
            f.write(f"- **Output Quality**: Multiple format support\n\n")

            f.write(f"## Generated Files\n")
            for i, result in enumerate(successful_tests, 1):
                test_name = result['test_name'].replace(' ', '_').lower()
                f.write(f"- **Test {i}**: {result['test_name']} → `{test_name}_*_{timestamp}.*`\n")

            if failed_tests:
                f.write(f"- **Failed Tests**: `failed_tests_{timestamp}.txt`\n")

            f.write(f"- **This Report**: `end_to_end_test_report_{timestamp}.txt`\n\n")

            f.write(f"The end-to-end test successfully validates the complete AI service\n")
            f.write(f"pipeline from document input to formatted output generation.\n")

        logger.info(f"✅ Comprehensive test report saved: {report_path}")

        # Show debug file locations
        debug_files = []
        try:
            debug_dir = Path("test-chat")
            if debug_dir.exists():
                debug_files.extend(list(debug_dir.glob("*.log")))
                debug_files.extend(list(debug_dir.glob("ai/*.txt")))

            if debug_files:
                logger.info("📁 Debug files created:")
                for debug_file in debug_files:
                    logger.info(f"  - {debug_file}")
            else:
                logger.info("📁 No debug files found in test-chat directory")
        except Exception as e:
            logger.warning(f"Could not list debug files: {e}")

        # Restore original database interface
        db_interface_module.getInterface = original_get_interface

        return True

    except Exception as e:
        logger.error(f"❌ Error during document processing: {str(e)}")
        import traceback
        logger.error(f"Traceback: {traceback.format_exc()}")

        # Restore original database interface in case of error
        try:
            db_interface_module.getInterface = original_get_interface
        except:
            pass

        return False


async def main():
    """Main function to run the intelligent chunk integration test."""
    logger.info("🎯 Starting Intelligent Chunk Integration Test")
    logger.info("=" * 60)

    success = await process_documents_and_generate_summary()

    if success:
        logger.info("🎉 Intelligent chunk integration test completed successfully!")
        logger.info("✅ Main AI service handled all processing internally")
        logger.info("✅ Intelligent token-aware merging activated")
        logger.info("✅ DOCX document generated directly by AI service")
        logger.info("✅ Detailed chunk integration analysis saved")
        logger.info("✅ Performance optimization achieved")
    else:
        logger.error("❌ Test failed!")
        logger.error("Please check the error messages above for details")

    logger.info("=" * 60)


if __name__ == "__main__":
    asyncio.run(main())
422
tool_security_encrypt_all_env_files.py
Normal file
@ -0,0 +1,422 @@
#!/usr/bin/env python3
"""
Tool for encrypting all *_SECRET variables in all environment files.

This tool automatically processes all three environment files (dev, int, prod)
and encrypts any unencrypted *_SECRET variables using the appropriate encryption
keys for each environment.

Usage:
    # Encrypt all secrets in all environment files
    python tool_security_encrypt_all_env_files.py

    # Dry run - show what would be changed without making changes
    python tool_security_encrypt_all_env_files.py --dry-run

    # Skip backup creation
    python tool_security_encrypt_all_env_files.py --no-backup

    # Process only specific environment files
    python tool_security_encrypt_all_env_files.py --files env_dev.env env_prod.env
"""

import sys
import os
import argparse
import shutil
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any

# Add the modules directory to the Python path
current_dir = Path(__file__).parent
modules_dir = current_dir / 'modules'
if modules_dir.exists():
    sys.path.insert(0, str(modules_dir))
else:
    print(f"Error: Modules directory not found: {modules_dir}")
    print(f"Make sure you're running this script from the gateway directory")
    sys.exit(1)

# Import encryption functions
try:
    from modules.shared.configuration import encrypt_value
except ImportError as e:
    print(f"Error: Could not import encryption functions from shared.configuration: {e}")
    print(f"Make sure you're running this script from the gateway directory")
    print(f"Modules directory: {modules_dir}")
    sys.exit(1)


def get_env_type_from_file(file_path: Path) -> str:
    """
    Read the APP_ENV_TYPE from the environment file.

    Args:
        file_path: Path to the environment file

    Returns:
        str: The environment type (dev, int, prod) or 'dev' as default
    """
    if not file_path.exists():
        return 'dev'

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            for line in f:
                line = line.strip()
                if line.startswith('APP_ENV_TYPE') and '=' in line:
                    _, value = line.split('=', 1)
                    return value.strip().lower()
    except Exception as e:
        print(f"Warning: Could not read APP_ENV_TYPE from {file_path}: {e}")

    return 'dev'
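
# Example (illustrative env file contents, not taken from a real project file):
#   a line such as "APP_ENV_TYPE = int" makes get_env_type_from_file() return "int";
#   if the file is missing or has no APP_ENV_TYPE entry, the function falls back to 'dev'.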


def is_any_encrypted_value(value: str) -> bool:
    """
    Check if a value has any encryption prefix (DEV_ENC:, INT_ENC:, PROD_ENC:, etc.).

    Args:
        value: The value to check

    Returns:
        bool: True if the value has any encryption prefix, False otherwise
    """
    if not value or not isinstance(value, str):
        return False

    # Check for any environment-specific encryption prefixes
    return (value.startswith('DEV_ENC:') or
            value.startswith('INT_ENC:') or
            value.startswith('PROD_ENC:') or
            value.startswith('TEST_ENC:') or
            value.startswith('STAGING_ENC:'))
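
# Examples (values are made up for illustration):
#   is_any_encrypted_value("DEV_ENC:Z0FBQUFB...")  -> True   (already encrypted, skipped)
#   is_any_encrypted_value("my-plain-secret")      -> False  (picked up for encryption)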


def find_secret_keys_in_file(file_path: Path) -> list:
    """
    Find all *_SECRET keys in an environment file that are not encrypted.

    Args:
        file_path: Path to the environment file

    Returns:
        list: List of tuples (line_number, key, value, full_line)
    """
    secret_keys = []

    if not file_path.exists():
        return secret_keys

    # Get the environment type from the file itself
    file_env_type = get_env_type_from_file(file_path)

    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()

        i = 0
        while i < len(lines):
            line = lines[i].strip()

            # Skip empty lines and comments
            if not line or line.startswith('#'):
                i += 1
                continue

            # Check if line contains a key-value pair
            if '=' in line:
                key, value = line.split('=', 1)
                key = key.strip()
                value = value.strip()

                # Check if it's a secret key and not already encrypted with ANY prefix
                if key.endswith('_SECRET') and value and not is_any_encrypted_value(value):
                    # Check if value starts with { (JSON object)
                    if value.startswith('{'):
                        # Collect all lines until we find the closing }
                        json_lines = [value]
                        start_line = i + 1
                        i += 1
                        brace_count = value.count('{') - value.count('}')

                        while i < len(lines) and brace_count > 0:
                            json_lines.append(lines[i].rstrip('\n'))
                            brace_count += lines[i].count('{') - lines[i].count('}')
                            i += 1

                        # Join all lines and create the full JSON value
                        full_json_value = '\n'.join(json_lines)
                        secret_keys.append((start_line, key, full_json_value, line))
                        i -= 1  # Adjust for the loop increment
                    else:
                        # Single line value
                        secret_keys.append((i + 1, key, value, line))
                # Check if it's a secret key with multiline JSON (value is just "{")
                elif key.endswith('_SECRET') and value == '{' and not is_any_encrypted_value(value):
                    # Collect all lines until we find the closing }
                    json_lines = [value]
                    start_line = i + 1
                    i += 1
                    brace_count = 1  # We already have one opening brace

                    while i < len(lines) and brace_count > 0:
                        json_lines.append(lines[i].rstrip('\n'))
                        brace_count += lines[i].count('{') - lines[i].count('}')
                        i += 1

                    # Join all lines and create the full JSON value
                    full_json_value = '\n'.join(json_lines)
                    secret_keys.append((start_line, key, full_json_value, line))
                    i -= 1  # Adjust for the loop increment

            i += 1

    except Exception as e:
        print(f"Error reading {file_path}: {e}")

    return secret_keys
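
# Example return value (hypothetical file contents):
#   a line "Connector_Foo_API_SECRET = plain-value" on file line 12 yields the tuple
#   (12, 'Connector_Foo_API_SECRET', 'plain-value', 'Connector_Foo_API_SECRET = plain-value');
#   a multiline JSON secret (value starting with "{") is joined with '\n' into a single value.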


def backup_file(file_path: Path) -> Path:
    """
    Create a backup of the file before modification.

    Args:
        file_path: Path to the file to backup

    Returns:
        Path: Path to the backup file
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_path = file_path.with_suffix(f'.{timestamp}.backup')
    shutil.copy2(file_path, backup_path)
    return backup_path
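
# Example (timestamp is illustrative): backing up Path('env_dev.env') at 2025-01-01 12:00:00
# produces a sibling file named 'env_dev.20250101_120000.backup'.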


def encrypt_all_secrets_in_file(file_path: Path, dry_run: bool = False, create_backup: bool = True) -> Dict[str, Any]:
    """
    Encrypt all non-encrypted secrets in a file.

    Args:
        file_path: Path to the environment file
        dry_run: If True, only show what would be changed
        create_backup: If True, create a backup before modifying

    Returns:
        dict: Results of the encryption process
    """
    # Get the environment type from the file itself
    file_env_type = get_env_type_from_file(file_path)

    results = {
        'file': str(file_path),
        'env_type': file_env_type,
        'secrets_found': 0,
        'secrets_encrypted': 0,
        'errors': [],
        'backup_created': None
    }

    # Find all secret keys
    secret_keys = find_secret_keys_in_file(file_path)
    results['secrets_found'] = len(secret_keys)

    if not secret_keys:
        print(f"  ✅ No unencrypted secrets found - all values already have encryption prefixes")
        return results

    print(f"  Found {len(secret_keys)} non-encrypted secrets")

    if dry_run:
        print("  [DRY RUN] Would encrypt the following secrets:")
        for line_num, key, value, full_line in secret_keys:
            print(f"    Line {line_num}: {key} = {value[:50]}{'...' if len(value) > 50 else ''}")
        return results

    # Create backup if requested
    if create_backup:
        try:
            backup_path = backup_file(file_path)
            results['backup_created'] = str(backup_path)
            print(f"  📋 Backup created: {backup_path.name}")
        except Exception as e:
            results['errors'].append(f"Failed to create backup: {e}")
            print(f"  ⚠️ Warning: Could not create backup: {e}")

    # Read the file content
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            lines = f.readlines()
    except Exception as e:
        results['errors'].append(f"Failed to read file: {e}")
        return results

    # Process each secret key
    for line_num, key, value, full_line in secret_keys:
        try:
            print(f"  🔐 Encrypting {key}...")

            # Encrypt the value using the environment type from the file
            encrypted_value = encrypt_value(value, file_env_type)

            # Replace the line in the file content
            new_line = f"{key} = {encrypted_value}\n"
            lines[line_num - 1] = new_line

            # If this was a multiline JSON, we need to remove the remaining lines
            if value.startswith('{') and '\n' in value:
                # Count how many lines the original JSON spanned
                json_lines = value.split('\n')
                lines_to_remove = len(json_lines) - 1  # -1 because we already replaced the first line

                # Remove the remaining lines
                for i in range(line_num, line_num + lines_to_remove):
                    if i < len(lines):
                        lines[i] = ""

            results['secrets_encrypted'] += 1
            print(f"  ✓ Encrypted successfully")

        except Exception as e:
            error_msg = f"Failed to encrypt {key}: {e}"
            results['errors'].append(error_msg)
            print(f"  ✗ {error_msg}")

    # Write the modified content back to the file
    if results['secrets_encrypted'] > 0:
        try:
            with open(file_path, 'w', encoding='utf-8') as f:
                f.writelines(lines)
            print(f"  💾 File updated successfully")
        except Exception as e:
            results['errors'].append(f"Failed to write file: {e}")
            print(f"  ✗ Failed to write file: {e}")

    return results
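
# Illustrative before/after for a single line (key and ciphertext are made up, and the exact
# prefix depends on what encrypt_value() returns for the file's APP_ENV_TYPE):
#   before: Connector_Foo_API_SECRET = my-plain-secret
#   after:  Connector_Foo_API_SECRET = DEV_ENC:Z0FBQUFB...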


def process_all_env_files(env_files: List[str] = None, dry_run: bool = False, create_backup: bool = True) -> Dict[str, Any]:
    """
    Process all environment files and encrypt unencrypted secrets.

    Args:
        env_files: List of specific files to process (if None, processes all three default files)
        dry_run: If True, only show what would be changed
        create_backup: If True, create backups before modifying

    Returns:
        dict: Summary of all processing results
    """
    # Default environment files if none specified
    if env_files is None:
        env_files = ['env_dev.env', 'env_int.env', 'env_prod.env']

    # Convert to Path objects and check if they exist
    env_paths = []
    for env_file in env_files:
        env_path = Path(env_file)
        if not env_path.exists():
            print(f"⚠️ Warning: Environment file not found: {env_file}")
            continue
        env_paths.append(env_path)

    if not env_paths:
        print("❌ No valid environment files found to process")
        return {'total_files': 0, 'total_secrets_found': 0, 'total_secrets_encrypted': 0, 'total_errors': 0, 'files': []}

    print("🔐 PowerOn Batch Secret Encryption Tool")
    print("=" * 60)
    print("⚠️ IMPORTANT: The tool will read APP_ENV_TYPE from each file itself")
    print("⚠️ Each file will be processed with its own environment-specific encryption")
    print()

    if dry_run:
        print("🔍 DRY RUN MODE - No changes will be made")
        print()

    # Process each file
    all_results = []
    total_secrets_found = 0
    total_secrets_encrypted = 0
    total_errors = 0

    for env_path in env_paths:
        print(f"\n📁 Processing {env_path.name}:")
        results = encrypt_all_secrets_in_file(env_path, dry_run, create_backup)
        all_results.append(results)

        total_secrets_found += results['secrets_found']
        total_secrets_encrypted += results['secrets_encrypted']
        total_errors += len(results['errors'])

    # Summary
    print("\n" + "=" * 60)
    print("📊 SUMMARY")
    print("=" * 60)
    print(f"Files processed: {len(env_paths)}")
    print(f"Total secrets found: {total_secrets_found}")

    if not dry_run:
        print(f"Total secrets encrypted: {total_secrets_encrypted}")
        print(f"Total errors: {total_errors}")

        if total_errors == 0 and total_secrets_encrypted > 0:
            print("\n🎉 All secrets encrypted successfully!")
        elif total_errors > 0:
            print(f"\n⚠️ Completed with {total_errors} errors")
        else:
            print("\n✅ No secrets needed encryption")
    else:
        print(f"Secrets that would be encrypted: {total_secrets_found}")

    # Show backup information
    backups_created = [r['backup_created'] for r in all_results if r['backup_created']]
    if backups_created:
        print(f"\n📋 Backups created: {len(backups_created)}")
        for backup in backups_created:
            print(f"  - {Path(backup).name}")

    # Show errors if any
    all_errors = []
    for results in all_results:
        all_errors.extend(results['errors'])

    if all_errors:
        print(f"\n❌ Errors encountered:")
        for error in all_errors:
            print(f"  - {error}")

    return {
        'total_files': len(env_paths),
        'total_secrets_found': total_secrets_found,
        'total_secrets_encrypted': total_secrets_encrypted,
        'total_errors': total_errors,
        'files': all_results
    }
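
# Programmatic use is also possible (mirrors what main() does below; file names are examples):
#   summary = process_all_env_files(env_files=['env_dev.env'], dry_run=True, create_backup=False)
#   print(summary['total_secrets_found'], summary['total_errors'])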


def main():
    parser = argparse.ArgumentParser(description='Encrypt all *_SECRET variables in all environment files')
    parser.add_argument('--files', '-f', nargs='+',
                        help='Specific environment files to process (default: all three env files)')
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would be changed without making changes')
    parser.add_argument('--no-backup', action='store_true',
                        help='Skip creating backup files')

    args = parser.parse_args()

    try:
        results = process_all_env_files(
            env_files=args.files,
            dry_run=args.dry_run,
            create_backup=not args.no_backup
        )

        # Return appropriate exit code
        if results['total_errors'] > 0:
            return 1
        return 0

    except Exception as e:
        print(f"Error: {e}")
        return 1


if __name__ == '__main__':
    sys.exit(main())