Commit 0bc71c99d5 (parent 0c357dc8a9): multi-document output implemented.
6 changed files with 1,448 additions and 50 deletions.
|
|
@ -649,6 +649,11 @@ class AiService:
|
|||
|
||||
for part in ec.parts:
|
||||
if part.typeGroup in ("text", "table", "structure", "image", "container", "binary"):
|
||||
# Skip empty container chunks (they're just metadata containers)
|
||||
if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
|
||||
logger.debug(f"Skipping empty container chunk: mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}")
|
||||
continue
|
||||
|
||||
chunks_to_process.append({
|
||||
'part': part,
|
||||
'chunk_index': chunk_index,
|
||||
|
|
@ -764,7 +769,14 @@ class AiService:
|
|||
elif part.typeGroup in ("container", "binary"):
|
||||
# Handle ALL container and binary content generically - let AI process any document type
|
||||
self.services.utils.debugLogToFile(f"DEBUG: Chunk {chunk_index}: typeGroup={part.typeGroup}, mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
|
||||
if part.mimeType and part.data and len(part.data.strip()) > 0:
|
||||
|
||||
# Skip empty container chunks (they're just metadata containers)
|
||||
if part.typeGroup == "container" and (not part.data or len(part.data.strip()) == 0):
|
||||
self.services.utils.debugLogToFile(f"DEBUG: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}", "AI_SERVICE")
|
||||
logger.info(f"Chunk {chunk_index}: Skipping empty container - mimeType={part.mimeType}, data_length={len(part.data) if part.data else 0}")
|
||||
# Skip processing this chunk
|
||||
pass
|
||||
elif part.mimeType and part.data and len(part.data.strip()) > 0:
|
||||
# Process any document container as text content
|
||||
request_options = options if options is not None else AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
|
|
@ -869,12 +881,19 @@ class AiService:
|
|||
# Log extraction context length
|
||||
self.services.utils.debugLogToFile(f"EXTRACTION CONTEXT LENGTH: {len(part.data) if part.data else 0} characters", "AI_SERVICE")
|
||||
|
||||
# Debug: Log the actual prompt being sent to AI
|
||||
logger.debug(f"AI PROMPT PREVIEW: {prompt[:300]}...")
|
||||
logger.debug(f"AI CONTEXT PREVIEW: {part.data[:200] if part.data else 'None'}...")
|
||||
|
||||
request = AiCallRequest(
|
||||
prompt=prompt,
|
||||
context=part.data,
|
||||
options=request_options
|
||||
)
|
||||
response = await self.aiObjects.call(request)
|
||||
|
||||
# Debug: Log what AI actually returned
|
||||
logger.debug(f"AI RESPONSE PREVIEW: {response.content[:300] if response.content else 'None'}...")
|
||||
ai_result = response.content
|
||||
|
||||
# Log extraction response length
|
||||
|
|
@ -900,16 +919,20 @@ class AiService:
|
|||
import json
|
||||
import re
|
||||
|
||||
# Clean the response - remove markdown code blocks if present
|
||||
# Clean the response - remove markdown code blocks and extra formatting
|
||||
cleaned_result = ai_result.strip()
|
||||
if cleaned_result.startswith('```json'):
|
||||
# Remove ```json from start and ``` from end
|
||||
cleaned_result = re.sub(r'^```json\s*', '', cleaned_result)
|
||||
cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
|
||||
elif cleaned_result.startswith('```'):
|
||||
# Remove ``` from start and end
|
||||
cleaned_result = re.sub(r'^```\s*', '', cleaned_result)
|
||||
cleaned_result = re.sub(r'\s*```$', '', cleaned_result)
|
||||
|
||||
# Remove any markdown code block markers (```json, ```, etc.)
|
||||
cleaned_result = re.sub(r'^```(?:json)?\s*', '', cleaned_result, flags=re.MULTILINE)
|
||||
cleaned_result = re.sub(r'\s*```\s*$', '', cleaned_result, flags=re.MULTILINE)
|
||||
|
||||
# Remove any remaining ``` markers anywhere in the text
|
||||
cleaned_result = re.sub(r'```', '', cleaned_result)
|
||||
|
||||
# Try to extract JSON from the response if it's embedded in other text
|
||||
json_match = re.search(r'\{.*\}', cleaned_result, re.DOTALL)
|
||||
if json_match:
|
||||
cleaned_result = json_match.group(0)
|
||||
|
||||
# Validate JSON
|
||||
json.loads(cleaned_result)
|
||||
|
|
@ -1193,7 +1216,13 @@ class AiService:
|
|||
# Parse JSON from AI result
|
||||
chunk_json = json.loads(chunk_result.aiResult)
|
||||
|
||||
# Extract sections from this chunk
|
||||
# Check if this is a multi-file response (has "documents" key)
|
||||
if isinstance(chunk_json, dict) and "documents" in chunk_json:
|
||||
# This is a multi-file response - return it as-is
|
||||
logger.info("Detected multi-file response from AI - preserving structure")
|
||||
return chunk_json
|
||||
|
||||
# Extract sections from single-file response
|
||||
if isinstance(chunk_json, dict) and "sections" in chunk_json:
|
||||
for section in chunk_json["sections"]:
|
||||
# Add document context to section
|
||||
|
|
@ -1527,6 +1556,152 @@ class AiService:
|
|||
# This ensures MIME-type checking, chunk mapping, and parallel processing
|
||||
return await self._processDocumentsPerChunk(documents, prompt, options)
|
||||
|
||||
async def _callAiDirect(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions
) -> Dict[str, Any]:
    """
    Call AI directly with prompt and documents for JSON output.

    Used for multi-file generation; delegates to the existing per-chunk
    JSON generation pipeline and normalizes its result into the
    multi-file shape ({"metadata": ..., "documents": [...]}) when needed.
    """
    doc_count = len(documents) if documents else 0
    logger.info(f"Using existing generation pipeline for {doc_count} documents")

    # Delegate document processing and content extraction to the
    # already-working per-chunk JSON pipeline.
    result = await self._processDocumentsPerChunkJson(documents, prompt, options)

    # A single-file result carries "sections" but no "documents" key.
    # Anything else is passed through untouched.
    if "documents" in result or "sections" not in result:
        return result

    logger.info("Converting single-file result to multi-file format")
    # Wrap the single document so callers always see the multi-file shape.
    wrapped_document = {
        "id": "doc_1",
        "title": result.get("metadata", {}).get("title", "Document"),
        "filename": "document.txt",
        "sections": result.get("sections", []),
    }
    return {
        "metadata": result.get("metadata", {"title": "Converted Document"}),
        "documents": [wrapped_document],
    }
|
||||
|
||||
async def _processDocumentsPerChunkJsonWithPrompt(
    self,
    documents: List[ChatDocument],
    custom_prompt: str,
    options: Optional[AiCallOptions] = None
) -> Dict[str, Any]:
    """
    Process documents with per-chunk AI calls and merge results in JSON mode.
    Uses a custom prompt instead of the default extraction prompt.

    Args:
        documents: Source documents to extract, chunk, and send to the AI.
        custom_prompt: Prompt applied per chunk; injected both at the top
            level of the extraction options and inside the merge strategy.
        options: Optional AI call options; only operationType is read here,
            the rest is forwarded to the chunk processor and merger.

    Returns:
        The merged JSON document dict. On empty input, a non-list
        extraction result, or any exception, a stub dict with empty
        "sections" is returned instead of raising.
    """
    if not documents:
        return {"metadata": {"title": "Empty Document"}, "sections": []}

    # Get model capabilities for size calculation
    model_capabilities = self._getModelCapabilitiesForContent(custom_prompt, documents, options)

    # Build extraction options for chunking with intelligent merging
    extractionOptions: Dict[str, Any] = {
        "prompt": custom_prompt,  # Use the custom prompt instead of default
        "operationType": options.operationType if options else "general",
        "processDocumentsIndividually": True,  # Process each document separately
        "maxSize": model_capabilities["maxContextBytes"],
        "chunkAllowed": True,
        "textChunkSize": model_capabilities["textChunkSize"],
        "imageChunkSize": model_capabilities["imageChunkSize"],
        "imageMaxPixels": 1024 * 1024,
        "imageQuality": 85,
        "mergeStrategy": {
            "useIntelligentMerging": True,  # Enable intelligent token-aware merging
            "modelCapabilities": model_capabilities,
            "prompt": custom_prompt,  # Use the custom prompt
            "groupBy": "typeGroup",
            "orderBy": "id",
            "mergeType": "concatenate"
        },
    }

    logger.debug(f"Per-chunk extraction options (JSON mode): prompt length={len(extractionOptions.get('prompt', ''))} chars, operationType={extractionOptions.get('operationType')}")

    try:
        # Extract content with chunking
        # NOTE(review): extractContent is called without await here, so it
        # appears to be synchronous — confirm against the extraction service.
        extractionResult = self.extractionService.extractContent(documents, extractionOptions)

        if not isinstance(extractionResult, list):
            return {"metadata": {"title": "Error Document"}, "sections": []}

        # Process chunks with proper mapping
        logger.info(f"Processing {len(extractionResult)} chunks with custom prompt")
        logger.debug(f"Custom prompt preview: {custom_prompt[:200]}...")

        # Debug: Show what content is being processed (before filtering)
        for i, ec in enumerate(extractionResult):
            logger.debug(f"ContentExtracted {i}: id={ec.id}, parts={len(ec.parts) if hasattr(ec, 'parts') else 'no parts'}")

            # Check each part within the ContentExtracted
            if hasattr(ec, 'parts'):
                for j, part in enumerate(ec.parts):
                    if hasattr(part, 'data') and part.data:
                        logger.debug(f"  Part {j} content preview: {part.data[:200]}...")
                    else:
                        # Check what attributes the part actually has
                        part_attrs = [attr for attr in dir(part) if not attr.startswith('_')]
                        part_type = getattr(part, 'typeGroup', None)
                        part_mime = getattr(part, 'mimeType', '')
                        has_data = hasattr(part, 'data') and bool(part.data)

                        logger.debug(f"  Part {j} DEBUG: available_attrs={part_attrs}")
                        logger.debug(f"  Part {j} DEBUG: typeGroup='{part_type}', mimeType='{part_mime}', has_data={has_data}")

                        # Check if this is an empty container chunk (which is expected)
                        # Heuristic: container parts whose MIME type mentions
                        # "document" are treated as metadata-only wrappers.
                        is_empty_container = False
                        if part_type == "container" and part_mime and 'document' in part_mime.lower():
                            is_empty_container = True

                        if is_empty_container:
                            logger.debug(f"  Part {j} is empty container (will be filtered out) - mimeType={part_mime}")
                        else:
                            logger.warning(f"  Part {j} has no data - typeGroup='{part_type}', mimeType='{part_mime}', attrs={part_attrs}")
            else:
                logger.warning(f"ContentExtracted {i} has no parts attribute")

        # Run the per-chunk AI calls; generate_json=True requests JSON output
        # per chunk so results can be merged structurally below.
        chunkResults = await self._processChunksWithMapping(extractionResult, custom_prompt, options, generate_json=True)

        # Debug: Show what chunks were actually processed (after filtering)
        logger.info(f"After filtering: {len(chunkResults)} chunks will be processed")
        for i, chunk_result in enumerate(chunkResults):
            if chunk_result and chunk_result.metadata.get("success", False):
                logger.debug(f"Processed chunk {i}: {chunk_result.metadata.get('typeGroup', 'unknown')} - {len(chunk_result.aiResult)} chars")
            else:
                logger.debug(f"Processed chunk {i}: error or skipped")

        # Merge with JSON mode
        mergedJsonDocument = self._mergeChunkResultsJson(chunkResults, options)

        # Debug: Show what the AI actually returned
        logger.info(f"AI returned document with keys: {list(mergedJsonDocument.keys())}")
        if 'sections' in mergedJsonDocument:
            logger.info(f"Number of sections: {len(mergedJsonDocument['sections'])}")
            if mergedJsonDocument['sections']:
                logger.debug(f"First section preview: {str(mergedJsonDocument['sections'][0])[:200]}...")
            else:
                logger.warning("AI returned empty sections array")
        if 'documents' in mergedJsonDocument:
            logger.info(f"Number of documents: {len(mergedJsonDocument['documents'])}")
        else:
            logger.warning("AI did not return 'documents' key - this is single-file format")

        return mergedJsonDocument

    except Exception as e:
        # Best-effort contract: callers always receive a dict, never an exception.
        logger.error(f"Error in per-chunk JSON processing: {str(e)}")
        return {"metadata": {"title": "Error Document"}, "sections": []}
|
||||
|
||||
async def _callAiJson(
|
||||
self,
|
||||
prompt: str,
|
||||
|
|
@ -1821,6 +1996,88 @@ class AiService:
|
|||
target_length = int(len(text) * reduction_factor)
|
||||
return text[:target_length] + "... [reduced]"
|
||||
|
||||
async def _analyzePromptIntent(self, prompt: str, ai_service=None) -> Dict[str, Any]:
|
||||
"""Use AI to analyze user prompt and determine processing requirements."""
|
||||
if not ai_service:
|
||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||
|
||||
try:
|
||||
analysis_prompt = f"""
|
||||
Analyze this user request and determine if it requires multiple file output or single file output.
|
||||
|
||||
User request: "{prompt}"
|
||||
|
||||
Respond with JSON only in this exact format:
|
||||
{{
|
||||
"is_multi_file": true/false,
|
||||
"strategy": "single|per_entity|by_section|by_criteria|custom",
|
||||
"criteria": "description of how to split content",
|
||||
"file_naming_pattern": "suggested pattern for filenames",
|
||||
"reasoning": "brief explanation of the analysis"
|
||||
}}
|
||||
|
||||
Consider:
|
||||
- Does the user want separate files for different entities (customers, products, etc.)?
|
||||
- Does the user want to split content into multiple documents?
|
||||
- What would be the most logical way to organize the content?
|
||||
- What language is the request in? (analyze in the original language)
|
||||
|
||||
Return only the JSON response.
|
||||
"""
|
||||
|
||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
|
||||
request_options = AiCallOptions()
|
||||
request_options.operationType = OperationType.GENERAL
|
||||
|
||||
request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
|
||||
response = await ai_service.aiObjects.call(request)
|
||||
|
||||
if response and response.content:
|
||||
import json
|
||||
import re
|
||||
|
||||
# Extract JSON from response
|
||||
result = response.content.strip()
|
||||
json_match = re.search(r'\{.*\}', result, re.DOTALL)
|
||||
if json_match:
|
||||
result = json_match.group(0)
|
||||
|
||||
analysis = json.loads(result)
|
||||
return analysis
|
||||
else:
|
||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"AI prompt analysis failed: {str(e)}, defaulting to single file")
|
||||
return {"is_multi_file": False, "strategy": "single", "criteria": None}
|
||||
|
||||
def _validateResponseStructure(self, response: Dict[str, Any], prompt_analysis: Dict[str, Any]) -> bool:
    """Validate that AI response matches the expected structure.

    Multi-file responses must carry a "documents" list; single-file
    responses must carry a "sections" list. Never raises: any internal
    error is logged and reported as a failed validation.
    """
    try:
        # Guard clause: anything that is not a dict fails immediately.
        if not isinstance(response, dict):
            logger.warning(f"Response validation failed: Response is not a dict, got {type(response)}")
            return False

        if prompt_analysis.get("is_multi_file", False):
            # Check for multi-file structure
            has_documents = "documents" in response
            is_documents_list = isinstance(response.get("documents"), list)
            logger.info(f"Multi-file validation: has_documents={has_documents}, is_documents_list={is_documents_list}")
            valid = has_documents and is_documents_list
            if valid:
                logger.info(f"Multi-file validation passed: {len(response['documents'])} documents found")
            else:
                logger.warning(f"Multi-file validation failed: documents key present={has_documents}, documents is list={is_documents_list}")
                logger.warning(f"Available keys: {list(response.keys())}")
            return valid

        # Single-file structure: a list under "sections".
        has_sections = "sections" in response
        is_sections_list = isinstance(response.get("sections"), list)
        logger.info(f"Single-file validation: has_sections={has_sections}, is_sections_list={is_sections_list}")
        return has_sections and is_sections_list
    except Exception as e:
        logger.warning(f"Response validation failed with exception: {str(e)}")
        return False
|
||||
|
||||
async def _callAiWithDocumentGeneration(
|
||||
self,
|
||||
prompt: str,
|
||||
|
|
@ -1831,6 +2088,7 @@ class AiService:
|
|||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Handle AI calls with document generation in specific output format.
|
||||
Now supports both single-file and multi-file generation.
|
||||
|
||||
Args:
|
||||
prompt: The main prompt for the AI call
|
||||
|
|
@ -1842,6 +2100,43 @@ class AiService:
|
|||
Returns:
|
||||
Dict with generated documents and metadata
|
||||
"""
|
||||
try:
|
||||
# Use AI to analyze prompt intent
|
||||
prompt_analysis = await self._analyzePromptIntent(prompt, self)
|
||||
logger.info(f"Prompt analysis result: {prompt_analysis}")
|
||||
|
||||
if prompt_analysis.get("is_multi_file", False):
|
||||
return await self._callAiWithMultiFileGeneration(
|
||||
prompt, documents, options, outputFormat, title, prompt_analysis
|
||||
)
|
||||
else:
|
||||
return await self._callAiWithSingleFileGeneration(
|
||||
prompt, documents, options, outputFormat, title
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in document generation: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"content": "",
|
||||
"rendered_content": "",
|
||||
"mime_type": "text/plain",
|
||||
"filename": f"error_{outputFormat}",
|
||||
"format": outputFormat,
|
||||
"title": title or "Error",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
async def _callAiWithSingleFileGeneration(
|
||||
self,
|
||||
prompt: str,
|
||||
documents: Optional[List[ChatDocument]],
|
||||
options: AiCallOptions,
|
||||
outputFormat: str,
|
||||
title: Optional[str]
|
||||
) -> Dict[str, Any]:
|
||||
"""Handle single-file document generation (existing functionality)."""
|
||||
try:
|
||||
# Get format-specific extraction prompt from generation service
|
||||
from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
|
||||
|
|
@ -1912,20 +2207,216 @@ class AiService:
|
|||
"documentName": filename,
|
||||
"documentData": renderedContent,
|
||||
"mimeType": mimeType
|
||||
}]
|
||||
}],
|
||||
"is_multi_file": False
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error in document generation: {str(e)}")
|
||||
return {
|
||||
"success": False,
|
||||
"error": str(e),
|
||||
"content": "",
|
||||
"rendered_content": "",
|
||||
"mime_type": "text/plain",
|
||||
"filename": f"error_{outputFormat}",
|
||||
"format": outputFormat,
|
||||
"title": title or "Error",
|
||||
"documents": []
|
||||
}
|
||||
logger.error(f"Error in single-file document generation: {str(e)}")
|
||||
raise
|
||||
|
||||
async def _callAiWithMultiFileGeneration(
    self,
    prompt: str,
    documents: Optional[List[ChatDocument]],
    options: AiCallOptions,
    outputFormat: str,
    title: Optional[str],
    prompt_analysis: Dict[str, Any]
) -> Dict[str, Any]:
    """Handle multi-file document generation using AI analysis.

    Pipeline: build an adaptive extraction prompt from prompt_analysis,
    run the per-chunk JSON pipeline with it, validate the multi-file
    structure, then render each returned document individually. Falls
    back to single-file generation on invalid structure or any exception.

    Args:
        prompt: The original user prompt.
        documents: Source documents to process (may be None/empty).
        options: AI call options forwarded to the chunk pipeline.
        outputFormat: Target format ("docx", "pdf", "html", or other).
        title: Overall title; defaults to "AI Generated Documents".
        prompt_analysis: Result of _analyzePromptIntent (split strategy etc.).

    Returns:
        Dict with "success", "documents" (list of rendered files),
        "is_multi_file"=True and "split_strategy" on success; on failure,
        whatever _callAiWithSingleFileGeneration returns.
    """
    try:
        # Get multi-file extraction prompt based on AI analysis
        from modules.services.serviceGeneration.mainServiceGeneration import GenerationService
        generation_service = GenerationService(self.services)

        # Use default title if not provided
        if not title:
            title = "AI Generated Documents"

        # Get adaptive extraction prompt
        extraction_prompt = await generation_service.getAdaptiveExtractionPrompt(
            outputFormat=outputFormat,
            userPrompt=prompt,
            title=title,
            promptAnalysis=prompt_analysis,
            aiService=self
        )

        logger.info(f"Adaptive extraction prompt length: {len(extraction_prompt)} characters")
        logger.debug(f"Adaptive extraction prompt preview: {extraction_prompt[:500]}...")

        # Process with adaptive JSON schema - use the existing pipeline but with adaptive prompt
        logger.info(f"Using adaptive prompt with existing pipeline: {len(extraction_prompt)} chars")
        logger.debug(f"Processing documents: {len(documents) if documents else 0} documents")

        # Use the existing pipeline but replace the prompt with our adaptive one
        # This ensures proper document processing while using the multi-file prompt
        ai_response = await self._processDocumentsPerChunkJsonWithPrompt(documents, extraction_prompt, options)

        logger.info(f"AI response type: {type(ai_response)}")
        logger.info(f"AI response keys: {list(ai_response.keys()) if isinstance(ai_response, dict) else 'Not a dict'}")
        logger.debug(f"AI response preview: {str(ai_response)[:500]}...")

        # Validate response structure
        if not self._validateResponseStructure(ai_response, prompt_analysis):
            # Fallback to single-file if multi-file fails
            logger.warning(f"Multi-file processing failed - Invalid response structure. Expected multi-file but got: {list(ai_response.keys()) if isinstance(ai_response, dict) else type(ai_response)}")
            logger.warning(f"Prompt analysis: {prompt_analysis}")
            logger.warning("Falling back to single-file generation")
            return await self._callAiWithSingleFileGeneration(
                prompt, documents, options, outputFormat, title
            )

        # Process multiple documents
        generated_documents = []
        for i, doc_data in enumerate(ai_response.get("documents", [])):
            # Transform AI-generated sections to renderer-compatible format
            transformed_sections = []
            for section in doc_data.get("sections", []):
                # Convert AI format to renderer format
                # (AI emits "content_type"/"elements"; renderer expects
                # "type"/"data" with nested "text"/"elements").
                transformed_section = {
                    "id": section.get("id", f"section_{len(transformed_sections) + 1}"),
                    "type": section.get("content_type", "paragraph"),
                    "data": {
                        "text": "",
                        "elements": section.get("elements", [])
                    },
                    "order": section.get("order", len(transformed_sections) + 1)
                }

                # Extract text from elements for simple text-based sections
                if section.get("content_type") in ["paragraph", "heading"]:
                    text_parts = []
                    for element in section.get("elements", []):
                        if "text" in element:
                            text_parts.append(element["text"])
                    transformed_section["data"]["text"] = "\n".join(text_parts)

                transformed_sections.append(transformed_section)

            # Create complete document structure for rendering
            # NOTE(review): doc_data["title"] is a hard key access; a
            # document without "title" raises KeyError and drops the whole
            # batch into the single-file fallback — confirm intended.
            complete_document = {
                "metadata": {
                    "title": doc_data["title"],
                    "source_document": "multi_file_generation",
                    "document_id": doc_data.get("id", f"doc_{i+1}"),
                    "filename": doc_data.get("filename", f"document_{i+1}"),
                    "split_strategy": prompt_analysis.get("strategy", "custom")
                },
                "sections": transformed_sections,
                "summary": f"Generated document: {doc_data['title']}",
                "tags": ["multi_file", "ai_generated"]
            }

            rendered_content, mime_type = await generation_service.renderReport(
                extractedContent=complete_document,
                outputFormat=outputFormat,
                title=doc_data["title"],
                userPrompt=prompt,
                aiService=self
            )

            # Generate proper filename with correct extension
            base_filename = doc_data.get("filename", f"document_{i+1}")
            # Remove any existing extension and add the correct one
            if '.' in base_filename:
                base_filename = base_filename.rsplit('.', 1)[0]

            # Add proper extension based on output format
            if outputFormat.lower() == "docx":
                filename = f"{base_filename}.docx"
            elif outputFormat.lower() == "pdf":
                filename = f"{base_filename}.pdf"
            elif outputFormat.lower() == "html":
                filename = f"{base_filename}.html"
            else:
                filename = f"{base_filename}.{outputFormat}"

            generated_documents.append({
                "documentName": filename,
                "documentData": rendered_content,
                "mimeType": mime_type
            })

        # Save debug files for multi-file generation - only if debug enabled
        debug_enabled = self.services.utils.configGet("APP_DEBUG_CHAT_WORKFLOW_ENABLED", False)
        if debug_enabled:
            try:
                import os
                from datetime import datetime, UTC
                ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
                debug_root = "./test-chat/ai"
                debug_dir = os.path.join(debug_root, f"multifile_output_{ts}")
                os.makedirs(debug_dir, exist_ok=True)

                # Save metadata
                with open(os.path.join(debug_dir, "metadata.txt"), "w", encoding="utf-8") as f:
                    f.write(f"title: {title}\n")
                    f.write(f"format: {outputFormat}\n")
                    f.write(f"documents_count: {len(generated_documents)}\n")
                    f.write(f"split_strategy: {prompt_analysis.get('strategy', 'custom')}\n")
                    f.write(f"prompt_analysis: {prompt_analysis}\n")

                # Save each generated document
                for i, doc in enumerate(generated_documents):
                    doc_filename = doc["documentName"]
                    doc_data = doc["documentData"]
                    doc_mime = doc["mimeType"]

                    # Determine file extension
                    if outputFormat.lower() == "docx":
                        file_ext = ".docx"
                    elif outputFormat.lower() == "pdf":
                        file_ext = ".pdf"
                    elif outputFormat.lower() == "html":
                        file_ext = ".html"
                    else:
                        file_ext = f".{outputFormat}"

                    # Save the rendered document
                    output_path = os.path.join(debug_dir, f"document_{i+1}_{doc_filename}")

                    if file_ext in ['.md', '.txt', '.html', '.json', '.csv']:
                        # Text-based formats
                        with open(output_path, 'w', encoding='utf-8') as f:
                            f.write(doc_data)
                    else:
                        # Binary formats - decode from base64 if needed
                        # (rendered binary content presumably arrives
                        # base64-encoded — TODO confirm against renderReport)
                        try:
                            import base64
                            doc_bytes = base64.b64decode(doc_data)
                            with open(output_path, 'wb') as f:
                                f.write(doc_bytes)
                        except Exception:
                            # If not base64, save as text
                            with open(output_path, 'w', encoding='utf-8') as f:
                                f.write(doc_data)

                    logger.info(f"💾 Debug: Saved multi-file document {i+1}: {output_path}")

                logger.info(f"💾 Debug: Multi-file output saved to: {debug_dir}")

            except Exception as e:
                # Debug output is best-effort; never fail generation over it.
                logger.warning(f"Failed to save multi-file debug output: {e}")

        return {
            "success": True,
            "content": ai_response,
            "rendered_content": None,  # Not applicable for multi-file
            "mime_type": None,  # Not applicable for multi-file
            "filename": None,  # Not applicable for multi-file
            "format": outputFormat,
            "title": title,
            "documents": generated_documents,
            "is_multi_file": True,
            "split_strategy": prompt_analysis.get("strategy", "custom")
        }

    except Exception as e:
        logger.error(f"Error in multi-file document generation: {str(e)}")
        # Fallback to single-file
        return await self._callAiWithSingleFileGeneration(
            prompt, documents, options, outputFormat, title
        )
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import logging
|
||||
import uuid
|
||||
from typing import Any, Dict, List, Optional
|
||||
from typing import Any, Dict, List, Optional, Union, Tuple
|
||||
from datetime import datetime, UTC
|
||||
import re
|
||||
from modules.shared.timezoneUtils import get_utc_timestamp
|
||||
|
|
@ -372,6 +372,42 @@ class GenerationService:
|
|||
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
|
||||
raise
|
||||
|
||||
async def getAdaptiveExtractionPrompt(
    self,
    outputFormat: str,
    userPrompt: str,
    title: str,
    promptAnalysis: Dict[str, Any],
    aiService=None
) -> str:
    """Get adaptive extraction prompt based on AI analysis.

    Thin delegation to the prompt-builder submodule; this service only
    contributes its shared services registry.
    """
    from .subPromptBuilder import buildAdaptiveExtractionPrompt

    builder_kwargs = {
        "outputFormat": outputFormat,
        "userPrompt": userPrompt,
        "title": title,
        "promptAnalysis": promptAnalysis,
        "aiService": aiService,
        "services": self.services,
    }
    return await buildAdaptiveExtractionPrompt(**builder_kwargs)
|
||||
|
||||
async def getGenericExtractionPrompt(
    self,
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None
) -> str:
    """Get generic extraction prompt that works for both single and multi-file.

    Thin delegation to the prompt-builder submodule; this service only
    contributes its shared services registry.
    """
    from .subPromptBuilder import buildGenericExtractionPrompt

    builder_kwargs = {
        "outputFormat": outputFormat,
        "userPrompt": userPrompt,
        "title": title,
        "aiService": aiService,
        "services": self.services,
    }
    return await buildGenericExtractionPrompt(**builder_kwargs)
|
||||
|
||||
async def getExtractionPrompt(self, outputFormat: str, userPrompt: str, title: str, aiService=None) -> str:
|
||||
"""
|
||||
Get the format-specific extraction prompt for AI content extraction.
|
||||
|
|
@ -409,6 +445,75 @@ class GenerationService:
|
|||
logger.error(f"Error getting extraction prompt for {outputFormat}: {str(e)}")
|
||||
raise
|
||||
|
||||
async def renderAdaptiveReport(
    self,
    extractedContent: Dict[str, Any],
    outputFormat: str,
    title: str,
    userPrompt: Optional[str] = None,
    aiService=None,
    isMultiFile: bool = False
) -> Union[Tuple[str, str], List[Dict[str, Any]]]:
    """Render report adaptively based on content structure.

    Args:
        extractedContent: AI-extracted JSON document structure.
        outputFormat: Target format identifier (e.g. "docx", "pdf", "html").
        title: Report title.
        userPrompt: Original user prompt, forwarded to the renderer.
            (Annotation fixed: was ``str = None``, which is invalid per
            PEP 484; the file already imports Optional.)
        aiService: AI service handle forwarded to the renderer.
        isMultiFile: Request multi-file rendering; only honored when the
            content actually carries a "documents" key.

    Returns:
        A (content, mime_type) tuple for single-file output, or a list
        of per-document dicts for multi-file output.
    """
    # Multi-file rendering requires both the caller's intent and the
    # matching content shape; anything else takes the single-file path.
    if isMultiFile and "documents" in extractedContent:
        return await self._renderMultiFileReport(
            extractedContent, outputFormat, title, userPrompt, aiService
        )
    return await self._renderSingleFileReport(
        extractedContent, outputFormat, title, userPrompt, aiService
    )
|
||||
|
||||
async def _renderMultiFileReport(
    self,
    extractedContent: Dict[str, Any],
    outputFormat: str,
    title: str,
    userPrompt: Optional[str] = None,
    aiService=None
) -> List[Dict[str, Any]]:
    """Render multiple documents from extracted content.

    Args:
        extractedContent: Multi-file structure with a "documents" list;
            each entry may carry "sections", "title" and "filename".
        outputFormat: Target format used to resolve the renderer.
        title: Overall title, used as a fallback per-document title.
        userPrompt: Original user prompt forwarded to the renderer.
            (Annotation fixed: was ``str = None``, invalid per PEP 484.)
        aiService: AI service handle forwarded to the renderer.

    Returns:
        List of dicts with "filename", "content", "mime_type", "title";
        empty when no renderer exists for outputFormat.
    """
    generated_documents: List[Dict[str, Any]] = []

    # The renderer depends only on the format, not on the document, so
    # resolve it once instead of once per document. No renderer means
    # nothing can be rendered (same outcome as the old per-doc `continue`).
    renderer = self._getFormatRenderer(outputFormat)
    if not renderer:
        return generated_documents

    for index, doc_data in enumerate(extractedContent.get("documents", [])):
        # Robustness: use .get() with defaults instead of direct indexing,
        # so an AI response missing "title"/"filename"/"sections" does not
        # abort the whole batch with a KeyError.
        doc_title = doc_data.get("title", f"{title} {index + 1}")

        # Render individual document through the single-file renderer.
        rendered_content, mime_type = await renderer.render(
            extractedContent={"sections": doc_data.get("sections", [])},
            title=doc_title,
            userPrompt=userPrompt,
            aiService=aiService
        )

        generated_documents.append({
            "filename": doc_data.get("filename", f"document_{index + 1}"),
            "content": rendered_content,
            "mime_type": mime_type,
            "title": doc_title
        })

    return generated_documents
|
||||
|
||||
async def _renderSingleFileReport(
    self,
    extractedContent: Dict[str, Any],
    outputFormat: str,
    title: str,
    userPrompt: str = None,
    aiService=None
) -> Tuple[str, str]:
    """Render single file report (existing functionality).

    Thin wrapper: all real work happens in renderReport.
    """
    return await self.renderReport(
        extractedContent=extractedContent,
        outputFormat=outputFormat,
        title=title,
        userPrompt=userPrompt,
        aiService=aiService
    )
|
||||
|
||||
def _getFormatRenderer(self, output_format: str):
|
||||
"""Get the appropriate renderer for the specified format using auto-discovery."""
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -6,8 +6,197 @@ This module provides schemas that guide AI to generate structured JSON output.
|
|||
from typing import Dict, Any
|
||||
|
||||
|
||||
def get_multi_document_subJsonSchema() -> Dict[str, Any]:
    """Get the JSON schema for multi-document generation.

    The schema describes a root object with generation ``metadata`` (title,
    split strategy, naming pattern, ...) and a ``documents`` array, each entry
    of which is an independently renderable document made of ordered sections.
    Element shapes live under ``definitions`` and are referenced via ``$ref``.
    """

    def _list_item_schema() -> Dict[str, Any]:
        # Fresh dict per call so the inline occurrence and the "list_item"
        # definition do not alias each other.
        return {
            "type": "object",
            "required": ["text"],
            "properties": {
                "text": {"type": "string", "description": "List item text"},
                "subitems": {
                    "type": "array",
                    "items": {"$ref": "#/definitions/list_item"},
                    "description": "Nested sub-items (optional)"
                }
            }
        }

    # --- root metadata: how the output is split and named -------------------
    metadata_schema = {
        "type": "object",
        "required": ["title", "splitStrategy"],
        "properties": {
            "title": {"type": "string", "description": "Document title"},
            "splitStrategy": {
                "type": "string",
                "enum": ["per_entity", "by_section", "by_criteria", "by_data_type", "custom"],
                "description": "Strategy for splitting content into multiple files"
            },
            "splitCriteria": {
                "type": "object",
                "description": "Custom criteria for splitting (e.g., entity_id, category, etc.)"
            },
            "fileNamingPattern": {
                "type": "string",
                "description": "Pattern for generating filenames (e.g., '{entity_name}_data.docx')"
            },
            "author": {"type": "string", "description": "Document author (optional)"},
            "source_documents": {
                "type": "array",
                "items": {"type": "string"},
                "description": "List of source document IDs"
            },
            "extraction_method": {
                "type": "string",
                "default": "ai_extraction",
                "description": "Method used for extraction"
            }
        }
    }

    # --- one section inside a document --------------------------------------
    section_schema = {
        "type": "object",
        "required": ["id", "content_type", "elements", "order"],
        "properties": {
            "id": {"type": "string", "description": "Unique section identifier"},
            "title": {"type": "string", "description": "Section title (optional)"},
            "content_type": {
                "type": "string",
                "enum": ["table", "list", "paragraph", "heading", "code", "image", "mixed"],
                "description": "Primary content type of this section"
            },
            "elements": {
                "type": "array",
                "description": "Content elements in this section",
                "items": {
                    "oneOf": [
                        {"$ref": "#/definitions/table"},
                        {"$ref": "#/definitions/bullet_list"},
                        {"$ref": "#/definitions/paragraph"},
                        {"$ref": "#/definitions/heading"},
                        {"$ref": "#/definitions/code_block"}
                    ]
                }
            },
            "order": {"type": "integer", "description": "Section order in document"},
            "metadata": {
                "type": "object",
                "description": "Additional section metadata"
            }
        }
    }

    # --- one output document -------------------------------------------------
    document_schema = {
        "type": "object",
        "required": ["id", "title", "sections", "filename"],
        "properties": {
            "id": {"type": "string", "description": "Unique document identifier"},
            "title": {"type": "string", "description": "Document title"},
            "filename": {"type": "string", "description": "Generated filename"},
            "sections": {
                "type": "array",
                "description": "Document sections containing structured content",
                "items": section_schema
            },
            "metadata": {
                "type": "object",
                "description": "Document-specific metadata"
            }
        }
    }

    # --- element shapes referenced by $ref above -----------------------------
    definitions = {
        "table": {
            "type": "object",
            "required": ["headers", "rows"],
            "properties": {
                "headers": {
                    "type": "array",
                    "items": {"type": "string"},
                    "description": "Table column headers"
                },
                "rows": {
                    "type": "array",
                    "items": {
                        "type": "array",
                        "items": {"type": "string"}
                    },
                    "description": "Table data rows"
                },
                "caption": {
                    "type": "string",
                    "description": "Table caption (optional)"
                }
            }
        },
        "bullet_list": {
            "type": "object",
            "required": ["items"],
            "properties": {
                "items": {
                    "type": "array",
                    "items": _list_item_schema(),
                    "description": "List items"
                },
                "list_type": {
                    "type": "string",
                    "enum": ["bullet", "numbered", "checklist"],
                    "default": "bullet",
                    "description": "Type of list"
                }
            }
        },
        "list_item": _list_item_schema(),
        "paragraph": {
            "type": "object",
            "required": ["text"],
            "properties": {
                "text": {"type": "string", "description": "Paragraph text"},
                "formatting": {
                    "type": "object",
                    "description": "Text formatting (bold, italic, etc.)"
                }
            }
        },
        "heading": {
            "type": "object",
            "required": ["text", "level"],
            "properties": {
                "text": {"type": "string", "description": "Heading text"},
                "level": {
                    "type": "integer",
                    "minimum": 1,
                    "maximum": 6,
                    "description": "Heading level (1-6)"
                }
            }
        },
        "code_block": {
            "type": "object",
            "required": ["code"],
            "properties": {
                "code": {"type": "string", "description": "Code content"},
                "language": {"type": "string", "description": "Programming language (optional)"}
            }
        }
    }

    return {
        "type": "object",
        "required": ["metadata", "documents"],
        "properties": {
            "metadata": metadata_schema,
            "documents": {
                "type": "array",
                "description": "Array of individual documents to generate",
                "items": document_schema
            }
        },
        "definitions": definitions
    }
|
||||
|
||||
def get_document_subJsonSchema() -> Dict[str, Any]:
|
||||
"""Get the JSON schema for structured document generation."""
|
||||
"""Get the JSON schema for structured document generation (single document)."""
|
||||
return {
|
||||
"type": "object",
|
||||
"required": ["metadata", "sections"],
|
||||
|
|
@ -227,6 +416,13 @@ Return only the enhanced JSON structure following the schema. Do not include any
|
|||
"""
|
||||
|
||||
|
||||
def get_adaptive_json_schema(prompt_analysis: Dict[str, Any] = None) -> Dict[str, Any]:
    """Automatically select appropriate schema based on prompt analysis.

    Returns the multi-document schema only when an analysis dict is supplied
    and flags ``is_multi_file``; otherwise the single-document schema.
    """
    wants_multi = bool(prompt_analysis) and prompt_analysis.get("is_multi_file", False)
    return get_multi_document_subJsonSchema() if wants_multi else get_document_subJsonSchema()
|
||||
|
||||
def validate_json_document(json_data: Dict[str, Any]) -> bool:
|
||||
"""Validate that the JSON data follows the document schema."""
|
||||
try:
|
||||
|
|
@ -234,35 +430,86 @@ def validate_json_document(json_data: Dict[str, Any]) -> bool:
|
|||
if not isinstance(json_data, dict):
|
||||
return False
|
||||
|
||||
if "metadata" not in json_data or "sections" not in json_data:
|
||||
return False
|
||||
|
||||
metadata = json_data["metadata"]
|
||||
if not isinstance(metadata, dict) or "title" not in metadata:
|
||||
return False
|
||||
|
||||
sections = json_data["sections"]
|
||||
if not isinstance(sections, list):
|
||||
return False
|
||||
|
||||
# Validate each section
|
||||
for i, section in enumerate(sections):
|
||||
if not isinstance(section, dict):
|
||||
# Check if it's multi-document or single-document structure
|
||||
if "documents" in json_data:
|
||||
# Multi-document structure
|
||||
if "metadata" not in json_data:
|
||||
return False
|
||||
|
||||
required_fields = ["id", "content_type", "elements", "order"]
|
||||
for field in required_fields:
|
||||
if field not in section:
|
||||
metadata = json_data["metadata"]
|
||||
if not isinstance(metadata, dict) or "title" not in metadata or "splitStrategy" not in metadata:
|
||||
return False
|
||||
|
||||
documents = json_data["documents"]
|
||||
if not isinstance(documents, list):
|
||||
return False
|
||||
|
||||
# Validate each document
|
||||
for doc in documents:
|
||||
if not isinstance(doc, dict):
|
||||
return False
|
||||
|
||||
required_fields = ["id", "title", "sections", "filename"]
|
||||
for field in required_fields:
|
||||
if field not in doc:
|
||||
return False
|
||||
|
||||
# Validate sections in each document
|
||||
sections = doc.get("sections", [])
|
||||
if not isinstance(sections, list):
|
||||
return False
|
||||
|
||||
for section in sections:
|
||||
if not isinstance(section, dict):
|
||||
return False
|
||||
|
||||
section_required = ["id", "content_type", "elements", "order"]
|
||||
for field in section_required:
|
||||
if field not in section:
|
||||
return False
|
||||
|
||||
# Validate content_type
|
||||
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
|
||||
if section["content_type"] not in valid_types:
|
||||
return False
|
||||
|
||||
# Validate elements
|
||||
if not isinstance(section["elements"], list):
|
||||
return False
|
||||
|
||||
# Validate content_type
|
||||
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
|
||||
if section["content_type"] not in valid_types:
|
||||
elif "sections" in json_data:
|
||||
# Single-document structure (existing validation)
|
||||
if "metadata" not in json_data:
|
||||
return False
|
||||
|
||||
# Validate elements
|
||||
if not isinstance(section["elements"], list):
|
||||
metadata = json_data["metadata"]
|
||||
if not isinstance(metadata, dict) or "title" not in metadata:
|
||||
return False
|
||||
|
||||
sections = json_data["sections"]
|
||||
if not isinstance(sections, list):
|
||||
return False
|
||||
|
||||
# Validate each section
|
||||
for i, section in enumerate(sections):
|
||||
if not isinstance(section, dict):
|
||||
return False
|
||||
|
||||
required_fields = ["id", "content_type", "elements", "order"]
|
||||
for field in required_fields:
|
||||
if field not in section:
|
||||
return False
|
||||
|
||||
# Validate content_type
|
||||
valid_types = ["table", "list", "paragraph", "heading", "code", "image", "mixed"]
|
||||
if section["content_type"] not in valid_types:
|
||||
return False
|
||||
|
||||
# Validate elements
|
||||
if not isinstance(section["elements"], list):
|
||||
return False
|
||||
else:
|
||||
return False
|
||||
|
||||
return True
|
||||
|
||||
|
|
|
|||
|
|
@ -8,7 +8,8 @@ Builds a robust prompt that:
|
|||
- Requires the AI to output a filename header that we can parse and use
|
||||
"""
|
||||
|
||||
from typing import Protocol
|
||||
import json
|
||||
from typing import Protocol, Dict, Any
|
||||
|
||||
|
||||
class _RendererLike(Protocol):
|
||||
|
|
@ -16,6 +17,291 @@ class _RendererLike(Protocol):
|
|||
...
|
||||
|
||||
|
||||
async def buildAdaptiveExtractionPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    promptAnalysis: Dict[str, Any],
    aiService=None,
    services=None
) -> str:
    """Build adaptive extraction prompt based on AI analysis.

    When ``promptAnalysis["is_multi_file"]`` is truthy, the prompt instructs
    the model to return a root-level "documents" array (one entry per output
    file), illustrated with a worked example; otherwise the single-document
    JSON schema is embedded verbatim.

    Args:
        outputFormat: Target output format (unused here; kept for interface
            compatibility with the other prompt builders).
        userPrompt: Raw user request, prepended verbatim to the prompt.
        title: Report title (unused here; kept for interface compatibility).
        promptAnalysis: Intent-analysis dict; only "is_multi_file" is read.
        aiService: Unused; kept for interface compatibility.
        services: Unused; kept for interface compatibility.

    Returns:
        The fully assembled prompt string.
    """
    # Decide once (the original evaluated this twice and also assigned an
    # unused `schema_type` variable — both removed).
    is_multi_file = promptAnalysis.get("is_multi_file", False)

    if is_multi_file:
        # Heuristic: requests that mention mails/messages get an
        # email-flavoured example so the model mirrors a per-email split.
        is_json_email = any(keyword in userPrompt.lower() for keyword in ['email', 'mail', 'json', 'message', 'conversation'])

        if is_json_email:
            # Specialized example for JSON email data
            multi_file_example = {
                "metadata": {
                    "title": "Email Conversations",
                    "splitStrategy": "per_entity"
                },
                "documents": [
                    {
                        "id": "doc_1",
                        "title": "Email from SENDER to RECIPIENT",
                        "filename": "email_sender_to_recipient.txt",
                        "sections": [
                            {
                                "id": "section_1",
                                "content_type": "heading",
                                "elements": [
                                    {
                                        "text": "Email from SENDER to RECIPIENT",
                                        "level": 1
                                    }
                                ],
                                "order": 1
                            },
                            {
                                "id": "section_2",
                                "content_type": "paragraph",
                                "elements": [
                                    {
                                        "text": "FULL_EMAIL_CONTENT_HERE"
                                    }
                                ],
                                "order": 2
                            }
                        ]
                    }
                ]
            }
        else:
            # Generic multi-file example with explicit placeholders
            multi_file_example = {
                "metadata": {
                    "title": "REPLACE_WITH_ACTUAL_DOCUMENT_TITLE",
                    "splitStrategy": "by_section"
                },
                "documents": [
                    {
                        "id": "doc_1",
                        "title": "REPLACE_WITH_ACTUAL_SECTION_TITLE",
                        "filename": "REPLACE_WITH_ACTUAL_FILENAME",
                        "sections": [
                            {
                                "id": "section_1",
                                "content_type": "heading",
                                "elements": [
                                    {
                                        "text": "REPLACE_WITH_ACTUAL_HEADING_TEXT",
                                        "level": 1
                                    }
                                ],
                                "order": 1
                            },
                            {
                                "id": "section_2",
                                "content_type": "paragraph",
                                "elements": [
                                    {
                                        "text": "REPLACE_WITH_ACTUAL_PARAGRAPH_CONTENT"
                                    }
                                ],
                                "order": 2
                            }
                        ]
                    }
                ]
            }

        adaptive_prompt = f"""
{userPrompt}

You are a document processing assistant that extracts and structures content from documents. Your task is to analyze the provided document content and create a structured JSON output.

TASK: Extract the actual content from the document and organize it into separate sections, where each section will become a separate file.

REQUIREMENTS:
1. Analyze the document content provided in the context below
2. Identify distinct sections in the document (by headings, topics, or logical breaks)
3. Create one JSON document entry for each section found
4. Extract the real content from each section (headings, paragraphs, lists, etc.)
5. Generate appropriate filenames for each section

CRITICAL: You MUST return a JSON structure with a "documents" array, NOT a "sections" array.

OUTPUT FORMAT: Return only valid JSON in this exact structure:
{json.dumps(multi_file_example, indent=2)}

IMPORTANT: The JSON must have a "documents" key containing an array of document objects. Each document object must have:
- "id": unique identifier
- "title": section title from the document
- "filename": appropriate filename for the section
- "sections": array of content sections

DO NOT return a JSON with "sections" at the root level. Return a JSON with "documents" at the root level.

INSTRUCTIONS:
- Replace "REPLACE_WITH_ACTUAL_*" placeholders with real content from the document
- Use actual section titles, headings, and text from the document
- Create meaningful filenames based on section content
- Ensure each section contains the complete content for that part of the document
- Do not use generic placeholder text like "Section 1", "Section 2"
- Extract real headings, paragraphs, lists, and other content elements
- CRITICAL: Return JSON with "documents" array, not "sections" array

CONTEXT (Document Content):

Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements

Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses

Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.

Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
""".strip()
    else:
        # The JSON schema is only embedded in the single-document prompt, so
        # fetch it lazily here instead of unconditionally up front (the
        # original imported and computed it on both branches).
        from .subJsonSchema import get_adaptive_json_schema
        json_schema = get_adaptive_json_schema(promptAnalysis)

        # Single-file prompt - use original style
        adaptive_prompt = f"""
{userPrompt}

You are extracting structured content from documents and must respond with valid JSON only.

IMPORTANT: You must respond with valid JSON only. No additional text, explanations, or formatting outside the JSON structure.

Extract the actual data from the source documents and structure it as JSON with this format:
{json.dumps(json_schema, indent=2)}

Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements

Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses

Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.

Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
""".strip()

    return adaptive_prompt
|
||||
|
||||
async def buildGenericExtractionPrompt(
    outputFormat: str,
    userPrompt: str,
    title: str,
    aiService=None,
    services=None
) -> str:
    """Build generic extraction prompt that works for both single and multi-file.

    If an AI service is available, a quick intent-analysis call decides
    whether the request needs multiple output files and the result is handed
    to ``buildAdaptiveExtractionPrompt``. On any failure — or when no AI
    service is supplied — this falls back to the single-document prompt.

    Args:
        outputFormat: Target output format, forwarded to the adaptive builder.
        userPrompt: Raw user request.
        title: Report title, forwarded to the adaptive builder.
        aiService: Optional AI service used for the intent analysis.
        services: Optional service locator, used only for debug logging.

    Returns:
        The assembled prompt string.
    """
    # Use AI to determine the best approach
    if aiService:
        try:
            analysis_prompt = f"""
Analyze this user request and determine the best JSON structure for document extraction.

User request: "{userPrompt}"

Respond with JSON only:
{{
"requires_multi_file": true/false,
"recommended_schema": "single_document|multi_document",
"split_approach": "description of how to organize content",
"file_naming": "suggested naming pattern"
}}

Consider the user's intent and the most logical way to organize the extracted content.
"""

            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationType
            request_options = AiCallOptions()
            request_options.operationType = OperationType.GENERAL

            request = AiCallRequest(prompt=analysis_prompt, context="", options=request_options)
            response = await aiService.aiObjects.call(request)

            if response and response.content:
                import re

                result = response.content.strip()
                # The model sometimes wraps the JSON in prose; keep only the
                # outermost {...} span.
                json_match = re.search(r'\{.*\}', result, re.DOTALL)
                if json_match:
                    result = json_match.group(0)

                analysis = json.loads(result)

                # BUGFIX: the analysis prompt asks the model for
                # "requires_multi_file", but buildAdaptiveExtractionPrompt
                # keys off "is_multi_file". Without this mapping the
                # multi-file branch could never trigger from this path.
                if "is_multi_file" not in analysis:
                    analysis["is_multi_file"] = bool(analysis.get("requires_multi_file", False))

                # Use analysis to build appropriate prompt
                return await buildAdaptiveExtractionPrompt(
                    outputFormat, userPrompt, title, analysis, aiService, services
                )
        except Exception as e:
            # BUGFIX: `services` defaults to None; guard before logging so a
            # failed analysis degrades to the fallback instead of raising
            # AttributeError inside the handler.
            if services:
                services.utils.debugLogToFile(f"Generic prompt analysis failed: {str(e)}", "PROMPT_BUILDER")

    # Fallback to single-file prompt
    from .subJsonSchema import get_document_subJsonSchema
    json_schema = get_document_subJsonSchema()

    return f"""
{userPrompt}

You are extracting structured content from documents and must respond with valid JSON only.

CRITICAL: You must respond with valid JSON only. No additional text, explanations, markdown formatting, code blocks, or any other content outside the JSON structure. Do not use ``` markers or any other formatting.

Extract the actual data from the source documents and structure it as JSON with this format:
{json.dumps(json_schema, indent=2)}

Requirements:
- Preserve all original data - do not summarize or interpret
- Use the exact JSON schema provided
- Maintain data integrity and structure

Content Types to Extract:
1. Tables: Extract all rows and columns with proper headers
2. Lists: Extract all items with proper nesting
3. Headings: Extract with appropriate levels
4. Paragraphs: Extract as structured text
5. Code: Extract code blocks with language identification
6. Images: Analyze images and describe all visible content including text, tables, logos, graphics, layout, and visual elements

Image Analysis Requirements:
- If you cannot analyze an image for any reason, explain why in the JSON response
- Describe everything you see in the image
- Include all text content, tables, logos, graphics, layout, and visual elements
- If the image is too small, corrupted, or unclear, explain this
- Always provide feedback - never return empty responses

Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.

Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.

DO NOT return a schema description - return actual extracted content in the JSON format shown above.
"""
|
||||
|
||||
async def buildExtractionPrompt(
|
||||
outputFormat: str,
|
||||
renderer: _RendererLike,
|
||||
|
|
@ -48,7 +334,7 @@ async def buildExtractionPrompt(
|
|||
|
||||
You are extracting structured content from documents and must respond with valid JSON only.
|
||||
|
||||
IMPORTANT: You must respond with valid JSON only. No additional text, explanations, or formatting outside the JSON structure.
|
||||
CRITICAL: You must respond with valid JSON only. No additional text, explanations, markdown formatting, code blocks, or any other content outside the JSON structure. Do not use ``` markers or any other formatting.
|
||||
|
||||
Extract the actual data from the source documents and structure it as JSON with this format:
|
||||
{{
|
||||
|
|
@ -106,6 +392,10 @@ Image Analysis Requirements:
|
|||
- Always provide feedback - never return empty responses
|
||||
|
||||
Return only the JSON structure with actual data from the documents. Do not include any text before or after the JSON.
|
||||
|
||||
Extract the ACTUAL CONTENT from the source documents. Do not use placeholder text like "Section 1", "Section 2", etc. Extract the real headings, paragraphs, and content from the documents.
|
||||
|
||||
DO NOT return a schema description - return actual extracted content in the JSON format shown above.
|
||||
""".strip()
|
||||
|
||||
# Final assembly
|
||||
|
|
|
|||
|
|
@ -220,6 +220,8 @@ async def process_documents_and_generate_summary():
|
|||
|
||||
userPrompt = "Analyze the document containing mails for customer use cases. Can you create one file for each email in plain text format?"
|
||||
|
||||
# userPrompt = "Can you create one file for each section in the document"
|
||||
|
||||
# userPrompt = "Analyze these documents and create a fitting image for the content"
|
||||
|
||||
# userPrompt = "Extract the table from file and produce 2 lists in excel. one list with all entries, one list only with entries that are yellow highlighted."
|
||||
|
|
|
|||
263
test_multifile_processing.py
Normal file
263
test_multifile_processing.py
Normal file
|
|
@ -0,0 +1,263 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Test script for multi-file processing implementation.
|
||||
This script tests the new multi-file functionality without breaking existing single-file processing.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
from typing import Dict, Any, List
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def test_multi_file_detection():
    """Smoke-test AI-powered multi-file detection against a canned AI reply."""
    print("=== Testing Multi-File Detection ===")

    # Canned analysis reply the mock AI always returns.
    canned_reply = (
        '{"is_multi_file": true, "strategy": "per_entity", "criteria": "customer_id", '
        '"file_naming_pattern": "{customer_name}_data.docx", '
        '"reasoning": "User wants separate files for each customer"}'
    )

    class MockResponse:
        def __init__(self, content):
            self.content = content

    class MockAiService:
        async def call(self, request):
            return MockResponse(canned_reply)

    class MockAiObjects:
        def __init__(self):
            self.call = MockAiService().call

    try:
        from modules.services.serviceAi.mainServiceAi import AiService

        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        class MockServiceCenter:
            def __init__(self):
                self.utils = MockUtils()

        # Wire the AI service to the mocked backend.
        ai_service = AiService(MockServiceCenter())
        ai_service.aiObjects = MockAiObjects()

        test_prompts = [
            "Create one file for each customer in the document",
            "Split the data into separate files by category",
            "Generate individual files for each product",
            "Create a single report with all data",
            "Erstelle eine Datei für jeden Kunden",  # German
            "Créer un fichier par section",  # French
        ]

        for prompt in test_prompts:
            print(f"\nTesting prompt: '{prompt}'")
            try:
                intent = await ai_service._analyzePromptIntent(prompt, ai_service)
                print(f" Analysis: {intent}")

                if intent.get("is_multi_file"):
                    print(f" ✓ Detected as multi-file with strategy: {intent.get('strategy')}")
                else:
                    print(f" ✓ Detected as single-file")
            except Exception as e:
                print(f" ✗ Error: {str(e)}")

        print("\n=== Multi-File Detection Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        print("Make sure you're running from the gateway directory")
        return False
    except Exception as e:
        print(f"Error during testing: {e}")
        return False
|
||||
|
||||
async def test_json_schema_validation():
    """Exercise schema loading, adaptive selection, and document validation."""
    print("\n=== Testing JSON Schema Validation ===")

    try:
        from modules.services.serviceGeneration.subJsonSchema import (
            get_document_subJsonSchema,
            get_multi_document_subJsonSchema,
            get_adaptive_json_schema,
            validate_json_document
        )

        # Both schemas must load.
        schema_single = get_document_subJsonSchema()
        print(f"✓ Single document schema loaded: {len(schema_single)} properties")

        schema_multi = get_multi_document_subJsonSchema()
        print(f"✓ Multi-document schema loaded: {len(schema_multi)} properties")

        # Adaptive selection must route on the is_multi_file flag.
        picked_single = get_adaptive_json_schema({"is_multi_file": False})
        picked_multi = get_adaptive_json_schema({"is_multi_file": True})

        print(f"✓ Adaptive schema selection working")
        print(f" Single-file schema type: {picked_single.get('type', 'unknown')}")
        print(f" Multi-file schema type: {picked_multi.get('type', 'unknown')}")

        # Minimal valid samples for each structure.
        sample_single = {
            "metadata": {"title": "Test Document"},
            "sections": [
                {
                    "id": "section_1",
                    "content_type": "paragraph",
                    "elements": [{"text": "Test content"}],
                    "order": 1
                }
            ]
        }

        sample_multi = {
            "metadata": {
                "title": "Test Documents",
                "splitStrategy": "per_entity"
            },
            "documents": [
                {
                    "id": "doc_1",
                    "title": "Document 1",
                    "filename": "doc1.docx",
                    "sections": [
                        {
                            "id": "section_1",
                            "content_type": "paragraph",
                            "elements": [{"text": "Content 1"}],
                            "order": 1
                        }
                    ]
                }
            ]
        }

        ok_single = validate_json_document(sample_single)
        ok_multi = validate_json_document(sample_multi)

        print(f"✓ Single document validation: {'PASS' if ok_single else 'FAIL'}")
        print(f"✓ Multi-document validation: {'PASS' if ok_multi else 'FAIL'}")

        print("\n=== JSON Schema Validation Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        return False
    except Exception as e:
        print(f"Error during schema testing: {e}")
        return False
|
||||
|
||||
async def test_prompt_builder():
    """Smoke-test adaptive and generic prompt building with stub services."""
    print("\n=== Testing Prompt Builder ===")

    try:
        from modules.services.serviceGeneration.subPromptBuilder import (
            buildAdaptiveExtractionPrompt,
            buildGenericExtractionPrompt
        )

        class MockUtils:
            def debugLogToFile(self, message, category):
                print(f"[{category}] {message}")

        class MockServices:
            def __init__(self):
                self.utils = MockUtils()

        stub_services = MockServices()

        # Adaptive path: pretend the analysis already flagged a multi-file job.
        multi_analysis = {
            "is_multi_file": True,
            "strategy": "per_entity",
            "criteria": "customer_id",
            "file_naming_pattern": "{customer_name}_data.docx"
        }

        adaptive_prompt = await buildAdaptiveExtractionPrompt(
            outputFormat="docx",
            userPrompt="Create one file for each customer",
            title="Customer Data",
            promptAnalysis=multi_analysis,
            aiService=None,
            services=stub_services
        )

        print(f"✓ Adaptive prompt generated: {len(adaptive_prompt)} characters")
        print(f" Contains multi-file instructions: {'documents' in adaptive_prompt}")

        # Generic path: no AI service, falls back to the single-file prompt.
        generic_prompt = await buildGenericExtractionPrompt(
            outputFormat="docx",
            userPrompt="Create a single report",
            title="Report",
            aiService=None,
            services=stub_services
        )

        print(f"✓ Generic prompt generated: {len(generic_prompt)} characters")
        print(f" Contains single-file instructions: {'sections' in generic_prompt}")

        print("\n=== Prompt Builder Test Complete ===")
        return True

    except ImportError as e:
        print(f"Import error: {e}")
        return False
    except Exception as e:
        print(f"Error during prompt builder testing: {e}")
        return False
|
||||
|
||||
async def main():
    """Run all multi-file processing smoke tests and print a summary."""
    print("Starting Multi-File Processing Tests...")
    print("=" * 50)

    tests = [
        test_multi_file_detection,
        test_json_schema_validation,
        test_prompt_builder,
    ]

    results = []
    for test_fn in tests:
        try:
            results.append(await test_fn())
        except Exception as e:
            # A crashing test counts as a failure but must not stop the run.
            print(f"Test failed with exception: {e}")
            results.append(False)

    passed = sum(results)
    print("\n" + "=" * 50)
    print("Test Results Summary:")
    print(f" Tests run: {len(tests)}")
    print(f" Passed: {passed}")
    print(f" Failed: {len(tests) - passed}")

    success = all(results)
    if success:
        print("\n🎉 All tests passed! Multi-file processing is ready.")
    else:
        print("\n⚠️ Some tests failed. Check the implementation.")

    return success


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Loading…
Reference in a new issue