class DocumentIntent(BaseModel):
    """Intent analysis result for a single document.

    Carries the handling decision(s) for one input document together with an
    optional extraction prompt and a free-text justification.
    """
    # ID of the document this intent entry refers to.
    documentId: str = Field(description="ID des Dokuments")
    # One or more intents; the description names 'extract', 'render' and
    # 'reference'. The type is a plain List[str], so the values are not
    # restricted to that set by this model.
    intents: List[str] = Field(description="Liste von Intents: ['extract', 'render', 'reference'] - mehrere möglich")
    # Optional document-specific extraction prompt; defaults to None.
    extractionPrompt: Optional[str] = Field(default=None, description="Spezifischer Prompt für Extraktion (z.B. 'Extract text from images for legends')")
    # Required explanation of why the intent(s) were chosen (debugging/transparency).
    reasoning: str = Field(description="Erklärung für Debugging/Transparenz: Warum wurde dieser Intent gewählt?")
DocumentData from modules.interfaces.interfaceAiObjects import AiObjects from modules.shared.jsonUtils import ( @@ -183,7 +184,8 @@ Respond with ONLY a JSON object in this exact format: promptBuilder: Optional[callable] = None, promptArgs: Optional[Dict[str, Any]] = None, operationId: Optional[str] = None, - userPrompt: Optional[str] = None + userPrompt: Optional[str] = None, + contentParts: Optional[List[ContentPart]] = None # ARCHITECTURE: Support ContentParts for large content ) -> str: """ Shared core function for AI calls with repair-based looping system. @@ -254,10 +256,14 @@ Respond with ONLY a JSON object in this exact format: try: if iterationOperationId: self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model") + # ARCHITECTURE: Pass ContentParts directly to AiCallRequest + # This allows model-aware chunking to handle large content properly + # ContentParts are only passed in first iteration (continuations don't need them) request = AiCallRequest( prompt=iterationPrompt, context="", - options=options + options=options, + contentParts=contentParts if iteration == 1 else None # Only pass ContentParts in first iteration ) # Write the ACTUAL prompt sent to AI @@ -971,22 +977,1164 @@ If no trackable items can be identified, return: {{"kpis": []}} self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") return result + # Helper methods for callAiContent refactoring + + async def _handleImageGeneration( + self, + prompt: str, + options: AiCallOptions, + title: Optional[str], + aiOperationId: str + ) -> AiResponse: + """Handle IMAGE_GENERATE operation type.""" + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") + + request = AiCallRequest( + prompt=prompt, + context="", + options=options + ) + + response = await self.callAi(request) + + if not response.content: + errorMsg = f"No image data returned: {response.content}" + logger.error(f"Error in AI image generation: {errorMsg}") + 
async def _handleWebOperation(
    self,
    prompt: str,
    options: AiCallOptions,
    opType: OperationTypeEnum,
    aiOperationId: str
) -> AiResponse:
    """
    Execute a web operation (WEB_SEARCH / WEB_CRAWL) and wrap the result.

    Args:
        prompt: Prompt forwarded unchanged to the AI call (raw JSON; parsing
            is left to the connector).
        options: AI call options passed through to the request.
        opType: Operation type; its name is used in log messages and in the
            workflow-stat key, its value is stored in the response metadata.
        aiOperationId: Progress-log operation ID to update and finish.

    Returns:
        AiResponse carrying the returned content and the operation metadata.

    Raises:
        ValueError: If the AI call returns no content. The progress log is
            finished as failed before raising.
    """
    chatService = self.services.chat
    operationName = opType.name

    chatService.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {operationName}")

    webRequest = AiCallRequest(
        prompt=prompt,
        context="",
        options=options
    )
    webResponse = await self.callAi(webRequest)

    # Guard clause: an empty answer is treated as a failed operation.
    if not webResponse.content:
        errorMsg = f"No content returned from {operationName}: {webResponse.content}"
        logger.error(f"Error in {operationName}: {errorMsg}")
        chatService.progressLogFinish(aiOperationId, False)
        raise ValueError(errorMsg)

    responseMetadata = AiResponseMetadata(operationType=opType.value)

    statKey = f"ai.{operationName.lower()}"
    chatService.storeWorkflowStat(self.services.workflow, webResponse, statKey)

    chatService.progressLogUpdate(aiOperationId, 0.9, f"{operationName} completed")
    chatService.progressLogFinish(aiOperationId, True)

    return AiResponse(content=webResponse.content, metadata=responseMetadata)
async def _clarifyDocumentIntents(
    self,
    documents: List[ChatDocument],
    userPrompt: str,
    actionParameters: Dict[str, Any],
    parentOperationId: str
) -> List[DocumentIntent]:
    """
    Phase 5A: Ask the AI which documents need extraction, rendering or referencing.

    Pre-extracted JSON documents are presented to the AI as their original
    document; intents returned for an original document ID are mapped back to
    the ID of the JSON document that was actually passed in.

    Args:
        documents: Documents to analyze.
        userPrompt: The user request.
        actionParameters: Action-specific parameters, forwarded to the prompt builder.
        parentOperationId: Parent operation ID for the chat-log hierarchy.

    Returns:
        One DocumentIntent per entry of the "intents" list in the AI response.

    Raises:
        Exception: Any error is logged, the progress log is finished as
            failed, and the exception is re-raised.
    """
    chatLog = self.services.chat
    analysisOperationId = f"{parentOperationId}_intent_analysis"

    chatLog.progressLogStart(
        analysisOperationId,
        "Document Intent Analysis",
        "Intent Analysis",
        f"Analyzing {len(documents)} documents",
        parentOperationId=parentOperationId
    )

    try:
        # original document ID -> ID of the pre-extracted JSON document
        originalToJsonId = {}
        promptDocuments = []

        for doc in documents:
            preExtracted = self._resolvePreExtractedDocument(doc)
            if not preExtracted:
                promptDocuments.append(doc)
                continue

            origInfo = preExtracted["originalDocument"]
            originalToJsonId[origInfo["id"]] = doc.id
            # Temporary stand-in describing the original document; the fileId
            # still points at the JSON document.
            promptDocuments.append(
                ChatDocument(
                    id=origInfo["id"],
                    fileName=origInfo["fileName"],
                    mimeType=origInfo["mimeType"],
                    fileSize=origInfo.get("fileSize", doc.fileSize),
                    fileId=doc.fileId
                )
            )

        analysisPrompt = self._buildIntentAnalysisPrompt(userPrompt, promptDocuments, actionParameters)

        rawAnswer = await self.callAiPlanning(
            prompt=analysisPrompt,
            debugType="document_intent_analysis"
        )

        parsedAnswer = json.loads(self.services.utils.jsonExtractString(rawAnswer))

        results = []
        for rawIntent in parsedAnswer.get("intents", []):
            reportedId = rawIntent.get("documentId")
            # Translate an original-document ID back to the JSON document ID.
            if reportedId in originalToJsonId:
                rawIntent["documentId"] = originalToJsonId[reportedId]
            results.append(DocumentIntent(**rawIntent))

        serializedIntents = json.dumps([item.dict() for item in results], indent=2)
        self.services.utils.writeDebugFile(serializedIntents, "document_intent_analysis_result")

        chatLog.progressLogFinish(analysisOperationId, True)
        return results

    except Exception as e:
        chatLog.progressLogFinish(analysisOperationId, False)
        logger.error(f"Error in _clarifyDocumentIntents: {str(e)}")
        raise
+ """ + if document.mimeType != "application/json": + return None + + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if not docBytes: + return None + + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if not isinstance(jsonData, dict): + return None + + # Check for ContentExtracted format + documentData = None + if "parts" in jsonData and isinstance(jsonData.get("parts"), list): + # Direct ContentExtracted format: {"id": "...", "parts": [...], ...} + documentData = jsonData + else: + validationMetadata = jsonData.get("validationMetadata", {}) + actionType = validationMetadata.get("actionType") + if actionType == "context.extractContent": + # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} + documentData = jsonData.get("documentData") + + if documentData: + from modules.datamodels.datamodelExtraction import ContentExtracted + + try: + contentExtracted = ContentExtracted(**documentData) + + if contentExtracted.parts: + # Extrahiere ursprüngliche Dokument-Info aus den Parts + originalDocId = None + originalFileName = None + originalMimeType = None + + for part in contentExtracted.parts: + if part.metadata: + # Versuche ursprüngliche Dokument-Info zu finden + if not originalDocId and part.metadata.get("documentId"): + originalDocId = part.metadata.get("documentId") + if not originalFileName and part.metadata.get("originalFileName"): + originalFileName = part.metadata.get("originalFileName") + if not originalMimeType and part.metadata.get("documentMimeType"): + originalMimeType = part.metadata.get("documentMimeType") + + # Falls nicht gefunden, verwende documentName aus ContentExtracted + if not originalFileName and hasattr(contentExtracted, 'id'): + # Versuche aus documentName zu extrahieren (z.B. 
"B2025-02c_28_extracted_...json" -> "B2025-02c_28.pdf") + if document.fileName and "_extracted_" in document.fileName: + originalFileName = document.fileName.split("_extracted_")[0] + ".pdf" + + return { + "originalDocument": { + "id": originalDocId or document.id, + "fileName": originalFileName or document.fileName, + "mimeType": originalMimeType or "application/pdf", + "fileSize": document.fileSize + }, + "contentExtracted": contentExtracted, + "parts": contentExtracted.parts + } + except Exception as parseError: + logger.debug(f"Could not parse ContentExtracted format: {str(parseError)}") + return None + + return None + except Exception as e: + logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}") + return None + + def _buildIntentAnalysisPrompt( + self, + userPrompt: str, + documents: List[ChatDocument], + actionParameters: Dict[str, Any] + ) -> str: + """Baue Prompt für Intent-Analyse.""" + # Baue Dokument-Liste - zeige ursprüngliche Dokumente für pre-extracted JSONs + docListText = "" + for i, doc in enumerate(documents, 1): + # Prüfe ob es ein pre-extracted JSON ist + preExtracted = self._resolvePreExtractedDocument(doc) + + if preExtracted: + # Zeige ursprüngliches Dokument statt JSON + originalDoc = preExtracted["originalDocument"] + partsInfo = f" (contains {len(preExtracted['parts'])} pre-extracted parts: {', '.join([p.typeGroup for p in preExtracted['parts'] if p.data and len(str(p.data)) > 0])})" + docListText += f"\n{i}. Document ID: {originalDoc['id']}\n" + docListText += f" File Name: {originalDoc['fileName']}{partsInfo}\n" + docListText += f" MIME Type: {originalDoc['mimeType']}\n" + docListText += f" File Size: {originalDoc.get('fileSize', doc.fileSize)} bytes\n" + else: + # Normales Dokument + docListText += f"\n{i}. 
Document ID: {doc.id}\n" + docListText += f" File Name: {doc.fileName}\n" + docListText += f" MIME Type: {doc.mimeType}\n" + docListText += f" File Size: {doc.fileSize} bytes\n" + + outputFormat = actionParameters.get("outputFormat", "txt") + + prompt = f"""USER REQUEST: +{userPrompt} + +DOCUMENTS TO ANALYZE: +{docListText} + +TASK: For each document, determine its intents (can be multiple): +- "extract": Content extraction needed (text, structure, OCR, etc.) +- "render": Image/binary should be rendered as-is (visual element) +- "reference": Document reference/attachment (no extraction, just reference) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "intents": [ + {{ + "documentId": "doc_1", + "intents": ["extract"], # Array - can contain multiple! + "extractionPrompt": "Extract all text content, preserving structure", + "reasoning": "User needs text content for document generation" + }}, + {{ + "documentId": "doc_2", + "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering + "extractionPrompt": "Extract text content from image using vision AI", + "reasoning": "Image contains text that needs extraction, but also should be rendered visually" + }}, + {{ + "documentId": "doc_3", + "intents": ["reference"], + "extractionPrompt": null, + "reasoning": "Document is only used as reference, no extraction needed" + }} + ] +}} + +CRITICAL RULES: +1. For images (mimeType starts with "image/"): + - If user wants to "include" or "show" images → add "render" + - If user wants to "analyze", "read text", or "extract text" from images → add "extract" + - Can have BOTH "extract" and "render" if image needs both text extraction and visual rendering + +2. For text documents: + - If user mentions "template" or "structure" → "reference" or "extract" based on context + - If user mentions "reference" or "context" → "reference" + - Default → "extract" + +3. 
Consider output format: + - For formats like PDF, DOCX, PPTX: images usually need "render" + - For formats like CSV, JSON: usually "extract" only + - For HTML: can have both "extract" and "render" + +Return ONLY valid JSON following the structure above. +""" + return prompt + + async def _extractAndPrepareContent( + self, + documents: List[ChatDocument], + documentIntents: List[DocumentIntent], + parentOperationId: str + ) -> List[ContentPart]: + """ + Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor. + Gibt Liste von ContentParts im passenden Format zurück. + + WICHTIG: Ein Dokument kann mehrere ContentParts erzeugen, wenn mehrere Intents vorhanden sind. + Beispiel: Bild mit intents=["extract", "render"] erzeugt: + - ContentPart(contentFormat="object", ...) für Rendering + - ContentPart(contentFormat="extracted", ...) für Text-Analyse + + Args: + documents: Liste der zu verarbeitenden Dokumente + documentIntents: Liste von DocumentIntent-Objekten + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Liste von ContentParts mit vollständigen Metadaten + """ + # Erstelle Operation-ID für Extraktion + extractionOperationId = f"{parentOperationId}_content_extraction" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + extractionOperationId, + "Content Extraction", + "Extraction", + f"Extracting from {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + allContentParts = [] + + for document in documents: + # Check if document is already a ContentExtracted document (pre-extracted JSON) + preExtracted = self._resolvePreExtractedDocument(document) + + if preExtracted: + # Verwende bereits extrahierte ContentParts direkt + contentExtracted = preExtracted["contentExtracted"] + intent = self._getIntentForDocument(document.id, documentIntents) + + if contentExtracted.parts: + for part in contentExtracted.parts: + # Überspringe leere Parts (Container 
ohne Daten) + if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + if part.typeGroup == "container": + continue # Überspringe leere Container + + if not part.metadata: + part.metadata = {} + + # Ensure metadata is complete + if "documentId" not in part.metadata: + part.metadata["documentId"] = document.id + if "contentFormat" not in part.metadata: + part.metadata["contentFormat"] = "extracted" + + # WICHTIG: Prüfe Intent für dieses Part + partIntent = intent.intents if intent else ["extract"] + + # Wenn Intent "render" für Images hat, erstelle auch object Part + if "render" in partIntent and part.typeGroup == "image" and part.data: + # Image-Part mit render Intent: Erstelle sowohl extracted als auch object Part + # 1. Extracted Part (bereits vorhanden) + part.metadata["intent"] = "extract" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = True + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) + + # 2. 
Object Part für Rendering (base64 data ist bereits im extracted Part) + objectPart = ContentPart( + id=f"obj_{document.id}_{part.id}", + label=f"Object: {part.label or 'Image'}", + typeGroup="image", + mimeType=part.mimeType or "image/jpeg", + data=part.data, # Base64 data ist bereits vorhanden + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedExtractedPartId": part.id + } + ) + allContentParts.append(objectPart) + else: + # Normales extracted Part + part.metadata["intent"] = partIntent[0] if partIntent else "extract" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = True + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) + + logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}") + logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}") + continue # Skip normal extraction for this document + + # Check if it's standardized JSON format (has "documents" or "sections") + if document.mimeType == "application/json": + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if docBytes: + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): + logger.info(f"Document is already in standardized JSON format, using as reference") + # Create reference ContentPart for structured JSON + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="structure", + mimeType="application/json", + data=docData, + metadata={ + "contentFormat": "reference", + 
"documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "skipExtraction": True, + "intent": "reference" + } + ) + allContentParts.append(contentPart) + logger.info(f"✅ Using JSON document directly without extraction") + continue # Skip normal extraction for this document + except Exception as e: + logger.warning(f"Could not parse JSON document {document.fileName}, will extract normally: {str(e)}") + # Continue with normal extraction + + # Normal extraction path + intent = self._getIntentForDocument(document.id, documentIntents) + + if not intent: + # Default: extract für alle Dokumente ohne Intent + logger.warning(f"No intent found for document {document.id}, using default 'extract'") + intent = DocumentIntent( + documentId=document.id, + intents=["extract"], + extractionPrompt="Extract all content from the document", + reasoning="Default intent: no specific intent found" + ) + + # WICHTIG: Prüfe alle Intents - ein Dokument kann mehrere ContentParts erzeugen + + if "reference" in intent.intents: + # Erstelle Reference ContentPart + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="reference", + mimeType=document.mimeType, + data="", + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "intent": "reference", + "usageHint": f"Reference document: {document.fileName}" + } + ) + allContentParts.append(contentPart) + + # WICHTIG: "render" und "extract" können beide vorhanden sein! 
+ # In diesem Fall erzeugen wir BEIDE ContentParts + + if "render" in intent.intents: + # Für Images/Binary: extrahiere als Object + if document.mimeType.startswith("image/") or self._isBinary(document.mimeType): + try: + # Lade Binary-Daten (getFileData ist nicht async - keine await nötig) + binaryData = self.services.interfaceDbComponent.getFileData(document.fileId) + if not binaryData: + logger.warning(f"No binary data found for document {document.id}") + continue + base64Data = base64.b64encode(binaryData).decode('utf-8') + + contentPart = ContentPart( + id=f"obj_{document.id}", + label=f"Object: {document.fileName}", + typeGroup="image" if document.mimeType.startswith("image/") else "binary", + mimeType=document.mimeType, + data=base64Data, + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {document.fileName}", + "originalFileName": document.fileName, + # Verknüpfung zu extracted Part (falls vorhanden) + "relatedExtractedPartId": f"ext_{document.id}" if "extract" in intent.intents else None + } + ) + allContentParts.append(contentPart) + except Exception as e: + logger.error(f"Failed to load binary data for document {document.id}: {str(e)}") + + if "extract" in intent.intents: + # Extrahiere Content mit Extraction Service + extractionPrompt = intent.extractionPrompt or "Extract all content from the document" + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + extractionPrompt, + f"content_extraction_prompt_{document.id}" + ) + + # Führe Extraktion aus + from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy() + ) + + # extractContent ist nicht async - keine await nötig + extractedResults = self.services.extraction.extractContent( + [document], + extractionOptions, + operationId=extractionOperationId, + 
parentOperationId=extractionOperationId + ) + + # Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten + for extracted in extractedResults: + for part in extracted.parts: + # Markiere als extracted Format + part.metadata.update({ + "contentFormat": "extracted", + "documentId": document.id, + "extractionPrompt": extractionPrompt, + "intent": "extract", + "usageHint": f"Use extracted content from {document.fileName}", + # Verknüpfung zu object Part (falls vorhanden) + "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None + }) + # Stelle sicher, dass ID eindeutig ist (falls object Part existiert) + if "render" in intent.intents: + part.id = f"ext_{document.id}_{part.id}" + allContentParts.append(part) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps([part.dict() for part in allContentParts], indent=2, default=str), + "content_extraction_result" + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(extractionOperationId, True) + + return allContentParts + + except Exception as e: + self.services.chat.progressLogFinish(extractionOperationId, False) + logger.error(f"Error in _extractAndPrepareContent: {str(e)}") + raise + + def _isBinary(self, mimeType: str) -> bool: + """Prüfe ob MIME-Type binary ist.""" + binaryTypes = [ + "application/octet-stream", + "application/pdf", + "application/zip", + "application/x-zip-compressed" + ] + return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/") + + async def _generateStructure( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5C: Generiert Dokument-Struktur mit Sections. 
+ Jede Section spezifiziert: + - Welcher Content sollte in dieser Section sein + - Welche ContentParts zu verwenden sind + - Format für jeden ContentPart + + Args: + userPrompt: User-Anfrage + contentParts: Alle vorbereiteten ContentParts mit Metadaten + outputFormat: Ziel-Format (html, docx, pdf, etc.) + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Struktur-Dict mit documents und sections + """ + # Erstelle Operation-ID für Struktur-Generierung + structureOperationId = f"{parentOperationId}_structure_generation" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + structureOperationId, + "Structure Generation", + "Structure", + f"Generating structure for {outputFormat}", + parentOperationId=parentOperationId + ) + + try: + # Baue Struktur-Prompt mit Content-Index + structurePrompt = self._buildStructurePrompt( + userPrompt=userPrompt, + contentParts=contentParts, + outputFormat=outputFormat + ) + + # AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.callAiPlanning( + prompt=structurePrompt, + debugType="document_generation_structure" + ) + + # Parse Struktur + structure = json.loads(self.services.utils.jsonExtractString(aiResponse)) + + # ChatLog abschließen + self.services.chat.progressLogFinish(structureOperationId, True) + + return structure + + except Exception as e: + self.services.chat.progressLogFinish(structureOperationId, False) + logger.error(f"Error in _generateStructure: {str(e)}") + raise + + def _buildStructurePrompt( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str + ) -> str: + """Baue Prompt für Struktur-Generierung.""" + # Baue ContentParts-Index - filtere leere Parts heraus + contentPartsIndex = "" + validParts = [] + for part in contentParts: + # Überspringe leere Parts (keine Daten oder nur Container ohne Inhalt) + if not 
part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + # Überspringe Container-Parts ohne Daten + if part.typeGroup == "container" and not part.data: + continue + # Überspringe andere leere Parts + if not part.data: + continue + + validParts.append(part) + + # Baue Index nur für gültige Parts + for i, part in enumerate(validParts, 1): + contentFormat = part.metadata.get("contentFormat", "unknown") + dataPreview = "" + + if contentFormat == "extracted": + # Für Image-Parts: Zeige dass es ein Image ist + if part.typeGroup == "image": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "image" + dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content" + elif part.typeGroup == "container": + # Container ohne Daten überspringen wir bereits oben + dataPreview = "Container structure (no text content)" + else: + # Zeige Preview von extrahiertem Text + if part.data: + preview = part.data[:200] + "..." if len(part.data) > 200 else part.data + dataPreview = preview + else: + dataPreview = "(empty)" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "binary" + if part.typeGroup == "image": + dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)" + else: + dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)" + elif contentFormat == "reference": + dataPreview = part.metadata.get("documentReference", "reference") + + contentPartsIndex += f"\n{i}. 
async def _fillStructure(
    self,
    structure: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    parentOperationId: str
) -> Dict[str, Any]:
    """
    Phase 5D: Fill the generated structure with actual content.

    For every section of every document in ``structure``:
    - Each ID in ``contentPartIds`` is resolved to a ContentPart and turned
      into an element according to its content format ("reference",
      "object" or "extracted"). Unknown IDs and unknown formats are skipped.
    - If ``generation_hint`` is set, the AI is asked to generate elements,
      which are appended after the ContentPart elements.

    Sections are processed sequentially. A failure while generating one
    section does not abort the run: an element of type "error" is appended
    to that section instead.

    Args:
        structure: Structure dict with "documents", each holding "sections".
        contentParts: All prepared ContentParts.
        userPrompt: The user request.
        parentOperationId: Parent operation ID for the chat-log hierarchy.

    Returns:
        A deep copy of ``structure`` with "elements" set on every section.
        The input dict is not modified.

    Raises:
        Exception: Errors outside the per-section AI generation are logged,
            the progress log is finished as failed, and the exception is
            re-raised.
    """
    import copy

    fillOperationId = f"{parentOperationId}_structure_filling"

    # Count sections over ALL documents. Indexing the first document only
    # crashes on an empty "documents" list and under-reports multi-document
    # structures. "or []" also tolerates explicit nulls in AI-produced JSON.
    sectionCount = sum(
        len(doc.get("sections") or [])
        for doc in (structure.get("documents") or [])
    )

    self.services.chat.progressLogStart(
        fillOperationId,
        "Structure Filling",
        "Filling",
        f"Filling {sectionCount} sections",
        parentOperationId=parentOperationId
    )

    try:
        filledStructure = copy.deepcopy(structure)

        for doc in filledStructure.get("documents") or []:
            for section in doc.get("sections") or []:
                sectionId = section.get("id")
                contentPartIds = section.get("contentPartIds") or []
                contentFormats = section.get("contentFormats") or {}
                generationHint = section.get("generation_hint")

                elements = []
                # Parts resolved once and reused for the generation prompt.
                resolvedParts = []

                for partId in contentPartIds:
                    part = self._findContentPartById(partId, contentParts)
                    if not part:
                        continue
                    resolvedParts.append(part)

                    partMetadata = part.metadata or {}
                    # The section's explicit format wins over the part's own format.
                    contentFormat = contentFormats.get(partId, partMetadata.get("contentFormat"))

                    if contentFormat == "reference":
                        elements.append({
                            "type": "reference",
                            "documentReference": partMetadata.get("documentReference"),
                            "label": partMetadata.get("usageHint", part.label)
                        })

                    elif contentFormat == "object":
                        elements.append({
                            "type": part.typeGroup,  # "image", "binary", etc.
                            "base64Data": part.data,
                            "mimeType": part.mimeType,
                            "altText": partMetadata.get("usageHint", part.label)
                        })

                    elif contentFormat == "extracted":
                        elements.append({
                            "type": "extracted_text",
                            "content": part.data,
                            "source": partMetadata.get("documentId"),
                            "extractionPrompt": partMetadata.get("extractionPrompt")
                        })

                if generationHint:
                    generationPrompt = self._buildSectionGenerationPrompt(
                        section=section,
                        contentParts=resolvedParts,
                        userPrompt=userPrompt,
                        generationHint=generationHint
                    )

                    sectionOperationId = f"{fillOperationId}_section_{sectionId}"

                    self.services.chat.progressLogStart(
                        sectionOperationId,
                        "Section Generation",
                        "Section",
                        f"Generating section {sectionId}",
                        parentOperationId=fillOperationId
                    )

                    try:
                        aiResponse = await self.callAiPlanning(
                            prompt=generationPrompt,
                            debugType=f"section_generation_{sectionId}"
                        )

                        generatedElements = json.loads(
                            self.services.utils.jsonExtractString(aiResponse)
                        )
                        if isinstance(generatedElements, list):
                            elements.extend(generatedElements)
                        elif isinstance(generatedElements, dict) and "elements" in generatedElements:
                            elements.extend(generatedElements["elements"])
                        else:
                            # Make a dropped response visible instead of silently ignoring it.
                            logger.warning(
                                f"Section {sectionId}: AI response has no usable 'elements', nothing added"
                            )

                        self.services.chat.progressLogFinish(sectionOperationId, True)

                    except Exception as e:
                        # Deliberately not re-raised: the section is rendered
                        # with an error element and processing continues.
                        self.services.chat.progressLogFinish(sectionOperationId, False)
                        elements.append({
                            "type": "error",
                            "message": f"Error generating section {sectionId}: {str(e)}",
                            "sectionId": sectionId
                        })
                        logger.error(f"Error generating section {sectionId}: {str(e)}")

                section["elements"] = elements

        self.services.chat.progressLogFinish(fillOperationId, True)

        return filledStructure

    except Exception as e:
        self.services.chat.progressLogFinish(fillOperationId, False)
        logger.error(f"Error in _fillStructure: {str(e)}")
        raise
+Jedes Element sollte dem content_type der Section entsprechen. +""" + return prompt + + def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]: + """Finde ContentPart nach ID.""" + for part in contentParts: + if part.id == partId: + return part + return None + + async def _renderResult( + self, + filledStructure: Dict[str, Any], + outputFormat: str, + title: str, + userPrompt: str, + parentOperationId: str + ) -> Tuple[bytes, str]: + """ + Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. + Unterstützt Multi-Dokument-Rendering: Alle Dokumente werden gerendert. + + Args: + filledStructure: Gefüllte Struktur mit elements + outputFormat: Ziel-Format (pdf, docx, html, etc.) + title: Dokument-Titel + userPrompt: User-Anfrage + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Tuple von (renderedContent, mimeType) + """ + # Erstelle Operation-ID für Rendering + renderOperationId = f"{parentOperationId}_rendering" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + renderOperationId, + "Content Rendering", + "Rendering", + f"Rendering to {outputFormat} format", + parentOperationId=parentOperationId + ) + + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + + generationService = GenerationService(self.services) + + # Multi-Dokument-Rendering + documents = filledStructure.get("documents", []) + + if len(documents) == 1: + # Einzelnes Dokument - wie bisher + renderedContent, mimeType, images = await generationService.renderReport( + filledStructure, + outputFormat, + title, + userPrompt, + self, + parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie + ) + else: + # Mehrere Dokumente - rendere alle + # Option: Alle Sections zusammenführen und als ein Dokument rendern + all_sections = [] + for doc in documents: + if "sections" in doc: + all_sections.extend(doc.get("sections", [])) + + # Erstelle 
temporäres Dokument mit allen Sections + merged_document = { + "metadata": filledStructure["metadata"], + "documents": [{ + "id": "merged", + "title": title, + "filename": f"{title}.{outputFormat}", + "sections": all_sections + }] + } + + renderedContent, mimeType, images = await generationService.renderReport( + merged_document, + outputFormat, + title, + userPrompt, + self, + parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(renderOperationId, True) + + return renderedContent, mimeType + + except Exception as e: + self.services.chat.progressLogFinish(renderOperationId, False) + logger.error(f"Error in _renderResult: {str(e)}") + raise + + def _shouldSkipContentPart( + self, + part: ContentPart + ) -> bool: + """Check if ContentPart should be skipped (already structured JSON).""" + if part.typeGroup == "structure" and part.mimeType == "application/json": + if part.metadata.get("skipExtraction", False): + logger.debug(f"Skipping already-structured JSON ContentPart {part.id} (skipExtraction=True)") + return True + try: + if isinstance(part.data, str): + jsonData = json.loads(part.data) + if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): + logger.debug(f"Skipping already-structured JSON ContentPart {part.id} (contains documents/sections)") + return True + except Exception: + pass # Not JSON, continue processing + return False + async def callAiContent( self, prompt: str, options: AiCallOptions, contentParts: Optional[List[ContentPart]] = None, + documentList: Optional[Any] = None, # DocumentReferenceList + documentIntents: Optional[List[DocumentIntent]] = None, outputFormat: Optional[str] = None, title: Optional[str] = None, - parentOperationId: Optional[str] = None # Parent operation ID for hierarchical logging + parentOperationId: Optional[str] = None ) -> AiResponse: """ - Unified AI content processing method (replaces 
callAiDocuments and callAiText). + Einheitliche AI-Content-Verarbeitung - Single Entry Point für alle AI-Actions. + + Alle AI-Actions (ai.process, ai.generateDocument, etc.) routen hier durch. + Sie unterscheiden sich nur in Parametern, nicht in Logik. Args: prompt: The main prompt for the AI call - contentParts: Optional list of already-extracted content parts (preferred) options: AI call configuration options (REQUIRED - operationType must be set) + contentParts: Optional list of already-extracted content parts (preferred) + documentList: Optional DocumentReferenceList (wird zu ChatDocuments konvertiert) + documentIntents: Optional list of DocumentIntent objects (wird erstellt wenn nicht vorhanden) outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx') title: Optional title for generated documents parentOperationId: Optional parent operation ID for hierarchical logging @@ -996,14 +2144,11 @@ If no trackable items can be identified, return: {{"kpis": []}} """ await self.ensureAiObjectsInitialized() - # Create separate operationId for detailed progress tracking + # Erstelle Operation-ID workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" aiOperationId = f"ai_content_{workflowId}_{int(time.time())}" - # Use parent operation ID directly (parentId should be operationId, not log entry ID) - # parentOperationId is already the operationId of the parent - - # Start progress tracking with parent reference + # Starte Progress-Tracking mit Parent-Referenz self.services.chat.progressLogStart( aiOperationId, "AI content processing", @@ -1013,376 +2158,141 @@ If no trackable items can be identified, return: {{"kpis": []}} ) try: - # Default outputFormat to "txt" if not specified (unified path - all formats handled the same way) + # Initialisiere Defaults if not outputFormat: outputFormat = "txt" - # Extraction is now separate - contentParts must be extracted before calling - # Require 
operationType to be set before calling opType = getattr(options, "operationType", None) if not opType: - # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE options.operationType = OperationTypeEnum.DATA_GENERATE opType = OperationTypeEnum.DATA_GENERATE - # Handle IMAGE_GENERATE operations + # Route zu Operation-spezifischen Handlern if opType == OperationTypeEnum.IMAGE_GENERATE: - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") - - request = AiCallRequest( - prompt=prompt, - context="", - options=options - ) - - response = await self.callAi(request) - - if response.content: - # Build document data for image - imageDoc = DocumentData( - documentName="generated_image.png", - documentData=response.content, - mimeType="image/png" - ) - - metadata = AiResponseMetadata( - title=title or "Generated Image", - operationType=opType.value - ) - - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - "ai.generate.image" - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=response.content, - metadata=metadata, - documents=[imageDoc] - ) - else: - errorMsg = f"No image data returned: {response.content}" - logger.error(f"Error in AI image generation: {errorMsg}") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(errorMsg) + return await self._handleImageGeneration(prompt, options, title, aiOperationId) - # Handle WEB_SEARCH and WEB_CRAWL operations if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL: - self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}") - - request = AiCallRequest( - prompt=prompt, # Raw JSON prompt - connector will parse it - context="", - options=options - ) - - response = await self.callAi(request) - - if response.content: - metadata = 
AiResponseMetadata( - operationType=opType.value - ) - - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - f"ai.{opType.name.lower()}" - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=response.content, - metadata=metadata - ) - else: - errorMsg = f"No content returned from {opType.name}: {response.content}" - logger.error(f"Error in {opType.name}: {errorMsg}") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(errorMsg) + return await self._handleWebOperation(prompt, options, opType, aiOperationId) - # Handle document generation (outputFormat always set, defaults to "txt") - # Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way - # outputFormat is always set now (defaults to "txt" if not specified) - - # CRITICAL: For document generation with JSON templates, NEVER compress the prompt + # Dokument-Generierungs-Pfad options.compressPrompt = False options.compressContext = False - # Process contentParts for generation prompt (if provided) - # Use generic callWithContentParts() which handles all content types (images, text, etc.) 
- # This automatically processes images with vision models and merges all results - if contentParts: - # Filter out binary/other parts that shouldn't be processed - processableParts = [] - skippedParts = [] - for p in contentParts: - if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))): - processableParts.append(p) - else: - skippedParts.append(p) - - if skippedParts: - logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation") - - if processableParts: - # Count images for progress update - imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))]) - if imageCount > 0: - self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models") - - # Build proper extraction prompt using buildExtractionPrompt - # This creates a focused extraction prompt, not the user's generation prompt - from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt - - # Determine renderer for format-specific guidelines - renderer = None - if outputFormat: - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - renderer = generationService.getRendererForFormat(outputFormat) - except Exception as e: - logger.debug(f"Could not get renderer for format {outputFormat}: {e}") - - extractionPrompt = await buildExtractionPrompt( - outputFormat=outputFormat or "txt", - userPrompt=prompt, # User's prompt as context for what to extract - title=title or "Document", - aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None, - services=self.services, - renderer=renderer - ) - - logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt") - - # Use DATA_EXTRACT 
operation type for extraction - extractionOptions = AiCallOptions( - operationType=OperationTypeEnum.DATA_EXTRACT, # Use DATA_EXTRACT for extraction - compressPrompt=options.compressPrompt, - compressContext=options.compressContext - ) - - extractionRequest = AiCallRequest( - prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt - context="", - options=extractionOptions, - contentParts=processableParts - ) - - # Write debug file for extraction prompt (all parts) - self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt") - - # Call generic content parts processor - handles images, text, chunking, merging - extractionResponse = await self.callAi(extractionRequest) - - # Write debug file for extraction response - if extractionResponse.content: - self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response") - else: - self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response") - logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})") - - # Use extracted content directly for generation prompt - if extractionResponse.errorCount == 0 and extractionResponse.content: - # The extracted content is already merged and ready to use - content_for_generation = extractionResponse.content - logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation") - else: - # Extraction failed - use placeholders - logger.warning(f"Content extraction failed, using placeholders") - placeholderParts = [] - for p in processableParts: - placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]") - content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None - else: - content_for_generation = None - logger.debug("No processable parts found in contentParts") - else: 
- content_for_generation = None + # Schritt 5A: Kläre Dokument-Intents + documents = [] + if documentList: + documents = self.services.chat.getChatDocumentsFromDocumentList(documentList) - # Detect if this is a section generation prompt (not full document generation) - # Section prompts contain "SECTION TO GENERATE" marker - isSectionGeneration = "SECTION TO GENERATE" in prompt or "CRITICAL: Return ONLY a JSON object with an \"elements\" array" in prompt - - if isSectionGeneration: - # For section generation, use the prompt directly without wrapping - # Section prompts are already complete and should not be wrapped in document generation template - logger.debug("Detected section generation prompt - skipping document generation wrapper") - generation_prompt = prompt - - # Call AI directly without looping (sections are simple, single-call) - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for section generation") - request = AiCallRequest( - prompt=generation_prompt, - context="", - options=options - ) - response = await self.callAi(request) - generated_json = response.content if response and response.content else "" - - # For section generation, return the raw JSON content directly - # No rendering needed - sections are just JSON elements - self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Section content generated") - self.services.chat.progressLogFinish(aiOperationId, True) - - metadata = AiResponseMetadata( - title=title or "Section Content", - operationType=opType.value if opType else None - ) - - return AiResponse( - content=generated_json, - metadata=metadata, - documents=[] - ) - else: - # Full document generation - use the wrapper - self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - - generation_prompt = await buildGenerationPrompt( - outputFormat, prompt, title, content_for_generation, None, 
self.services - ) - - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": content_for_generation, - "services": self.services - } - - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - # Extract user prompt from promptArgs for task completion analysis - userPrompt = None - if promptArgs: - userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") - - # Track generation progress - the looping function will update with byte progress - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId, - userPrompt=userPrompt + if not documentIntents and documents: + documentIntents = await self._clarifyDocumentIntents( + documents, + prompt, + {"outputFormat": outputFormat}, + aiOperationId ) - # Calculate final size for completion message - finalSize = len(generated_json.encode('utf-8')) if generated_json else 0 - if finalSize < 1024: - finalSizeDisplay = f"{finalSize}B" - elif finalSize < 1024 * 1024: - finalSizeDisplay = f"{finalSize / 1024:.1f}kB" - else: - finalSizeDisplay = f"{finalSize / (1024 * 1024):.1f}MB" + # Schritt 5B: Extrahiere und bereite Content vor + if documents: + preparedContentParts = await self._extractAndPrepareContent( + documents, + documentIntents or [], + aiOperationId + ) + + # Merge mit bereitgestellten contentParts (falls vorhanden) + if contentParts: + # Prüfe auf pre-extracted Content + for part in contentParts: + if part.metadata.get("skipExtraction", False): + # Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig + part.metadata.setdefault("contentFormat", "extracted") + part.metadata.setdefault("isPreExtracted", True) + preparedContentParts.extend(contentParts) + + contentParts = preparedContentParts - self.services.chat.progressLogUpdate(aiOperationId, 0.7, f"Parsing generated JSON 
({finalSizeDisplay})") - try: - extracted_json = self.services.utils.jsonExtractString(generated_json) - generated_data = json.loads(extracted_json) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse generated JSON: {str(e)}") - self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(f"Generated content is not valid JSON: {str(e)}") - - # Extract title and filename from generated document structure - extractedTitle = title - extractedFilename = None - if isinstance(generated_data, dict) and "documents" in generated_data: - docs = generated_data["documents"] - if isinstance(docs, list) and len(docs) > 0: - firstDoc = docs[0] - if isinstance(firstDoc, dict): - if firstDoc.get("title"): - extractedTitle = firstDoc["title"] - if firstDoc.get("filename"): - extractedFilename = firstDoc["filename"] - - # Ensure metadata contains the extracted title - if "metadata" not in generated_data: - generated_data["metadata"] = {} - if extractedTitle: - generated_data["metadata"]["title"] = extractedTitle - - # Create separate operation for content rendering - renderOperationId = f"{aiOperationId}_render" - # Use aiOperationId directly as parentOperationId (operationId, not log entry ID) - self.services.chat.progressLogStart( - renderOperationId, - "Content Rendering", - "Rendering", - f"Format: {outputFormat}", - parentOperationId=aiOperationId + # Schritt 5C: Generiere Struktur + structure = await self._generateStructure( + prompt, + contentParts or [], + outputFormat, + aiOperationId ) - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format") - rendered_content, mime_type, _images = await generationService.renderReport( - generated_data, outputFormat, extractedTitle or 
"Generated Document", prompt, self - ) - self.services.chat.progressLogFinish(renderOperationId, True) - - # Determine document name - if extractedFilename: - documentName = extractedFilename - elif extractedTitle and extractedTitle != "Generated Document": - sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle) - sanitized = re.sub(r"_+", "_", sanitized).strip("_") - if sanitized: - if not sanitized.lower().endswith(f".{outputFormat}"): - documentName = f"{sanitized}.{outputFormat}" - else: - documentName = sanitized - else: - documentName = f"generated.{outputFormat}" - else: - documentName = f"generated.{outputFormat}" - - # Build document data - docData = DocumentData( - documentName=documentName, - documentData=rendered_content, - mimeType=mime_type, - sourceJson=generated_data # Preserve source JSON for structure validation - ) - - metadata = AiResponseMetadata( - title=extractedTitle or title or "Generated Document", - filename=extractedFilename, - operationType=opType.value if opType else None - ) - - # Write JSON with proper formatting (not str() which can truncate) - jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False) - self.services.utils.writeDebugFile(jsonStr, "document_generation_response") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=json.dumps(generated_data), - metadata=metadata, - documents=[docData] - ) - - except Exception as e: - logger.error(f"Error rendering document: {str(e)}") - if renderOperationId: - self.services.chat.progressLogFinish(renderOperationId, False) - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(f"Rendering failed: {str(e)}") + # Schritt 5D: Fülle Struktur + filledStructure = await self._fillStructure( + structure, + contentParts or [], + prompt, + aiOperationId + ) + + # Schritt 5E: Rendere Resultat + renderedContent, mimeType = await self._renderResult( + filledStructure, + outputFormat, + title or "Generated Document", + 
def _determineDocumentName(
    self,
    filledStructure: Dict[str, Any],
    outputFormat: str,
    title: Optional[str]
) -> str:
    """
    Pick the file name for the generated document.

    Order of preference:
    1. the ``filename`` of the first document in the structure,
    2. the sanitized title, with ``.{outputFormat}`` appended unless the
       title already ends with it (compared in lower case),
    3. ``generated.{outputFormat}``.
    """
    extension = f".{outputFormat}"
    fallbackName = f"generated.{outputFormat}"

    # 1) A file name provided by the structure itself wins.
    docs = filledStructure.get("documents") if isinstance(filledStructure, dict) else None
    if isinstance(docs, list) and docs:
        leadDoc = docs[0]
        if isinstance(leadDoc, dict):
            structureName = leadDoc.get("filename")
            if structureName:
                return structureName

    # 2) Otherwise derive the name from the title.
    if not title:
        return fallbackName

    # Replace every character outside [a-zA-Z0-9._-], collapse underscore
    # runs, and trim underscores from both ends.
    cleaned = re.sub(r"_+", "_", re.sub(r"[^a-zA-Z0-9._-]", "_", title)).strip("_")
    if not cleaned:
        return fallbackName

    return cleaned if cleaned.lower().endswith(extension) else f"{cleaned}{extension}"
b/modules/services/serviceExtraction/chunking/chunkerStructure.py @@ -34,12 +34,42 @@ class StructureChunker(Chunker): if bucket: emit(bucket) else: + # JSON object (dict) - check if it fits text = json.dumps(obj, ensure_ascii=False) - if len(text.encode('utf-8')) <= maxBytes: + textSize = len(text.encode('utf-8')) + if textSize <= maxBytes: emit(obj) else: - # fallback to line chunking - raise ValueError("too large") + # Object too large - try to split by keys if possible + # For large objects, we need to chunk by character boundaries + # since we can't split JSON objects arbitrarily + if isinstance(obj, dict) and len(obj) > 1: + # Try to split object into multiple chunks by keys + # This preserves JSON structure better than line-based chunking + currentChunk: Dict[str, Any] = {} + currentSize = 2 # Start with "{}" overhead + for key, value in obj.items(): + itemText = json.dumps({key: value}, ensure_ascii=False) + itemSize = len(itemText.encode('utf-8')) + # Account for comma and spacing between items + if currentChunk: + itemSize += 2 # ", " separator + + if currentSize + itemSize > maxBytes and currentChunk: + # Current chunk is full, emit it + emit(currentChunk) + currentChunk = {key: value} + currentSize = len(itemText.encode('utf-8')) + else: + currentChunk[key] = value + currentSize += itemSize + + # Emit remaining chunk + if currentChunk: + emit(currentChunk) + else: + # Single large value or can't split - fallback to line chunking + raise ValueError("too large") except Exception: current: List[str] = [] size = 0 diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index 663753cd..a2972453 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -6,10 +6,11 @@ import logging import time import asyncio import base64 +import json from .subRegistry import ExtractorRegistry, ChunkerRegistry from 
.subPipeline import runExtraction -from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult +from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult, DocumentIntent from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall from modules.aicore.aicoreModelRegistry import modelRegistry @@ -73,12 +74,14 @@ class ExtractionService: if operationId: workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" docOperationId = f"{operationId}_doc_{i}" + # Use parentOperationId if provided, otherwise use operationId as parent + parentId = parentOperationId if parentOperationId else operationId self.services.chat.progressLogStart( docOperationId, "Extracting Document", f"Document {i + 1}/{totalDocs}", doc.fileName[:50] + "..." 
if len(doc.fileName) > 50 else doc.fileName, - parentOperationId=operationId # Use operationId as parent (not parentOperationId) + parentOperationId=parentId # Correct parent reference for ChatLog hierarchy ) # Start timing for this document @@ -125,12 +128,41 @@ class ExtractionService: if part.metadata: logger.debug(f" Metadata: {part.metadata}") - # Attach document id and MIME type to parts if missing + # Attach complete metadata to parts according to ContentPart Metadaten-Schema for p in ec.parts: + # Ensure metadata dict exists + if not p.metadata: + p.metadata = {} + + # Required metadata fields (from concept) if "documentId" not in p.metadata: p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4()) if "documentMimeType" not in p.metadata: p.metadata["documentMimeType"] = documentData["mimeType"] + if "originalFileName" not in p.metadata: + p.metadata["originalFileName"] = documentData["fileName"] + + # ContentFormat: Set based on typeGroup and mimeType + # Default to "extracted" for text content, but can be overridden by caller + if "contentFormat" not in p.metadata: + # Default: extracted text content + p.metadata["contentFormat"] = "extracted" + + # Intent: Default to "extract" for extracted content + if "intent" not in p.metadata: + p.metadata["intent"] = "extract" + + # ExtractionPrompt: Use from options if available + if "extractionPrompt" not in p.metadata and options and options.prompt: + p.metadata["extractionPrompt"] = options.prompt + + # UsageHint: Provide default hint + if "usageHint" not in p.metadata: + p.metadata["usageHint"] = f"Use extracted content from {documentData['fileName']}" + + # SourceAction: Mark as from extraction service + if "sourceAction" not in p.metadata: + p.metadata["sourceAction"] = "extraction.extractContent" # Log chunking information chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)] @@ -185,7 +217,7 @@ class ExtractionService: # Write extraction results to debug file try: from 
modules.shared.debugLogger import writeDebugFile - import json + # json is already imported at module level # Create summary of extraction results for debug extractionSummary = { "documentName": doc.fileName, @@ -487,7 +519,8 @@ class ExtractionService: prompt: str, aiObjects: Any, options: Optional[AiCallOptions] = None, - operationId: Optional[str] = None + operationId: Optional[str] = None, + parentOperationId: Optional[str] = None ) -> str: """ Process documents with model-aware chunking and merge results. @@ -499,6 +532,7 @@ class ExtractionService: aiObjects: AiObjects instance for making AI calls options: AI call options operationId: Optional operation ID for progress tracking + parentOperationId: Optional parent operation ID for hierarchical logging Returns: Merged AI results as string with preserved document structure @@ -514,7 +548,8 @@ class ExtractionService: operationId, "AI Text Extract", "Document Processing", - f"Processing {len(documents)} documents" + f"Processing {len(documents)} documents", + parentOperationId=parentOperationId # Use parentOperationId if provided ) try: @@ -539,7 +574,8 @@ class ExtractionService: if operationId: self.services.chat.progressLogUpdate(operationId, 0.1, f"Extracting content from {len(documents)} documents") # Pass operationId as parentOperationId for hierarchical logging - extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=parentOperationId) + # Correct hierarchy: parentOperationId -> operationId -> docOperationId + extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=operationId) if not isinstance(extractionResult, list): if operationId: @@ -549,9 +585,10 @@ class ExtractionService: # Process parts (not chunks) with model-aware AI calls if operationId: self.services.chat.progressLogUpdate(operationId, 0.3, f"Processing {len(extractionResult)} extracted content parts") - # Use parent operation ID 
directly (parentId should be operationId, not log entry ID) - parentOperationId = operationId # Use the parent's operationId directly - partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentOperationId) + # Use operationId as parentOperationId for child operations + # Correct hierarchy: parentOperationId -> operationId -> partOperationId + processParentOperationId = operationId + partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, processParentOperationId) # Merge results using existing merging system if operationId: @@ -733,7 +770,8 @@ class ExtractionService: # Detect input type and convert accordingly if isinstance(partResults[0], PartResult): # Existing logic for PartResult (from processDocumentsPerChunk) - for part_result in partResults: + # Phase 7: Add originalIndex for explicit ordering + for i, part_result in enumerate(partResults): content_part = ContentPart( id=part_result.originalPart.id, parentId=part_result.originalPart.parentId, @@ -744,7 +782,9 @@ class ExtractionService: metadata={ **part_result.originalPart.metadata, "aiResult": True, + "originalIndex": i, # Phase 7: Explicit order index "partIndex": part_result.partIndex, + "processingOrder": i, # Phase 7: Processing order "documentId": part_result.documentId, "processingTime": part_result.processingTime, "success": part_result.metadata.get("success", False) @@ -753,6 +793,7 @@ class ExtractionService: content_parts.append(content_part) elif isinstance(partResults[0], AiCallResponse): # Logic from interfaceAiObjects (from content parts processing) + # Phase 7: Add originalIndex for explicit ordering for i, result in enumerate(partResults): if result.content: content_part = ContentPart( @@ -764,6 +805,8 @@ class ExtractionService: data=result.content, metadata={ "aiResult": True, + "originalIndex": i, # Phase 7: Explicit order index + "processingOrder": i, # Phase 7: Processing 
order "modelName": result.modelName, "priceUsd": result.priceUsd, "processingTime": result.processingTime, @@ -792,11 +835,12 @@ class ExtractionService: # Determine merge strategy based on input type if isinstance(partResults[0], PartResult): - # Use strategy for extraction workflow (group by document, order by part index) + # Phase 7: Use originalIndex for explicit ordering + # Use strategy for extraction workflow (group by document, order by originalIndex) merge_strategy = MergeStrategy( useIntelligentMerging=True, groupBy="documentId", # Group by document - orderBy="partIndex", # Order by part index + orderBy="originalIndex", # Phase 7: Order by originalIndex instead of partIndex mergeType="concatenate" ) else: @@ -811,10 +855,52 @@ class ExtractionService: # Apply merging merged_parts = applyMerging(content_parts, merge_strategy) - # Convert back to string - final_content = "\n\n".join([part.data for part in merged_parts]) + # Phase 6: Enhanced format with metadata preservation + # CRITICAL: For generation responses (JSON), don't add SOURCE markers - they interfere with JSON parsing + # Check if this is a generation response by looking at operationType or content structure + isGenerationResponse = False + if options and hasattr(options, 'operationType'): + # Generation responses use DATA_GENERATE operation type + from modules.datamodels.datamodelAi import OperationTypeEnum + isGenerationResponse = options.operationType == OperationTypeEnum.DATA_GENERATE - logger.info(f"Merged {len(partResults)} parts using unified merging system") + # Also check if content looks like JSON (starts with { or [) + if not isGenerationResponse and merged_parts: + firstPartData = merged_parts[0].data if merged_parts[0].data else "" + if isinstance(firstPartData, str) and firstPartData.strip().startswith(('{', '[')): + # Check if it's a complete JSON structure (not extracted content) + # Generation responses are complete JSON, extraction responses are text content + try: + # json is 
already imported at module level + json.loads(firstPartData.strip()) + # If it parses as JSON and has "documents" key, it's likely a generation response + parsed = json.loads(firstPartData.strip()) + if isinstance(parsed, dict) and "documents" in parsed: + isGenerationResponse = True + except: + pass + + content_sections = [] + for part in merged_parts: + if isGenerationResponse: + # For generation responses, return JSON directly without SOURCE markers + content_sections.append(part.data) + else: + # For extraction responses, include metadata in section header for traceability + doc_id = part.metadata.get("documentId", "unknown") + doc_mime = part.metadata.get("documentMimeType", "unknown") + label = part.label or "content" + + section = f""" +[SOURCE: documentId={doc_id}, mimeType={doc_mime}, label={label}] +{part.data} +[END SOURCE] +""" + content_sections.append(section) + + final_content = "\n\n".join(content_sections) + + logger.info(f"Merged {len(partResults)} parts using unified merging system with metadata preservation (generationResponse={isGenerationResponse})") return final_content.strip() async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]: @@ -827,9 +913,14 @@ class ExtractionService: modelContextTokens = model.contextLength # Total context in tokens modelMaxOutputTokens = model.maxTokens # Maximum output tokens + # CRITICAL: Use same conservative token factor as in processContentPartWithFallback + # Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!) + TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead + # Reserve tokens for: - # 1. Prompt (user message) - promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0 + # 1. Prompt (user message) - use conservative factor + promptSize = len(prompt.encode('utf-8')) if prompt else 0 + promptTokens = promptSize / TOKEN_SAFETY_FACTOR # 2. 
System message wrapper ("Context from documents:\n") systemMessageTokens = 10 # ~40 bytes = 10 tokens @@ -844,31 +935,38 @@ class ExtractionService: totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens # Available tokens for content = context length - reserved tokens - # Use 80% of available for safety margin - availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8) + # Use 60% of available (same conservative margin as in processContentPartWithFallback) + availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.60) # Ensure we have at least some space if availableContentTokens < 100: logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens") availableContentTokens = max(100, int(modelContextTokens * 0.1)) # Fallback to 10% of context - # Convert tokens to bytes (1 token ≈ 4 bytes) - availableContentBytes = availableContentTokens * 4 + # Convert tokens to bytes using conservative factor (reverse: bytes = tokens * factor) + availableContentBytes = int(availableContentTokens * TOKEN_SAFETY_FACTOR) - logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)") + logger.info(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens est., reserved={totalReservedTokens:.0f} tokens est., available={availableContentTokens} tokens est. 
({availableContentBytes} bytes), factor={TOKEN_SAFETY_FACTOR}") - # Use 70% of available content bytes for text chunks (conservative) - textChunkSize = int(availableContentBytes * 0.7) - imageChunkSize = int(availableContentBytes * 0.8) # 80% for image chunks + # Use 50% of available content bytes for text chunks (very conservative to ensure chunks fit) + # This ensures that even with token counting inaccuracies, chunks will fit + textChunkSize = int(availableContentBytes * 0.5) + structureChunkSize = int(availableContentBytes * 0.5) # CRITICAL: Also set for StructureChunker (JSON content) + tableChunkSize = int(availableContentBytes * 0.5) # Also set for TableChunker + imageChunkSize = int(availableContentBytes * 0.6) # 60% for image chunks - # Build chunking options + # Build chunking options - include ALL chunk size options for different chunkers chunkingOptions = { "textChunkSize": textChunkSize, + "structureChunkSize": structureChunkSize, # CRITICAL: Required for StructureChunker (JSON) + "tableChunkSize": tableChunkSize, # Required for TableChunker "imageChunkSize": imageChunkSize, "maxSize": availableContentBytes, "chunkAllowed": True } + logger.info(f"Chunking options: textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes, tableChunkSize={tableChunkSize} bytes, imageChunkSize={imageChunkSize} bytes, contentPartSize={len(contentPart.data.encode('utf-8')) if contentPart.data else 0} bytes") + # Get appropriate chunker (uses existing ChunkerRegistry ✅) chunker = self._chunkerRegistry.resolve(contentPart.typeGroup) @@ -878,8 +976,14 @@ class ExtractionService: # Chunk the content part try: + contentSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0 + logger.info(f"Chunking {contentPart.typeGroup} part: contentSize={contentSize} bytes, textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes") chunks = chunker.chunk(contentPart, chunkingOptions) - logger.debug(f"Created {len(chunks)} 
chunks for {contentPart.typeGroup} part") + logger.info(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part (contentSize={contentSize} bytes)") + if chunks: + for i, chunk in enumerate(chunks): + chunkSize = len(chunk.get('data', '').encode('utf-8')) if chunk.get('data') else 0 + logger.info(f" Chunk {i+1}/{len(chunks)}: {chunkSize} bytes") return chunks except Exception as e: logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}") @@ -999,15 +1103,86 @@ class ExtractionService: availableContentBytes = availableContentTokens * 4 - logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes") + # Also check prompt size - prompt + content together must fit + promptSize = len(prompt.encode('utf-8')) if prompt else 0 - if partSize <= availableContentBytes: + # CRITICAL: Token counting approximation is VERY inaccurate for JSON/content + # Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!) + # This happens because: + # 1. JSON/structured content tokenizes differently (more tokens per byte) + # 2. API has message structure overhead (system prompts, message wrappers) + # 3. Tokenizer differences between our approximation and actual API tokenizer + # Use conservative factor: 1 token ≈ 2.2 bytes (instead of 4) to account for these differences + TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead + promptTokens = promptSize / TOKEN_SAFETY_FACTOR + contentTokens = partSize / TOKEN_SAFETY_FACTOR + totalTokens = promptTokens + contentTokens + + # CRITICAL: Use very conservative margin (60%) because: + # 1. Token counting approximation is inaccurate - real tokens can be 2-3x more + # 2. API has additional overhead (message structure, system prompts, etc.) + # 3. Anthropic API is strict about the 200k limit + # 4. 
We've seen cases where our calculation says "fits" but API says "too long" + maxTotalTokens = int(modelContextTokens * 0.60) + + logger.info(f"Size check for {model.name}: partSize={partSize} bytes ({contentTokens:.0f} tokens est.), promptSize={promptSize} bytes ({promptTokens:.0f} tokens est.), total={totalTokens:.0f} tokens est., modelContext={modelContextTokens} tokens, maxTotal={maxTotalTokens} tokens (60% margin, conservative factor={TOKEN_SAFETY_FACTOR})") + + # CRITICAL: Always check totalTokens first - if prompt + content exceeds limit, MUST chunk + # Token counting approximation may differ significantly from API, so use very conservative margin + if totalTokens > maxTotalTokens: + logger.warning(f"⚠️ Total tokens ({totalTokens:.0f} est.) exceed model limit ({maxTotalTokens}), chunking required. Prompt: {promptTokens:.0f} tokens est., Content: {contentTokens:.0f} tokens est.") + elif partSize > availableContentBytes: + logger.warning(f"⚠️ Content part ({contentTokens:.0f} tokens est.) 
exceeds available space ({availableContentBytes/TOKEN_SAFETY_FACTOR:.0f} tokens est.), chunking required") + + # If either condition fails, chunk the content + if totalTokens > maxTotalTokens or partSize > availableContentBytes: + # Part too large or total exceeds limit - chunk it + chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt) + if not chunks: + raise ValueError(f"Failed to chunk content part for model {model.name}") + + logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}") + + if progressCallback: + progressCallback(0.0, f"Starting to process {len(chunks)} chunks") + + chunkResults = [] + for idx, chunk in enumerate(chunks): + chunkNum = idx + 1 + chunkData = chunk.get('data', '') + logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}") + + if progressCallback: + progressCallback(chunkNum / len(chunks), f"Processing chunk {chunkNum}/{len(chunks)}") + + try: + chunkResponse = await aiObjects._callWithModel(model, prompt, chunkData, options) + chunkResults.append(chunkResponse) + except Exception as chunkError: + logger.error(f"Error processing chunk {chunkNum}/{len(chunks)}: {str(chunkError)}") + # Continue with other chunks even if one fails + continue + + # Merge chunk results + if not chunkResults: + raise ValueError(f"All chunks failed for content part") + + mergedContent = self.mergePartResults(chunkResults, options) + return AiCallResponse( + content=mergedContent, + modelName=model.name, + priceUsd=sum(r.priceUsd for r in chunkResults), + processingTime=sum(r.processingTime for r in chunkResults), + bytesSent=sum(r.bytesSent for r in chunkResults), + bytesReceived=sum(r.bytesReceived for r in chunkResults), + errorCount=sum(r.errorCount for r in chunkResults) + ) + else: # Part fits - call AI directly via aiObjects interface + logger.info(f"✅ Content part fits within model limits, processing directly") response = await aiObjects._callWithModel(model, prompt, 
contentPart.data, options) logger.info(f"✅ Content part processed successfully with model: {model.name}") return response - else: - # Part too large - chunk it chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt) if not chunks: raise ValueError(f"Failed to chunk content part for model {model.name}") @@ -1037,8 +1212,8 @@ class ExtractionService: logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}") raise - # Merge chunk results - mergedContent = self.mergeChunkResults(chunkResults) + # Merge chunk results using unified mergePartResults + mergedContent = self.mergePartResults(chunkResults, options) logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)") return AiCallResponse( diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 5b518afa..cababbeb 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -2,7 +2,9 @@ # All rights reserved. 
import logging import uuid -from typing import Any, Dict, List, Optional +import base64 +import traceback +from typing import Any, Dict, List, Optional, Callable from modules.datamodels.datamodelChat import ChatDocument from modules.services.serviceGeneration.subDocumentUtility import ( getFileExtension, @@ -100,12 +102,12 @@ class GenerationService: # For binary data, handle bytes vs base64 string vs regular string if isinstance(documentData, bytes): # Already bytes - encode to base64 string for storage - import base64 + # base64 is already imported at module level content = base64.b64encode(documentData).decode('utf-8') base64encoded = True elif isinstance(documentData, str): # Check if it's already valid base64 - import base64 + # base64 is already imported at module level try: # Try to decode to verify it's base64 base64.b64decode(documentData, validate=True) @@ -122,7 +124,7 @@ class GenerationService: continue else: # Other types - convert to string then base64 - import base64 + # base64 is already imported at module level try: content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8') base64encoded = True @@ -231,7 +233,7 @@ class GenerationService: return None # Convert content to bytes if base64encoded: - import base64 + # base64 is already imported at module level content_bytes = base64.b64decode(content) else: content_bytes = content.encode('utf-8') @@ -319,10 +321,12 @@ class GenerationService: 'workflowId': 'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]: """ Render extracted JSON content to the specified output format. 
+ Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering). Always uses unified "documents" array format. + Supports three content formats: reference, object (base64), extracted_text. Args: extractedContent: Structured JSON document from AI extraction @@ -330,6 +334,7 @@ class GenerationService: title: Report title userPrompt: User's original prompt for report generation aiService: AI service instance for generation prompt creation + parentOperationId: Optional parent operation ID for hierarchical logging Returns: tuple: (rendered_content, mime_type, images_list) @@ -348,15 +353,40 @@ class GenerationService: if len(documents) == 0: raise ValueError("No documents found in 'documents' array") - # Use first document for rendering - single_doc = documents[0] - if "sections" not in single_doc: - raise ValueError("Document must contain 'sections' field") - - # Pass standardized schema to renderer (maintains architecture) - # Renderer should extract sections from documents array according to standardized schema - # Standardized schema: {metadata: {...}, documents: [{sections: [...]}]} - contentToRender = extractedContent # Pass full standardized schema + # Phase 5: Multi-Dokument-Rendering + if len(documents) == 1: + # Single document - use existing logic + single_doc = documents[0] + if "sections" not in single_doc: + raise ValueError("Document must contain 'sections' field") + + # Pass standardized schema to renderer (maintains architecture) + contentToRender = extractedContent # Pass full standardized schema + else: + # Multiple documents - merge all sections into one document for rendering + # Option: Merge all sections from all documents into a single document + all_sections = [] + for doc in documents: + if isinstance(doc, dict) and "sections" in doc: + sections = doc.get("sections", []) + if isinstance(sections, list): + all_sections.extend(sections) + + if not all_sections: + raise ValueError("No sections found in any document") + + # 
Create merged document with all sections + merged_document = { + "metadata": extractedContent.get("metadata", {}), + "documents": [{ + "id": "merged", + "title": title, + "filename": f"{title}.{outputFormat}", + "sections": all_sections + }] + } + contentToRender = merged_document + logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections") # Get the appropriate renderer for the format renderer = self._getFormatRenderer(outputFormat) @@ -378,6 +408,92 @@ class GenerationService: logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}") raise + async def generateDocumentWithTwoPhases( + self, + userPrompt: str, + cachedContent: Optional[Dict[str, Any]] = None, + contentParts: Optional[List[Any]] = None, + maxSectionLength: int = 500, + parallelGeneration: bool = True, + progressCallback: Optional[Callable] = None + ) -> Dict[str, Any]: + """ + Generate document using two-phase approach: + 1. Generate structure skeleton with empty sections + 2. Generate content for each section iteratively + + This is the core logic for document generation in AI calls. 
+ + Args: + userPrompt: User's original prompt + cachedContent: Optional extracted content cache (from extraction phase) + contentParts: Optional list of ContentParts to use for structure generation + maxSectionLength: Maximum words for simple sections + parallelGeneration: Enable parallel section generation + progressCallback: Optional callback function(progress, total, message) for progress updates + + Returns: + Complete document structure with populated elements ready for rendering + """ + try: + from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator + from modules.services.serviceGeneration.subContentGenerator import ContentGenerator + + # Phase 1: Generate structure skeleton + if progressCallback: + progressCallback(0, 100, "Generating document structure...") + + structureGenerator = StructureGenerator(self.services) + + # Extract imageDocuments from cachedContent if available + existingImages = None + if cachedContent and cachedContent.get("imageDocuments"): + existingImages = cachedContent.get("imageDocuments") + + structure = await structureGenerator.generateStructure( + userPrompt=userPrompt, + documentList=None, # Not used in current implementation + cachedContent=cachedContent, + contentParts=contentParts, # Pass ContentParts for structure generation + maxSectionLength=maxSectionLength, + existingImages=existingImages + ) + + if progressCallback: + progressCallback(30, 100, "Structure generated, starting content generation...") + + # Phase 2: Generate content for each section + contentGenerator = ContentGenerator(self.services) + + # Create progress callback wrapper for content generation phase (30-90%) + def contentProgressCallback(sectionIndex: int, totalSections: int, message: str): + if progressCallback: + # Map section progress to overall progress (30% to 90%) + if totalSections > 0: + overallProgress = 30 + int(60 * (sectionIndex / totalSections)) + else: + overallProgress = 30 + progressCallback(overallProgress, 
100, f"Section {sectionIndex}/{totalSections}: {message}") + + completeStructure = await contentGenerator.generateContent( + structure=structure, + cachedContent=cachedContent, + userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for content generation + progressCallback=contentProgressCallback, + parallelGeneration=parallelGeneration + ) + + if progressCallback: + progressCallback(100, 100, "Document generation complete") + + return completeStructure + + except Exception as e: + logger.error(f"Error in two-phase document generation: {str(e)}") + logger.debug(traceback.format_exc()) + raise + async def getAdaptiveExtractionPrompt( self, outputFormat: str, @@ -423,6 +539,6 @@ class GenerationService: except Exception as e: logger.error(f"Error getting renderer for {output_format}: {str(e)}") - import traceback + # traceback is already imported at module level logger.debug(traceback.format_exc()) return None \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 491c1d06..e9693680 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -68,6 +68,7 @@ class BaseRenderer(ABC): def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: """ Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + Phase 5: Supports multiple documents - extracts all sections from all documents. 
""" if "documents" not in reportData: raise ValueError("Report data must follow standardized schema with 'documents' array") @@ -76,11 +77,18 @@ class BaseRenderer(ABC): if not isinstance(documents, list) or len(documents) == 0: raise ValueError("Standardized schema must contain at least one document in 'documents' array") - firstDoc = documents[0] - if not isinstance(firstDoc, dict) or "sections" not in firstDoc: - raise ValueError("Document in standardized schema must contain 'sections' field") + # Phase 5: Extract sections from ALL documents + all_sections = [] + for doc in documents: + if isinstance(doc, dict) and "sections" in doc: + sections = doc.get("sections", []) + if isinstance(sections, list): + all_sections.extend(sections) - return firstDoc.get("sections", []) + if not all_sections: + raise ValueError("No sections found in any document") + + return all_sections def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]: """ diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index 48fb94f1..f62935d8 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -9,6 +9,7 @@ from typing import Dict, Any, Tuple, List import io import base64 import re +import csv try: from docx import Document @@ -225,13 +226,36 @@ class RendererDocx(BaseRenderer): self.logger.warning(f"Could not clear template content: {str(e)}") def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: - """Render a single JSON section to DOCX using AI-generated styles.""" + """Render a single JSON section to DOCX using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. 
+ """ try: section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) # Process each element in the section for element in elements: + element_type = element.get("type", "") + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + para = doc.add_paragraph(f"[Reference: {label}]") + para.runs[0].italic = True + continue + elif element_type == "extracted_text": + # Extracted text format - render as paragraph + content = element.get("content", "") + source = element.get("source", "") + if content: + para = doc.add_paragraph(content) + if source: + para.add_run(f" (Source: {source})").italic = True + continue + + # Standard section types if section_type == "table": self._renderJsonTable(doc, element, styles) elif section_type == "bullet_list": @@ -848,7 +872,7 @@ class RendererDocx(BaseRenderer): Process tables in the content (both CSV and pipe-separated) and convert them to Word tables. Returns the content with tables replaced by placeholders. """ - import csv + # csv is already imported at module level lines = content.split('\n') processed_lines = [] diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 163690d3..54c7e64b 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -297,11 +297,39 @@ class RendererHtml(BaseRenderer): return '\n'.join(css_parts) def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a single JSON section to HTML using AI-generated styles.""" + """Render a single JSON section to HTML using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. 
+ """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + htmlParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + htmlParts.append(f'
[Reference: {label}]
') + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f' (Source: {source})' if source else '' + htmlParts.append(f'{content}{source_text}
') + continue + + # If we processed reference/extracted_text elements, return them + if htmlParts: + return '\n'.join(htmlParts) + if sectionType == "table": # Process the section data to extract table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index 3c9569e9..dfe2bda2 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -77,11 +77,39 @@ class RendererMarkdown(BaseRenderer): raise Exception(f"Markdown generation failed: {str(e)}") def _renderJsonSection(self, section: Dict[str, Any]) -> str: - """Render a single JSON section to markdown.""" + """Render a single JSON section to markdown. + Supports three content formats: reference, object (base64), extracted_text. + """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + markdownParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + markdownParts.append(f"*[Reference: {label}]*") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" *(Source: {source})*" if source else "" + markdownParts.append(f"{content}{source_text}") + continue + + # If we processed reference/extracted_text elements, return them + if markdownParts: + return '\n\n'.join(markdownParts) + if sectionType == "table": # Process the section data to extract 
table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index 1cfcfad7..128e84d3 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -477,7 +477,9 @@ class RendererPdf(BaseRenderer): return colors.black def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: - """Render a single JSON section to PDF elements using AI-generated styles.""" + """Render a single JSON section to PDF elements using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. + """ try: section_type = self._getSectionType(section) elements = self._getSectionData(section) @@ -485,6 +487,33 @@ class RendererPdf(BaseRenderer): # Process each element in the section all_elements = [] for element in elements: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + ref_style = ParagraphStyle( + 'Reference', + parent=self._createNormalStyle(styles), + fontStyle='italic', + textColor=colors.grey + ) + all_elements.append(Paragraph(f"[Reference: {label}]", ref_style)) + all_elements.append(Spacer(1, 6)) + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + all_elements.append(Paragraph(f"{content}{source_text}", self._createNormalStyle(styles))) + all_elements.append(Spacer(1, 6)) + continue + + # Standard section types if section_type == "table": all_elements.extend(self._renderJsonTable(element, 
styles)) elif section_type == "bullet_list": diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 6b1b9e18..e9ad334c 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -3,6 +3,9 @@ import logging import base64 import io +import json +import re +from datetime import datetime, UTC from typing import Dict, Any, Optional, Tuple, List from .rendererBaseTemplate import BaseRenderer @@ -261,7 +264,7 @@ class RendererPptx(BaseRenderer): Returns: List of slide content strings """ - import re + # re is already imported at module level # First, try to split by major headers (# or ##) # This is the most common case for AI-generated content @@ -399,7 +402,7 @@ class RendererPptx(BaseRenderer): def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str: """Create a professional PowerPoint-specific AI style template for corporate-quality slides.""" - import json + # json is already imported at module level schema_json = json.dumps(style_schema, indent=4) return f"""Customize the JSON below for professional PowerPoint slides. @@ -443,8 +446,7 @@ JSON ONLY. NO OTHER TEXT.""" self.logger.warning("AI service returned no response, using defaults") return default_styles - import json - import re + # json and re are already imported at module level # Clean and parse JSON result = response.content.strip() if response and response.content else "" @@ -634,6 +636,27 @@ JSON ONLY. 
NO OTHER TEXT.""" content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Check for three content formats from Phase 5D in elements + content_parts = [] + for element in elements: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + content_parts.append(f"[Reference: {label}]") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + content_parts.append(f"{content}{source_text}") + continue + # Handle image sections specially if content_type == "image": # Extract image data @@ -647,26 +670,25 @@ JSON ONLY. NO OTHER TEXT.""" }) return { - "title": section_title or element.get("altText", "Image"), - "content": "", # No text content for image slides + "title": section_title or (elements[0].get("altText", "Image") if elements else "Image"), + "content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present "images": images } # Build slide content based on section type - content_parts = [] - - if content_type == "table": - content_parts.append(self._formatTableForSlide(elements)) - elif content_type == "list": - content_parts.append(self._formatListForSlide(elements)) - elif content_type == "heading": - content_parts.append(self._formatHeadingForSlide(elements)) - elif content_type == "paragraph": - content_parts.append(self._formatParagraphForSlide(elements)) - elif content_type == "code": - content_parts.append(self._formatCodeForSlide(elements)) - else: - content_parts.append(self._format_paragraph_for_slide(elements)) + if not content_parts: # Only if we didn't process 
reference/extracted_text above + if content_type == "table": + content_parts.append(self._formatTableForSlide(elements)) + elif content_type == "list": + content_parts.append(self._formatListForSlide(elements)) + elif content_type == "heading": + content_parts.append(self._formatHeadingForSlide(elements)) + elif content_type == "paragraph": + content_parts.append(self._formatParagraphForSlide(elements)) + elif content_type == "code": + content_parts.append(self._formatCodeForSlide(elements)) + else: + content_parts.append(self._format_paragraph_for_slide(elements)) # Combine content parts slide_content = "\n\n".join(filter(None, content_parts)) @@ -1057,5 +1079,5 @@ JSON ONLY. NO OTHER TEXT.""" def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" - from datetime import datetime, UTC + # datetime and UTC are already imported at module level return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index 56d4af61..acbeaaf9 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -100,11 +100,39 @@ class RendererText(BaseRenderer): raise Exception(f"Text generation failed: {str(e)}") def _renderJsonSection(self, section: Dict[str, Any]) -> str: - """Render a single JSON section to text.""" + """Render a single JSON section to text. + Supports three content formats: reference, object (base64), extracted_text. 
+ """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + textParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + textParts.append(f"[Reference: {label}]") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + textParts.append(f"{content}{source_text}") + continue + + # If we processed reference/extracted_text elements, return them + if textParts: + return '\n\n'.join(textParts) + if sectionType == "table": # Process the section data to extract table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py index 0f75f595..681a5923 100644 --- a/modules/services/serviceGeneration/subContentGenerator.py +++ b/modules/services/serviceGeneration/subContentGenerator.py @@ -7,6 +7,10 @@ Generates content for each section in the document structure. 
import logging import asyncio +import json +import base64 +import re +import traceback from typing import Dict, Any, Optional, List, Callable from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator @@ -25,6 +29,7 @@ class ContentGenerator: structure: Dict[str, Any], cachedContent: Optional[Dict[str, Any]] = None, userPrompt: str = "", + contentParts: Optional[List[Any]] = None, progressCallback: Optional[Callable] = None, parallelGeneration: bool = True, batchSize: int = 10 @@ -33,9 +38,10 @@ class ContentGenerator: Generate content for all sections in structure. Args: - structure: Document structure from Phase 1 + structure: Document structure from Phase 1 (with contentPartIds per section) cachedContent: Extracted content cache userPrompt: Original user prompt + contentParts: List of all available ContentParts (for mapping by contentPartIds) progressCallback: Function to call for progress updates parallelGeneration: Enable parallel section generation batchSize: Number of sections to process in parallel @@ -89,6 +95,7 @@ class ContentGenerator: sections=sections, cachedContent=cachedContent, userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for section generation documentMetadata=structure.get("metadata", {}), progressCallback=lambda idx, total, msg: progressCallback( currentSectionIndex + idx, @@ -103,6 +110,7 @@ class ContentGenerator: sections=sections, cachedContent=cachedContent, userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for section generation documentMetadata=structure.get("metadata", {}), progressCallback=lambda idx, total, msg: progressCallback( currentSectionIndex + idx, @@ -138,7 +146,8 @@ class ContentGenerator: sections: List[Dict[str, Any]], cachedContent: Optional[Dict[str, Any]], userPrompt: str, - documentMetadata: Dict[str, Any], + contentParts: Optional[List[Any]] = None, + documentMetadata: Dict[str, Any] = {}, progressCallback: Optional[Callable] = None ) -> 
List[Dict[str, Any]]: """ @@ -149,6 +158,14 @@ class ContentGenerator: previousSections = [] totalSections = len(sections) + # Create ContentParts lookup map by ID + contentPartsMap = {} + if contentParts: + for part in contentParts: + partId = part.id if hasattr(part, 'id') else part.get('id', '') + if partId: + contentPartsMap[partId] = part + for idx, section in enumerate(sections): try: contentType = section.get("content_type", "content") @@ -171,11 +188,20 @@ class ContentGenerator: message ) + # Get ContentParts for this section + sectionContentPartIds = section.get("contentPartIds", []) + sectionContentParts = [] + if sectionContentPartIds and contentPartsMap: + for partId in sectionContentPartIds: + if partId in contentPartsMap: + sectionContentParts.append(contentPartsMap[partId]) + context = { "userPrompt": userPrompt, "cachedContent": cachedContent, "previousSections": previousSections.copy(), "targetSection": section, + "sectionContentParts": sectionContentParts, # ContentParts for this section "documentMetadata": documentMetadata, "operationId": None } @@ -272,11 +298,20 @@ class ContentGenerator: message ) + # Get ContentParts for this section + sectionContentPartIds = section.get("contentPartIds", []) + sectionContentParts = [] + if sectionContentPartIds and contentPartsMap: + for partId in sectionContentPartIds: + if partId in contentPartsMap: + sectionContentParts.append(contentPartsMap[partId]) + context = { "userPrompt": userPrompt, "cachedContent": cachedContent, "previousSections": batchPreviousSections.copy(), # Include sections from previous batches "targetSection": section, + "sectionContentParts": sectionContentParts, # ContentParts for this section "documentMetadata": documentMetadata, "operationId": None # Can be set if needed for nested progress } @@ -371,17 +406,13 @@ class ContentGenerator: # Create section-specific prompt sectionPrompt = self._createSectionPrompt(section, context) - # Debug: Log section generation prompt - if 
self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - sectionId = section.get('id', 'unknown') - contentType = section.get('content_type', 'unknown') - try: - self.services.utils.writeDebugFile( - sectionPrompt, - f"document_generation_section_{sectionId}_{contentType}_prompt" - ) - except Exception as e: - logger.debug(f"Could not write debug file for section prompt: {e}") + # Debug: Log section generation prompt (harmonisiert - keine Checks nötig) + sectionId = section.get('id', 'unknown') + contentType = section.get('content_type', 'unknown') + self.services.utils.writeDebugFile( + sectionPrompt, + f"document_generation_section_{sectionId}_{contentType}_prompt" + ) # Call AI to generate content from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum @@ -397,32 +428,27 @@ class ContentGenerator: outputFormat="json" ) - # Debug: Log section generation response (always log, even if empty) + # Debug: Log section generation response (harmonisiert - keine Checks nötig) sectionId = section.get('id', 'unknown') contentType = section.get('content_type', 'unknown') - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - responseContent = '' - if aiResponse: - if hasattr(aiResponse, 'content') and aiResponse.content: - responseContent = aiResponse.content - elif hasattr(aiResponse, 'documents') and aiResponse.documents: - responseContent = f"[Response has {len(aiResponse.documents)} documents]" - else: - responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]" - else: - responseContent = '[No response object]' - - self.services.utils.writeDebugFile( - responseContent, - f"document_generation_section_{sectionId}_{contentType}_response" - ) - logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)") - except Exception as e: - logger.warning(f"Could not write debug file for 
section response: {e}") - import traceback - logger.debug(traceback.format_exc()) + responseContent = '' + if aiResponse: + if hasattr(aiResponse, 'content') and aiResponse.content: + responseContent = aiResponse.content + elif hasattr(aiResponse, 'documents') and aiResponse.documents: + responseContent = f"[Response has {len(aiResponse.documents)} documents]" + else: + responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]" + else: + responseContent = '[No response object]' + + # Debug: Log section generation response (harmonisiert - keine Checks nötig) + self.services.utils.writeDebugFile( + responseContent, + f"document_generation_section_{sectionId}_{contentType}_response" + ) + logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)") if not aiResponse or not aiResponse.content: logger.error(f"AI section generation returned empty response for section {sectionId}") @@ -443,7 +469,7 @@ class ContentGenerator: logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}") raise ValueError("No JSON found in AI section response") - import json + # json is already imported at module level try: elementsData = json.loads(extractedJson) logger.debug(f"Parsed JSON for section {section.get('id')}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}") @@ -480,7 +506,7 @@ class ContentGenerator: # Last resort: try to extract partial content and create minimal valid JSON try: # Try to extract text content before the truncation point - import re + # re is already imported at module level # Look for text field that might be partially complete textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson) if textMatch: @@ -577,14 +603,14 @@ class ContentGenerator: ) -> Dict[str, Any]: """Generate image for image section or include existing image""" try: - # Check if this is an existing image to include + # Check if 
this is an existing image to include or render imageSource = section.get("image_source", "generate") - if imageSource == "existing": - # Include existing image from cachedContent + if imageSource == "existing" or imageSource == "render": + # Phase 4: Include existing image or render image from cachedContent imageRefId = section.get("image_reference_id") if not imageRefId: - raise ValueError(f"Image section {section.get('id')} has image_source='existing' but no image_reference_id") + raise ValueError(f"Image section {section.get('id')} has image_source='{imageSource}' but no image_reference_id") cachedContent = context.get("cachedContent", {}) imageDocuments = cachedContent.get("imageDocuments", []) @@ -594,7 +620,7 @@ class ContentGenerator: if not imageDoc: raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments") - # Create image element from existing image + # Create image element from existing/render image altText = imageDoc.get("altText", section.get("generation_hint", "Image")) mimeType = imageDoc.get("mimeType", "image/png") @@ -605,7 +631,7 @@ class ContentGenerator: "caption": section.get("metadata", {}).get("caption") }] - logger.info(f"Successfully included existing image {imageRefId} for section {section.get('id')}") + logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})") return section # Generate new image (existing logic) @@ -620,7 +646,7 @@ class ContentGenerator: # Call AI service for image generation from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage - import json + # json is already imported at module level # Create image generation prompt promptModel = AiCallPromptImage( @@ -664,7 +690,7 @@ class ContentGenerator: # Validate base64 data try: - import base64 + # base64 is already imported at module level base64.b64decode(base64Data[:100], validate=True) # Validate first 100 chars except Exception as e: 
logger.warning(f"Image data may not be valid base64: {str(e)}") @@ -710,9 +736,11 @@ class ContentGenerator: """Create sub-prompt for section content generation""" contentType = section.get("content_type", "") generationHint = section.get("generation_hint", "") + extractionPrompt = section.get("extractionPrompt") # Optional extraction prompt for ContentParts userPrompt = context.get("userPrompt", "") cachedContent = context.get("cachedContent") previousSections = context.get("previousSections", []) + sectionContentParts = context.get("sectionContentParts", []) # ContentParts for this section documentMetadata = context.get("documentMetadata", {}) # Get user language @@ -723,6 +751,51 @@ class ContentGenerator: if cachedContent and cachedContent.get("extractedContent"): cachedContentText = self._formatCachedContent(cachedContent) + # Format ContentParts for this section + contentPartsText = "" + imagePartReferences = [] # Track image parts for text reference + + if sectionContentParts: + try: + partsList = [] + imageIndex = 1 + for part in sectionContentParts: + partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '') + partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '') + partId = part.id if hasattr(part, 'id') else part.get('id', '') + partData = part.data if hasattr(part, 'data') else part.get('data', '') + + # Check if this is an image part + isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/")) + + if contentType == "image" and isImage: + # For image sections: include image data for integration + partsList.append(f"- ContentPart {partId} (image): [Image data available for integration]") + elif isImage: + # For non-image sections: track for text reference + imagePartReferences.append({ + "id": partId, + "index": imageIndex + }) + imageIndex += 1 + # Don't include image data in prompt for non-image sections + else: + # For text/table/etc parts: include data 
preview + dataPreview = str(partData)[:200] if partData else "[No data]" + partsList.append(f"- ContentPart {partId} ({partTypeGroup}): {dataPreview}{'...' if partData and len(str(partData)) > 200 else ''}") + + if partsList: + contentPartsText = "\n".join(partsList) + + # Add image reference instructions for non-image sections + if imagePartReferences and contentType != "image": + refText = ", ".join([f"Bild {ref['index']}" if userLanguage == "de" else f"Image {ref['index']}" for ref in imagePartReferences]) + contentPartsText += f"\n\nNOTE: Reference images as text in the document language: {refText}" + + except Exception as e: + logger.warning(f"Could not format ContentParts for section prompt: {str(e)}") + contentPartsText = "" + # Format previous sections for context previousSectionsText = "" if previousSections: @@ -787,14 +860,22 @@ EXTRACTED CONTENT (if available): {cachedContentText if cachedContentText else "None"} {'='*80} +{'='*80} +CONTENT PARTS FOR THIS SECTION: +{'='*80} +{contentPartsText if contentPartsText else "No ContentParts assigned to this section."} +{'='*80} + TASK: Generate content for this section ONLY. INSTRUCTIONS: 1. Generate content appropriate for section type: {contentType} 2. Use the generation hint: {generationHint} -3. Consider previous sections for continuity -4. Use extracted content if relevant -5. All content must be in the language '{userLanguage}' +{f"3. Use extractionPrompt for ContentParts: {extractionPrompt}" if extractionPrompt else "3. Use ContentParts data if provided"} +4. Consider previous sections for continuity +5. Use extracted content if relevant +6. All content must be in the language '{userLanguage}' +7. {'For image sections: Integrate image ContentParts as visual elements' if contentType == "image" else 'For non-image sections: Reference image ContentParts as text (e.g., "siehe Bild 1" in German, "see Image 1" in English)'} 6. CRITICAL: Return ONLY a JSON object with an "elements" array. 
DO NOT return a full document structure. diff --git a/modules/services/serviceGeneration/subContentIntegrator.py b/modules/services/serviceGeneration/subContentIntegrator.py index 7bee437e..1a83eb6e 100644 --- a/modules/services/serviceGeneration/subContentIntegrator.py +++ b/modules/services/serviceGeneration/subContentIntegrator.py @@ -65,18 +65,14 @@ class ContentIntegrator: ) sections[idx] = section - # Debug: Write final merged structure to debug file - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - import json - structureJson = json.dumps(structure, indent=2, ensure_ascii=False) - self.services.utils.writeDebugFile( - structureJson, - "document_generation_final_merged_json" - ) - logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)") - except Exception as e: - logger.debug(f"Could not write debug file for final merged JSON: {e}") + # Debug: Write final merged structure to debug file (harmonisiert - keine Checks nötig) + import json + structureJson = json.dumps(structure, indent=2, ensure_ascii=False) + self.services.utils.writeDebugFile( + structureJson, + "document_generation_final_merged_json" + ) + logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)") return structure diff --git a/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py b/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py deleted file mode 100644 index d6620d3d..00000000 --- a/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py +++ /dev/null @@ -1,316 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Document Purpose Analyzer for hierarchical document generation. -Uses AI to analyze user prompt and determine purpose for each document. 
-""" - -import logging -import json -from typing import Dict, Any, List, Optional -from modules.datamodels.datamodelChat import ChatDocument -from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum - -logger = logging.getLogger(__name__) - - -class DocumentPurposeAnalyzer: - """Analyzes user prompt and documents to determine document purposes""" - - def __init__(self, services: Any): - self.services = services - - async def analyzeDocumentPurposes( - self, - userPrompt: str, - chatDocuments: List[ChatDocument], - actionContext: str = "generateDocument" - ) -> Dict[str, Any]: - """ - Use AI to analyze user prompt and determine purpose for each document. - - Args: - userPrompt: User's original prompt - chatDocuments: List of ChatDocument objects to analyze - actionContext: Action name (e.g., "generateDocument", "extractData") - - Returns: - { - "document_purposes": [ - { - "document_id": "...", - "purpose": "extract_text_content" | "include_image" | ..., - "reasoning": "...", - "extractionPrompt": "..." (if purpose requires extraction), - "processingNotes": "..." - } - ], - "overall_intent": "..." 
- } - """ - try: - if not chatDocuments: - return { - "document_purposes": [], - "overall_intent": "No documents provided" - } - - # Create document metadata list for AI analysis - documentMetadata = [] - for doc in chatDocuments: - docInfo = { - "document_id": doc.id, - "fileName": doc.fileName, - "mimeType": doc.mimeType, - "fileSize": doc.fileSize - } - documentMetadata.append(docInfo) - - # Create analysis prompt - analysisPrompt = self._createAnalysisPrompt( - userPrompt=userPrompt, - actionContext=actionContext, - documentMetadata=documentMetadata - ) - - # Debug: Log purpose analysis prompt - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - self.services.utils.writeDebugFile( - analysisPrompt, - "document_purpose_analysis_prompt" - ) - except Exception as e: - logger.debug(f"Could not write debug file for purpose analysis prompt: {e}") - - # Call AI for analysis - options = AiCallOptions( - operationType=OperationTypeEnum.DATA_GENERATE, - resultFormat="json" - ) - - aiResponse = await self.services.ai.callAiContent( - prompt=analysisPrompt, - options=options, - outputFormat="json" - ) - - # Debug: Log purpose analysis response - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - responseContent = aiResponse.content if aiResponse and aiResponse.content else '' - responseMetadata = { - "status": aiResponse.status if aiResponse else "N/A", - "error": aiResponse.error if aiResponse else "N/A", - "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0 - } - self.services.utils.writeDebugFile( - f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}", - "document_purpose_analysis_response" - ) - except Exception as e: - logger.debug(f"Could not write debug file for purpose analysis response: {e}") - - if not aiResponse or not aiResponse.content: - 
logger.warning("AI purpose analysis returned empty response, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - # Extract and parse JSON - extractedJson = self.services.utils.jsonExtractString(aiResponse.content) - if not extractedJson: - logger.warning("No JSON found in purpose analysis response, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - try: - analysisResult = json.loads(extractedJson) - - # Validate structure - if "document_purposes" not in analysisResult: - logger.warning("Invalid analysis result structure, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - # Ensure all documents have purposes - analyzedIds = {dp.get("document_id") for dp in analysisResult.get("document_purposes", [])} - for doc in chatDocuments: - if doc.id not in analyzedIds: - logger.warning(f"Document {doc.id} not in analysis result, adding default purpose") - defaultPurpose = self._determineDefaultPurpose(doc, actionContext) - analysisResult["document_purposes"].append({ - "document_id": doc.id, - "purpose": defaultPurpose, - "reasoning": f"Default purpose based on document type and action context", - "extractionPrompt": None, - "processingNotes": None - }) - - return analysisResult - - except json.JSONDecodeError as e: - logger.error(f"Failed to parse purpose analysis JSON: {str(e)}") - logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}") - return self._createDefaultPurposes(chatDocuments, actionContext) - - except Exception as e: - logger.error(f"Error analyzing document purposes: {str(e)}") - return self._createDefaultPurposes(chatDocuments, actionContext) - - def _createAnalysisPrompt( - self, - userPrompt: str, - actionContext: str, - documentMetadata: List[Dict[str, Any]] - ) -> str: - """Create AI prompt for document purpose analysis""" - - # Format document list - docListText = "" - for i, docInfo in enumerate(documentMetadata, 1): - 
docListText += f"\n{i}. Document ID: {docInfo['document_id']}\n" - docListText += f" File Name: {docInfo['fileName']}\n" - docListText += f" MIME Type: {docInfo['mimeType']}\n" - docListText += f" File Size: {docInfo['fileSize']} bytes\n" - - # Get user language - userLanguage = self._getUserLanguage() - - prompt = f"""{'='*80} -DOCUMENT PURPOSE ANALYSIS -{'='*80} - -USER PROMPT: -{userPrompt} - -ACTION CONTEXT: {actionContext} - -DOCUMENTS PROVIDED: -{docListText} -{'='*80} - -TASK: For each document, determine its purpose based on: -1. User prompt intent (what the user wants to do) -2. Action context (what action is being performed) -3. Document type (mimeType - is it text, image, etc.) -4. Document metadata (fileName, size) - -AVAILABLE PURPOSES: -- "extract_text_content": Extract text content for use in document generation -- "include_image": Include the image directly in the generated document (for images) -- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts) -- "use_as_template": Use document structure/layout as template for generation -- "use_as_reference": Use as background context/reference without detailed extraction -- "extract_data": Extract structured data (key-value pairs, entities, fields) -- "attach": Document is an attachment - don't process, just attach to output -- "convert_format": Convert document format (for convert actions) -- "translate": Translate document content (for translate actions) -- "summarize": Create summary of document (for summarize actions) -- "compare": Compare documents (for comparison actions) -- "merge": Merge documents (for merge actions) -- "extract_tables_charts": Extract tables and charts specifically -- "use_for_styling": Use document for styling/formatting reference only -- "extract_metadata": Extract only document metadata - -CRITICAL RULES: -1. 
For images (mimeType starts with "image/"): - - If user wants to "include" or "show" images → "include_image" - - If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision" - - Default for images in generateDocument → "include_image" - -2. For text documents in generateDocument: - - If user mentions "template" or "structure" → "use_as_template" - - If user mentions "reference" or "context" → "use_as_reference" - - Default → "extract_text_content" - -3. Consider action context: - - generateDocument: Usually "extract_text_content" or "include_image" - - extractData: Usually "extract_data" - - translateDocument: Usually "translate" - - summarizeDocument: Usually "summarize" - -4. Return ONLY valid JSON following this structure: -{{ - "document_purposes": [ - {{ - "document_id": "document_id_here", - "purpose": "extract_text_content", - "reasoning": "Brief explanation in language '{userLanguage}'", - "extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null", - "processingNotes": "Any special processing requirements or null" - }} - ], - "overall_intent": "Summary of how documents should be used together in language '{userLanguage}'" -}} - -5. All content must be in the language '{userLanguage}' -6. Return ONLY the JSON structure. No explanations before or after. - -Return ONLY the JSON structure. 
-""" - return prompt - - def _createDefaultPurposes( - self, - chatDocuments: List[ChatDocument], - actionContext: str - ) -> Dict[str, Any]: - """Create default purposes when AI analysis fails""" - purposes = [] - - for doc in chatDocuments: - purpose = self._determineDefaultPurpose(doc, actionContext) - purposes.append({ - "document_id": doc.id, - "purpose": purpose, - "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})", - "extractionPrompt": None, - "processingNotes": None - }) - - return { - "document_purposes": purposes, - "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action" - } - - def _determineDefaultPurpose( - self, - doc: ChatDocument, - actionContext: str - ) -> str: - """Determine default purpose based on document type and action context""" - mimeType = doc.mimeType or "" - - # Image documents - if mimeType.startswith("image/"): - if actionContext == "generateDocument": - return "include_image" - elif actionContext in ["extractData", "process"]: - return "analyze_image_vision" - else: - return "include_image" # Default for images - - # Action-specific defaults - if actionContext == "extractData": - return "extract_data" - elif actionContext == "translateDocument": - return "translate" - elif actionContext == "summarizeDocument": - return "summarize" - elif actionContext == "convertDocument" or actionContext == "convert": - return "convert_format" - elif actionContext == "generateDocument": - return "extract_text_content" - else: - # Default for other actions - return "extract_text_content" - - def _getUserLanguage(self) -> str: - """Get user language for document generation""" - try: - if self.services: - if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage: - return self.services.currentUserLanguage - elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'): - return 
self.services.user.language - except Exception: - pass - return 'en' # Default fallback - diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py index 9a78b9f4..0ee6fa5e 100644 --- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py +++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py @@ -19,7 +19,8 @@ async def buildGenerationPrompt( title: str, extracted_content: str = None, continuationContext: Dict[str, Any] = None, - services: Any = None + services: Any = None, + useContentParts: bool = False # ARCHITECTURE: If True, don't include full content in prompt (ContentParts will be used directly) ) -> str: """ Build the unified generation prompt using a single JSON template. @@ -120,7 +121,9 @@ Continue generating the remaining content now. # PROMPT FOR FIRST CALL # Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions - if extracted_content: + # ARCHITECTURE: If useContentParts=True, don't include full content in prompt + # ContentParts will be passed directly to callAi for model-aware chunking + if extracted_content and not useContentParts: # If we have extracted content, put it FIRST and make it very clear it's the source data generationPrompt = f"""{'='*80} USER REQUEST / USER PROMPT: diff --git a/modules/services/serviceGeneration/subStructureGenerator.py b/modules/services/serviceGeneration/subStructureGenerator.py index d2ef1aeb..62e72c69 100644 --- a/modules/services/serviceGeneration/subStructureGenerator.py +++ b/modules/services/serviceGeneration/subStructureGenerator.py @@ -24,6 +24,7 @@ class StructureGenerator: userPrompt: str, documentList: Optional[Any] = None, cachedContent: Optional[Dict[str, Any]] = None, + contentParts: Optional[List[Any]] = None, maxSectionLength: int = 500, existingImages: Optional[List[Dict[str, Any]]] = None ) -> Dict[str, Any]: @@ -34,30 +35,28 @@ class 
StructureGenerator: userPrompt: User's original prompt documentList: Optional document references cachedContent: Optional extracted content cache + contentParts: Optional list of ContentParts to analyze for structure generation maxSectionLength: Maximum words for simple sections existingImages: Optional list of existing images to include Returns: - Document structure with empty elements arrays + Document structure with empty elements arrays and contentPartIds per section """ try: # Create structure generation prompt structurePrompt = self._createStructurePrompt( userPrompt=userPrompt, cachedContent=cachedContent, + contentParts=contentParts, maxSectionLength=maxSectionLength, existingImages=existingImages or [] ) - # Debug: Log structure generation prompt - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - self.services.utils.writeDebugFile( - structurePrompt, - "document_generation_structure_prompt" - ) - except Exception as e: - logger.debug(f"Could not write debug file for structure prompt: {e}") + # Debug: Log structure generation prompt (harmonisiert - keine Checks nötig) + self.services.utils.writeDebugFile( + structurePrompt, + "document_generation_structure_prompt" + ) # Call AI to generate structure from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum @@ -73,15 +72,11 @@ class StructureGenerator: outputFormat="json" ) - # Debug: Log structure generation response - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - self.services.utils.writeDebugFile( - aiResponse.content if aiResponse and aiResponse.content else '', - "document_generation_structure_response" - ) - except Exception as e: - logger.debug(f"Could not write debug file for structure response: {e}") + # Debug: Log structure generation response (harmonisiert - keine Checks nötig) + self.services.utils.writeDebugFile( + aiResponse.content if aiResponse 
and aiResponse.content else '', + "document_generation_structure_response" + ) if not aiResponse or not aiResponse.content: raise ValueError("AI structure generation returned empty response") @@ -106,6 +101,7 @@ class StructureGenerator: self, userPrompt: str, cachedContent: Optional[Dict[str, Any]] = None, + contentParts: Optional[List[Any]] = None, maxSectionLength: int = 500, existingImages: Optional[List[Dict[str, Any]]] = None ) -> str: @@ -126,6 +122,41 @@ class StructureGenerator: if cachedContent and cachedContent.get("imageDocuments"): existingImages = cachedContent.get("imageDocuments", []) + # Format ContentParts as JSON for structure generation + contentPartsJson = "" + if contentParts: + try: + import json + # Convert ContentParts to dict format for JSON serialization + contentPartsList = [] + for part in contentParts: + if hasattr(part, 'dict'): + partDict = part.dict() + elif isinstance(part, dict): + partDict = part + else: + # Try to convert to dict + partDict = { + "id": getattr(part, 'id', ''), + "typeGroup": getattr(part, 'typeGroup', ''), + "mimeType": getattr(part, 'mimeType', ''), + "label": getattr(part, 'label', ''), + "metadata": getattr(part, 'metadata', {}) + } + # Only include essential fields for structure generation (not full data) + contentPartsList.append({ + "id": partDict.get("id", ""), + "typeGroup": partDict.get("typeGroup", ""), + "mimeType": partDict.get("mimeType", ""), + "label": partDict.get("label", ""), + "metadata": partDict.get("metadata", {}) + }) + + contentPartsJson = json.dumps(contentPartsList, indent=2, ensure_ascii=False) + except Exception as e: + logger.warning(f"Could not format ContentParts as JSON: {str(e)}") + contentPartsJson = "" + # Create structure template structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title") @@ -145,13 +176,15 @@ EXTRACTED CONTENT (if available): {'='*80} INSTRUCTIONS: -1. Analyze the user request and extracted content +1. 
Analyze the user request, extracted content, and available ContentParts 2. Create a document structure with CONTENT sections only 3. For each section, specify: - id: Unique identifier (e.g., "section_title_1", "section_image_1") - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) - generation_hint: Brief description of what content should be generated + - contentPartIds: Array of ContentPart IDs that should be used for this section (e.g., ["part_1", "part_2"]) - can be empty [] + - extractionPrompt: (optional) Specific prompt for extracting/processing ContentParts for this section - image_prompt: (only for image sections) Detailed prompt for image generation - order: Section order number (starting from 1) - elements: [] (empty array - will be populated later) @@ -160,10 +193,12 @@ INSTRUCTIONS: - If user requests illustrations/images, create image sections - If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them - Add image_prompt field with detailed description for image generation (only for new images) - - Set complexity to "complex" + - Set complexity to "complex" for new images, "simple" for existing/render images - For existing images: Set image_source to "existing" and image_reference_id to the image document ID + - For images to render (from input documents): Set image_source to "render" and image_reference_id to the image document ID - Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}} - Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": 
"doc_id_here", "order": 2, "elements": []}} + - Example for render image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Render input image", "image_source": "render", "image_reference_id": "doc_id_here", "order": 2, "elements": []}} {'='*80} EXISTING IMAGES (to include in document): @@ -178,12 +213,21 @@ EXISTING IMAGES (to include in document): 7. Return ONLY valid JSON following this structure: {structureTemplate} -5. CRITICAL RULES: +5. CRITICAL RULES FOR CONTENT PARTS: + - Analyze available ContentParts and determine which ones are needed for each section + - For image sections (content_type == "image"): Include image ContentParts in contentPartIds - images will be integrated as visual elements + - For other sections (heading, paragraph, etc.): If image ContentParts are referenced, they will be referenced as text in the document language (not integrated as images) + - Each section can reference multiple ContentParts via contentPartIds array + - If specific extraction/processing is needed for ContentParts, provide extractionPrompt + - Image references in non-image sections should be automatically derived in the document language (e.g., "siehe Bild 1" in German, "see Image 1" in English) + +6. 
CRITICAL RULES: - Return ONLY valid JSON (no comments, no trailing commas, double quotes only) - Follow the exact JSON schema structure provided - IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays) - ALL sections MUST include "generation_hint" field with a brief description of what content should be generated - ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images + - ALL sections MUST include "contentPartIds" field (can be empty array [] if no ContentParts needed) - Image sections MUST include "image_prompt" field with detailed description for image generation - Order numbers MUST start from 1 (not 0) - All content must be in the language '{userLanguage}' @@ -235,6 +279,14 @@ Return ONLY the JSON structure. No explanations. if "elements" not in section: section["elements"] = [] + # Ensure contentPartIds field exists (can be empty array) + if "contentPartIds" not in section: + section["contentPartIds"] = [] + + # Ensure extractionPrompt field exists (optional) + if "extractionPrompt" not in section: + section["extractionPrompt"] = None + # Identify complexity if not set if "complexity" not in section: section["complexity"] = self._identifySectionComplexity( @@ -255,11 +307,11 @@ Return ONLY the JSON structure. No explanations. 
if section.get("content_type") == "image": imageSource = section.get("image_source", "generate") - if imageSource == "existing": - # Existing image - ensure image_reference_id is set + if imageSource == "existing" or imageSource == "render": + # Existing or render image - ensure image_reference_id is set if "image_reference_id" not in section: - logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id") - # Existing images are simple (no generation needed) + logger.warning(f"Image section {sectionId} has image_source='{imageSource}' but no image_reference_id") + # Existing/render images are simple (no generation needed, code integration) section["complexity"] = "simple" else: # New image generation - ensure image_prompt diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index f2678b63..9a7cffab 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -2,6 +2,7 @@ # All rights reserved. import json import logging +import re from typing import Any, Dict, List, Optional, Tuple, Union, Type, TypeVar from pydantic import BaseModel, ValidationError @@ -11,10 +12,32 @@ T = TypeVar('T', bound=BaseModel) def stripCodeFences(text: str) -> str: - """Remove ```json / ``` fences and surrounding whitespace if present.""" + """Remove ```json / ``` fences and surrounding whitespace if present. + Also removes [SOURCE: ...] and [END SOURCE] tags that may wrap the JSON.""" if not text: return text s = text.strip() + + # Remove [SOURCE: ...] 
tags at the beginning + if s.startswith("[SOURCE:"): + # Find the end of the SOURCE tag (newline or end of string) + end_pos = s.find("\n") + if end_pos != -1: + s = s[end_pos+1:] + else: + # No newline, entire string is SOURCE tag + return "" + + # Remove [END SOURCE] tags at the end + if s.endswith("[END SOURCE]"): + # Find the start of END SOURCE tag (newline before it) + start_pos = s.rfind("\n[END SOURCE]") + if start_pos != -1: + s = s[:start_pos] + else: + # No newline, entire string is END SOURCE tag + return "" + # Handle opening fence (may or may not have closing fence) if s.startswith("```"): # Remove first triple backticks @@ -201,7 +224,7 @@ def closeJsonStructures(text: str) -> str: # Look for patterns like: "value" or "value\n (unterminated) # Check if we're in the middle of a string value when text ends if result.strip(): - import re + # re is already imported at module level # Count quotes - if odd number, we have an unterminated string quoteCount = result.count('"') if quoteCount % 2 == 1: @@ -367,7 +390,7 @@ def _removeLastIncompleteItem(items: List[str], original_text: str) -> List[str] Remove the last item if it appears to be incomplete/corrupted. This prevents corrupted data from being included in the final result. """ - import re + # re is already imported at module level if not items: return items @@ -418,7 +441,7 @@ def _extractGenericContent(text: str) -> List[Dict[str, Any]]: CRITICAL: Must preserve original content_type and id from the JSON structure! 
""" - import re + # re is already imported at module level sections = [] @@ -1025,7 +1048,7 @@ def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) -> if not cut_off_element: # Extract the last incomplete part from raw JSON # Find the last incomplete string/number/array - import re + # re is already imported at module level # Look for incomplete string at the end incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL) if incomplete_match: @@ -1045,7 +1068,7 @@ def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optiona This helps identify where exactly to continue within nested structures. """ - import re + # re is already imported at module level # Check for code_block with nested JSON if "code" in element: diff --git a/modules/workflows/methods/methodAi/actions/__init__.py b/modules/workflows/methods/methodAi/actions/__init__.py index f0ba9d4d..8ebe6679 100644 --- a/modules/workflows/methods/methodAi/actions/__init__.py +++ b/modules/workflows/methods/methodAi/actions/__init__.py @@ -8,9 +8,7 @@ from .process import process from .webResearch import webResearch from .summarizeDocument import summarizeDocument from .translateDocument import translateDocument -from .convert import convert from .convertDocument import convertDocument -from .extractData import extractData from .generateDocument import generateDocument __all__ = [ @@ -18,9 +16,7 @@ __all__ = [ 'webResearch', 'summarizeDocument', 'translateDocument', - 'convert', 'convertDocument', - 'extractData', 'generateDocument', ] diff --git a/modules/workflows/methods/methodAi/actions/convert.py b/modules/workflows/methods/methodAi/actions/convert.py deleted file mode 100644 index 788fadea..00000000 --- a/modules/workflows/methods/methodAi/actions/convert.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. - -""" -Convert action for AI operations. 
-Converts documents/data between different formats with specific formatting options. -""" - -import logging -import json -from typing import Dict, Any -from modules.workflows.methods.methodBase import action -from modules.datamodels.datamodelChat import ActionResult, ActionDocument -from modules.datamodels.datamodelDocref import DocumentReferenceList - -logger = logging.getLogger(__name__) - -@action -async def convert(self, parameters: Dict[str, Any]) -> ActionResult: - """ - GENERAL: - - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters). - - Input requirements: documentList (required); inputFormat and outputFormat (required). - - Output format: Document in target format with specified formatting options. - - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed). - - Parameters: - - documentList (list, required): Document reference(s) to convert. - - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.). - - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.). - - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect. - - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,). - - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True. - - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'. 
- """ - documentList = parameters.get("documentList", []) - if not documentList: - return ActionResult.isFailure(error="documentList is required") - - inputFormat = parameters.get("inputFormat") - outputFormat = parameters.get("outputFormat") - if not inputFormat or not outputFormat: - return ActionResult.isFailure(error="inputFormat and outputFormat are required") - - # Normalize formats (remove leading dot if present) - normalizedInputFormat = inputFormat.strip().lstrip('.').lower() - normalizedOutputFormat = outputFormat.strip().lstrip('.').lower() - - # Get documents - if isinstance(documentList, DocumentReferenceList): - docRefList = documentList - elif isinstance(documentList, list): - docRefList = DocumentReferenceList.from_string_list(documentList) - else: - docRefList = DocumentReferenceList.from_string_list([documentList]) - - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) - if not chatDocuments: - return ActionResult.isFailure(error="No documents found in documentList") - - # Check if input is standardized JSON format - if so, use direct rendering - if normalizedInputFormat == "json" and len(chatDocuments) == 1: - try: - doc = chatDocuments[0] - # ChatDocument doesn't have documentData - need to load file content using fileId - docBytes = self.services.chat.getFileData(doc.fileId) - if not docBytes: - raise ValueError(f"No file data found for fileId={doc.fileId}") - - # Decode bytes to string - docData = docBytes.decode('utf-8') - - # Try to parse as JSON - if isinstance(docData, str): - jsonData = json.loads(docData) - elif isinstance(docData, dict): - jsonData = docData - else: - jsonData = None - - # Check if it's standardized JSON format (has "documents" or "sections") - if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)): - # Use direct rendering - no AI call needed! 
- from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - - # Ensure format is "documents" array - if "documents" not in jsonData: - jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]} - - # Get title - title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document") - - # Render with options - renderOptions = {} - if normalizedOutputFormat == "csv": - renderOptions["delimiter"] = parameters.get("delimiter", ",") - renderOptions["columnsPerRow"] = parameters.get("columnsPerRow") - renderOptions["includeHeader"] = parameters.get("includeHeader", True) - - rendered_content, mime_type, _images = await generationService.renderReport( - jsonData, normalizedOutputFormat, title, None, None - ) - - # Apply CSV options if needed (renderer will handle them) - if normalizedOutputFormat == "csv" and renderOptions: - rendered_content = self.csvProcessing.applyCsvOptions(rendered_content, renderOptions) - - validationMetadata = { - "actionType": "ai.convert", - "inputFormat": normalizedInputFormat, - "outputFormat": normalizedOutputFormat, - "hasSourceJson": True, - "conversionType": "direct_rendering" - } - actionDoc = ActionDocument( - documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' 
in doc.documentName else doc.documentName}.{normalizedOutputFormat}", - documentData=rendered_content, - mimeType=mime_type, - sourceJson=jsonData, # Preserve source JSON for structure validation - validationMetadata=validationMetadata - ) - - return ActionResult.isSuccess(documents=[actionDoc]) - - except Exception as e: - logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}") - # Fall through to AI-based conversion - - # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions) - columnsPerRow = parameters.get("columnsPerRow") - delimiter = parameters.get("delimiter", ",") - includeHeader = parameters.get("includeHeader", True) - language = parameters.get("language", "en") - - aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format." - - if normalizedOutputFormat == "csv": - aiPrompt += f" Use '{delimiter}' as the delimiter character." - if columnsPerRow: - aiPrompt += f" Format the output with {columnsPerRow} columns per row." - if not includeHeader: - aiPrompt += " Do not include a header row." - else: - aiPrompt += " Include a header row with column names." - - if language and language != "en": - aiPrompt += f" Use language: {language}." - - aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure." - - return await self.process({ - "aiPrompt": aiPrompt, - "documentList": documentList, - "resultType": normalizedOutputFormat - }) - diff --git a/modules/workflows/methods/methodAi/actions/extractData.py b/modules/workflows/methods/methodAi/actions/extractData.py deleted file mode 100644 index 723914bd..00000000 --- a/modules/workflows/methods/methodAi/actions/extractData.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. - -""" -Extract Data action for AI operations. -Extracts structured data from documents (key-value pairs, entities, facts, etc.). 
-""" - -import logging -from typing import Dict, Any -from modules.workflows.methods.methodBase import action -from modules.datamodels.datamodelChat import ActionResult - -logger = logging.getLogger(__name__) - -@action -async def extractData(self, parameters: Dict[str, Any]) -> ActionResult: - """ - GENERAL: - - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.). - - Input requirements: documentList (required); optional dataStructure, fields. - - Output format: JSON by default, or specified resultType. - - Parameters: - - documentList (list, required): Document reference(s) to extract data from. - - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested. - - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]). - - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json. - """ - documentList = parameters.get("documentList", []) - if not documentList: - return ActionResult.isFailure(error="documentList is required") - - dataStructure = parameters.get("dataStructure", "nested") - fields = parameters.get("fields", []) - resultType = parameters.get("resultType", "json") - - aiPrompt = "Extract structured data from the provided document(s)." - if fields: - fieldsStr = ", ".join(fields) - aiPrompt += f" Extract the following specific fields: {fieldsStr}." - else: - aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information." - - structureInstructions = { - "flat": "Use a flat key-value structure with simple properties.", - "nested": "Use a nested JSON structure with logical grouping of related data.", - "list": "Structure the data as a list/array of objects, one per entity or record." - } - aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}" - - aiPrompt += " Ensure all extracted data is accurate and complete." 
- - return await self.process({ - "aiPrompt": aiPrompt, - "documentList": documentList, - "resultType": resultType - }) - diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py index 5b5db12f..6569ddab 100644 --- a/modules/workflows/methods/methodAi/actions/generateDocument.py +++ b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -3,18 +3,17 @@ """ Generate Document action for AI operations. -Generates documents from scratch or based on templates/inputs using hierarchical approach. +Wrapper around AI service callAiContent method. """ import logging import time -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List from modules.workflows.methods.methodBase import action from modules.datamodels.datamodelChat import ActionResult, ActionDocument -from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy -from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator -from modules.services.serviceGeneration.subContentGenerator import ContentGenerator -from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer +from modules.datamodels.datamodelExtraction import ContentPart +from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum +from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData logger = logging.getLogger(__name__) @@ -59,38 +58,15 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: resultType = "txt" logger.info(f"Auto-detected Text format from prompt") - maxSectionLength = parameters.get("maxSectionLength", 500) - parallelGeneration = parameters.get("parallelGeneration", True) - progressLogging = parameters.get("progressLogging", True) - # Create operation ID for progress tracking workflowId = self.services.workflow.id if 
self.services.workflow else f"no-workflow-{int(time.time())}" operationId = f"doc_gen_{workflowId}_{int(time.time())}" parentOperationId = parameters.get('parentOperationId') try: - # Phase 1: Structure Generation - if progressLogging: - self.services.chat.progressLogStart( - operationId, - "Document", - "Structure Generation", - "Generating document structure...", - parentOperationId=parentOperationId - ) - - structureGenerator = StructureGenerator(self.services) - - # Analyze document purposes and process documents accordingly - cachedContent = None - imageDocuments = [] - documentPurposes = {} - + # Convert documentList to DocumentReferenceList if needed + docRefList = None if documentList: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...") - - # Convert documentList to DocumentReferenceList from modules.datamodels.datamodelDocref import DocumentReferenceList if isinstance(documentList, DocumentReferenceList): @@ -101,301 +77,78 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: docRefList = DocumentReferenceList.from_string_list(documentList) else: docRefList = DocumentReferenceList(references=[]) - - # Get ChatDocuments - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) - if chatDocuments: - logger.info(f"Analyzing purposes for {len(chatDocuments)} documents") - - # Analyze document purposes using AI - purposeAnalyzer = DocumentPurposeAnalyzer(self.services) - purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes( - userPrompt=prompt, - chatDocuments=chatDocuments, - actionContext="generateDocument" - ) - - documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])} - logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}") - - # Separate documents by purpose - textDocs = [] - imageDocsToInclude = [] - imageDocsToAnalyze = [] - - for doc in chatDocuments: - 
docPurpose = documentPurposes.get(doc.id, {}) - purpose = docPurpose.get("purpose", "extract_text_content") - - if purpose == "include_image": - imageDocsToInclude.append(doc) - elif purpose == "analyze_image_vision": - imageDocsToAnalyze.append(doc) - elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]: - textDocs.append(doc) - # Skip "attach" purpose - don't process - - # Process text documents (extract content) - extractedResults = [] - if textDocs: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...") - - # Prepare extraction options with purpose-specific prompts - extractionOptionsList = [] - for doc in textDocs: - docPurpose = documentPurposes.get(doc.id, {}) - extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document" - - extractionOptions = ExtractionOptions( - prompt=extractionPrompt, - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - extractionOptionsList.append((doc, extractionOptions)) - - # Extract content from text documents - for doc, extractionOptions in extractionOptionsList: - try: - docResults = self.services.extraction.extractContent( - [doc], - extractionOptions, - parentOperationId=operationId - ) - extractedResults.extend(docResults) - except Exception as e: - logger.error(f"Error extracting content from {doc.fileName}: {str(e)}") - - logger.info(f"Extracted content from {len(extractedResults)} text document(s)") - - # Process images to analyze (vision call) - if imageDocsToAnalyze: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...") - - # Extract content from images using vision analysis - for doc in imageDocsToAnalyze: - try: - docPurpose = documentPurposes.get(doc.id, {}) - 
extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image" - - extractionOptions = ExtractionOptions( - prompt=extractionPrompt, - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - - docResults = self.services.extraction.extractContent( - [doc], - extractionOptions, - parentOperationId=operationId - ) - extractedResults.extend(docResults) - except Exception as e: - logger.error(f"Error analyzing image {doc.fileName}: {str(e)}") - - logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI") - - # Process images to include (store image data) - if imageDocsToInclude: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...") - - # Get image data for inclusion - from modules.interfaces.interfaceDbComponentObjects import getInterface - dbInterface = getInterface() - - for doc in imageDocsToInclude: - try: - # Get image bytes - imageBytes = dbInterface.getFileData(doc.fileId) - if imageBytes: - # Encode to base64 - import base64 - base64Data = base64.b64encode(imageBytes).decode('utf-8') - - # Create image document entry - imageDoc = { - "id": doc.id, - "fileName": doc.fileName, - "mimeType": doc.mimeType, - "base64Data": base64Data, - "altText": doc.fileName or "Image", - "fileSize": doc.fileSize - } - imageDocuments.append(imageDoc) - logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)") - else: - logger.warning(f"Could not retrieve image data for {doc.fileName}") - except Exception as e: - logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}") - - logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion") - - # Build cachedContent with all information - cachedContent = { - "extractedContent": extractedResults, - "imageDocuments": imageDocuments, - 
"documentPurposes": documentPurposes, - "extractionTimestamp": time.time(), - "sourceDocuments": [doc.id for doc in chatDocuments] - } - - logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include") - # Generate structure - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...") + # Prepare title + title = parameters.get("documentType") or "Generated Document" - structure = await structureGenerator.generateStructure( - userPrompt=prompt, - documentList=documentList if documentList else None, - cachedContent=cachedContent, - maxSectionLength=maxSectionLength, - existingImages=imageDocuments # Pass existing images for structure generation + # Call AI service for document generation + # callAiContent handles documentList internally via Phases 5A-5E + options = AiCallOptions( + operationType=OperationTypeEnum.DATA_GENERATE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED, + compressPrompt=False, + compressContext=False ) - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") - - # Phase 2: Content Generation - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.34, - "Starting content generation..." 
- ) - - contentGenerator = ContentGenerator(self.services) - - # Create enhanced progress callback - def progressCallback(sectionIndex: int, totalSections: int, message: str): - if progressLogging: - # Calculate progress: 34% to 90% for content generation phase - if totalSections > 0: - progress = 0.34 + (0.56 * (sectionIndex / totalSections)) - else: - progress = 0.34 - - # Format message - if sectionIndex > 0 and totalSections > 0: - progressMessage = f"Section {sectionIndex}/{totalSections}: {message}" - else: - progressMessage = message - - self.services.chat.progressLogUpdate( - operationId, - progress, - progressMessage - ) - - completeStructure = await contentGenerator.generateContent( - structure=structure, - cachedContent=cachedContent, - userPrompt=prompt, - progressCallback=progressCallback, - parallelGeneration=parallelGeneration - ) - - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") - - # Phase 3: Integration & Rendering - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.91, - "Rendering final document..." 
- ) - - # Use existing renderReport method - title = structure.get("metadata", {}).get("title", "Generated Document") - if documentType: - title = f"{title} ({documentType})" - - renderedContent, mimeType, images = await self.services.generation.renderReport( - extractedContent=completeStructure, + aiResponse: AiResponse = await self.services.ai.callAiContent( + prompt=prompt, + options=options, + documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E outputFormat=resultType, title=title, - userPrompt=prompt, - aiService=self.services.ai + parentOperationId=parentOperationId ) - # Build list of documents to return - documents = [ - ActionDocument( - documentName=f"document.{resultType}", - documentData=renderedContent, - mimeType=mimeType - ) - ] + # Convert AiResponse to ActionResult + documents = [] - # Add images as separate documents - if images: - logger.info(f"Processing {len(images)} image(s) from renderer") - import base64 - for idx, imageData in enumerate(images): - try: - base64Data = imageData.get("base64Data", "") - altText = imageData.get("altText", f"image_{idx + 1}") - caption = imageData.get("caption", "") - sectionId = imageData.get("sectionId", f"section_{idx + 1}") - - if base64Data: - # Decode base64 to bytes - imageBytes = base64.b64decode(base64Data) - - # Determine filename and mime type - filename = imageData.get("filename", f"image_{idx + 1}.png") - if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')): - filename = f"image_{idx + 1}.png" - - # Determine mime type from filename - if filename.lower().endswith('.png'): - imageMimeType = "image/png" - elif filename.lower().endswith(('.jpg', '.jpeg')): - imageMimeType = "image/jpeg" - elif filename.lower().endswith('.gif'): - imageMimeType = "image/gif" - elif filename.lower().endswith('.webp'): - imageMimeType = "image/webp" - else: - imageMimeType = "image/png" # Default - - # Add image document - documents.append(ActionDocument( - 
documentName=filename, - documentData=imageBytes, - mimeType=imageMimeType - )) - logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})") + # Convert DocumentData to ActionDocument + if aiResponse.documents: + for docData in aiResponse.documents: + documents.append(ActionDocument( + documentName=docData.documentName, + documentData=docData.documentData, + mimeType=docData.mimeType, + sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None + )) + + # If no documents but content exists, create a document from content + if not documents and aiResponse.content: + # Determine document name from metadata + docName = f"document.{resultType}" + if aiResponse.metadata and aiResponse.metadata.filename: + docName = aiResponse.metadata.filename + elif aiResponse.metadata and aiResponse.metadata.title: + import re + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if sanitized: + if not sanitized.lower().endswith(f".{resultType}"): + docName = f"{sanitized}.{resultType}" else: - logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, skipping") - except Exception as e: - logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True) - continue - else: - logger.debug("No images returned from renderer") - - # Note: Document creation is handled by the workflow system - # We just return the rendered content and images in ActionResult - - if progressLogging: - self.services.chat.progressLogFinish(operationId, True) + docName = sanitized + + # Determine mime type + mimeType = "text/plain" + if resultType == "html": + mimeType = "text/html" + elif resultType == "json": + mimeType = "application/json" + elif resultType == "pdf": + mimeType = "application/pdf" + elif resultType == "md": + mimeType = "text/markdown" + + documents.append(ActionDocument( + documentName=docName, + 
documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content, + mimeType=mimeType + )) return ActionResult.isSuccess(documents=documents) except Exception as e: - logger.error(f"Error in hierarchical document generation: {str(e)}") - if progressLogging: - self.services.chat.progressLogFinish(operationId, False) + logger.error(f"Error in document generation: {str(e)}") return ActionResult.isFailure(error=str(e)) diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index 2468d949..5abc57cd 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -8,11 +8,12 @@ Universal AI document processing action. import logging import time +import json from typing import Dict, Any, List, Optional from modules.workflows.methods.methodBase import action from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelAi import AiCallOptions -from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart +from modules.datamodels.datamodelExtraction import ContentPart logger = logging.getLogger(__name__) @@ -82,8 +83,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available logger.info(f"Using result type: {resultType} -> {output_extension}") - # Phase 7.3: Extract content first if documents provided, then use contentParts - # Check if contentParts are already provided (preferred path) + # Check if contentParts are already provided (from context.extractContent or other sources) contentParts: Optional[List[ContentPart]] = None if "contentParts" in parameters: contentParts = parameters.get("contentParts") @@ -95,63 +95,42 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: 
logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty") contentParts = None - # If contentParts not provided but documentList is, extract content first - if not contentParts and documentList.references: - self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents") - - # Get ChatDocuments - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - if not chatDocuments: - logger.warning("No documents found in documentList") - else: - logger.info(f"Extracting content from {len(chatDocuments)} documents") - - # Prepare extraction options (use defaults if not provided) - extractionOptions = parameters.get("extractionOptions") - if not extractionOptions: - extractionOptions = ExtractionOptions( - prompt="Extract all content from the document", - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - - # Extract content using extraction service with hierarchical progress logging - # Pass operationId for per-document progress tracking - extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId) - - # Combine all ContentParts from all extracted results - contentParts = [] - for extracted in extractedResults: - if extracted.parts: - contentParts.extend(extracted.parts) - - logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents") - # Update progress - preparing AI call self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call") - # Build options with only resultFormat - let service layer handle all other parameters + # Build options output_format = output_extension.replace('.', '') or 'txt' options = AiCallOptions( resultFormat=output_format - # Removed all model parameters - service layer will analyze prompt and determine optimal parameters ) # Update progress - calling AI 
self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI") - # Use unified callAiContent method with contentParts (extraction is now separate) - aiResponse = await self.services.ai.callAiContent( - prompt=aiPrompt, - options=options, - contentParts=contentParts, # Already extracted (or None if no documents) - outputFormat=output_format, - parentOperationId=operationId - ) + # Use unified callAiContent method + # If contentParts provided (pre-extracted), use them directly + # Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally + # Note: ContentExtracted documents (from context.extractContent) are now handled + # automatically in _extractAndPrepareContent() (Phase 5B) + if contentParts: + # Pre-extracted ContentParts - use them directly + aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + contentParts=contentParts, # Pre-extracted ContentParts + outputFormat=output_format, + parentOperationId=operationId + ) + else: + # Pass documentList - callAiContent handles Phases 5A-5E internally + # This includes automatic detection of ContentExtracted documents + aiResponse = await self.services.ai.callAiContent( + prompt=aiPrompt, + options=options, + documentList=documentList, # callAiContent macht Phasen 5A-5E + outputFormat=output_format, + parentOperationId=operationId + ) # Update progress - processing result self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result") diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py index 7595c2eb..881b007d 100644 --- a/modules/workflows/methods/methodAi/methodAi.py +++ b/modules/workflows/methods/methodAi/methodAi.py @@ -15,9 +15,7 @@ from .actions.process import process from .actions.webResearch import webResearch from .actions.summarizeDocument import summarizeDocument from .actions.translateDocument import translateDocument -from .actions.convert import convert from 
.actions.convertDocument import convertDocument -from .actions.extractData import extractData from .actions.generateDocument import generateDocument logger = logging.getLogger(__name__) @@ -192,69 +190,6 @@ class MethodAi(MethodBase): }, execute=translateDocument.__get__(self, self.__class__) ), - "convert": WorkflowActionDefinition( - actionId="ai.convert", - description="Convert documents/data between different formats with specific formatting options", - parameters={ - "documentList": WorkflowActionParameter( - name="documentList", - type="List[str]", - frontendType=FrontendType.DOCUMENT_REFERENCE, - required=True, - description="Document reference(s) to convert" - ), - "inputFormat": WorkflowActionParameter( - name="inputFormat", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["json", "csv", "xlsx", "txt"], - required=True, - description="Source format" - ), - "outputFormat": WorkflowActionParameter( - name="outputFormat", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["csv", "json", "xlsx", "txt"], - required=True, - description="Target format" - ), - "columnsPerRow": WorkflowActionParameter( - name="columnsPerRow", - type="int", - frontendType=FrontendType.NUMBER, - required=False, - description="For CSV output, number of columns per row. 
Default: auto-detect", - validation={"min": 1, "max": 100} - ), - "delimiter": WorkflowActionParameter( - name="delimiter", - type="str", - frontendType=FrontendType.TEXT, - required=False, - default=",", - description="For CSV output, delimiter character" - ), - "includeHeader": WorkflowActionParameter( - name="includeHeader", - type="bool", - frontendType=FrontendType.CHECKBOX, - required=False, - default=True, - description="For CSV output, whether to include header row" - ), - "language": WorkflowActionParameter( - name="language", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["de", "en", "fr"], - required=False, - default="en", - description="Language for output" - ) - }, - execute=convert.__get__(self, self.__class__) - ), "convertDocument": WorkflowActionDefinition( actionId="ai.convertDocument", description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)", @@ -285,45 +220,6 @@ class MethodAi(MethodBase): }, execute=convertDocument.__get__(self, self.__class__) ), - "extractData": WorkflowActionDefinition( - actionId="ai.extractData", - description="Extract structured data from documents (key-value pairs, entities, facts, etc.)", - parameters={ - "documentList": WorkflowActionParameter( - name="documentList", - type="List[str]", - frontendType=FrontendType.DOCUMENT_REFERENCE, - required=True, - description="Document reference(s) to extract data from" - ), - "dataStructure": WorkflowActionParameter( - name="dataStructure", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["flat", "nested", "list"], - required=False, - default="nested", - description="Desired data structure" - ), - "fields": WorkflowActionParameter( - name="fields", - type="List[str]", - frontendType=FrontendType.MULTISELECT, - required=False, - description="Specific fields/properties to extract (e.g., [name, date, amount])" - ), - "resultType": WorkflowActionParameter( - name="resultType", - type="str", - 
frontendType=FrontendType.SELECT, - frontendOptions=["json", "csv", "xlsx"], - required=False, - default="json", - description="Output format" - ) - }, - execute=extractData.__get__(self, self.__class__) - ), "generateDocument": WorkflowActionDefinition( actionId="ai.generateDocument", description="Generate documents from scratch or based on templates/inputs", @@ -371,9 +267,7 @@ class MethodAi(MethodBase): self.webResearch = webResearch.__get__(self, self.__class__) self.summarizeDocument = summarizeDocument.__get__(self, self.__class__) self.translateDocument = translateDocument.__get__(self, self.__class__) - self.convert = convert.__get__(self, self.__class__) self.convertDocument = convertDocument.__get__(self, self.__class__) - self.extractData = extractData.__get__(self, self.__class__) self.generateDocument = generateDocument.__get__(self, self.__class__) def _format_timestamp_for_filename(self) -> str: diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py index 8c5fd5fb..949ac63d 100644 --- a/modules/workflows/methods/methodContext/actions/extractContent.py +++ b/modules/workflows/methods/methodContext/actions/extractContent.py @@ -19,10 +19,21 @@ logger = logging.getLogger(__name__) @action async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: """ - Extract content from documents (separate from AI calls). + Extract raw content parts from documents without AI processing. - This action performs pure content extraction without AI processing. - The extracted ContentParts can then be used by subsequent AI processing actions. + This action performs pure content extraction WITHOUT AI/OCR processing. + It returns ContentParts with different typeGroups: + - "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.) 
+ - "image": Images as base64-encoded data (NOT converted to text, no OCR) + - "table": Tables as structured data + - "structure": Structured content (JSON, etc.) + - "container": Container elements (PDF pages, etc.) + + IMPORTANT: + - Images are returned as base64 data, NOT as extracted text + - No OCR is performed - images are preserved as visual elements + - Text extraction only works for text-based formats (not images) + - The extracted ContentParts can then be used by subsequent AI processing actions Parameters: - documentList (list, required): Document reference(s) to extract content from. @@ -30,7 +41,8 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: Returns: - ActionResult with ActionDocument containing ContentExtracted objects - - ContentExtracted.parts contains List[ContentPart] (already chunked if needed) + - ContentExtracted.parts contains List[ContentPart] with various typeGroups + - Each ContentPart has a typeGroup indicating its type (text, image, table, etc.) 
""" try: # Init progress logger @@ -79,12 +91,26 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: # Convert dict to ExtractionOptions object if needed, or create defaults if extractionOptionsParam: if isinstance(extractionOptionsParam, dict): + # Ensure required fields are present + if "prompt" not in extractionOptionsParam: + extractionOptionsParam["prompt"] = "Extract all content from the document" + if "mergeStrategy" not in extractionOptionsParam: + extractionOptionsParam["mergeStrategy"] = MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ) # Convert dict to ExtractionOptions object - extractionOptions = ExtractionOptions(**extractionOptionsParam) + try: + extractionOptions = ExtractionOptions(**extractionOptionsParam) + except Exception as e: + logger.warning(f"Failed to create ExtractionOptions from dict: {str(e)}, using defaults") + extractionOptions = None elif isinstance(extractionOptionsParam, ExtractionOptions): extractionOptions = extractionOptionsParam else: # Invalid type, use defaults + logger.warning(f"Invalid extractionOptions type: {type(extractionOptionsParam)}, using defaults") extractionOptions = None else: extractionOptions = None diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py index a635764f..942f3f85 100644 --- a/modules/workflows/methods/methodContext/methodContext.py +++ b/modules/workflows/methods/methodContext/methodContext.py @@ -50,7 +50,7 @@ class MethodContext(MethodBase): ), "extractContent": WorkflowActionDefinition( actionId="context.extractContent", - description="Extract content from documents (separate from AI calls)", + description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. 
Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). Use this action to prepare documents for subsequent AI processing actions.", parameters={ "documentList": WorkflowActionParameter( name="documentList", @@ -64,7 +64,7 @@ class MethodContext(MethodBase): type="dict", frontendType=FrontendType.JSON, required=False, - description="Extraction options (if not provided, defaults are used)" + description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text." ) }, execute=extractContent.__get__(self, self.__class__) diff --git a/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md b/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md deleted file mode 100644 index 39c649ce..00000000 --- a/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md +++ /dev/null @@ -1,354 +0,0 @@ -# Architecture & Implementation Analysis -## Deep Review of Hierarchical Document Generation - -**Date**: 2025-12-22 -**Status**: Critical Issues Found - ---- - -## Executive Summary - -The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While core components exist, several fundamental issues need to be addressed. 
- ---- - -## ✅ What's Correctly Implemented - -### Phase 1: Core Infrastructure ✅ -- ✅ `StructureGenerator` class exists with `generateStructure()` method -- ✅ `ContentGenerator` class exists with `generateContent()` method -- ✅ `ContentIntegrator` class exists with `integrateContent()` method -- ✅ `generateDocument` action uses hierarchical approach -- ✅ Basic progress logging implemented -- ✅ Error handling with `createErrorSection()` implemented - -### Phase 2: Image Generation ✅ -- ✅ `_generateImageSection()` method implemented -- ✅ Image prompt extraction from structure -- ✅ Base64 image data storage -- ✅ Error handling for image failures - -### Phase 3: Parallel Processing ✅ -- ✅ `_generateSectionsParallel()` method implemented -- ✅ `_generateSectionsSequential()` method implemented -- ✅ Batch processing for large documents -- ✅ Progress callback system -- ✅ Exception handling in parallel execution - ---- - -## ❌ Critical Issues Found - -### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED** - -**Problem**: -- In parallel mode, sections within the same batch cannot see each other (correct) -- BUT: Sections in later batches should see sections from earlier batches -- **Current Status**: Code was fixed to accumulate previous sections, but needs verification - -**Location**: `subContentGenerator.py` lines 240-319 - -**Fix Applied**: -- Added `accumulatedPreviousSections` to track sections across batches -- Pass accumulated sections to each batch -- **VERIFICATION NEEDED**: Test that prompts actually show previous sections - -**Risk**: Medium - May cause continuity issues in generated content - ---- - -### Issue 2: Variable Shadowing Bug ✅ **FIXED** - -**Problem**: -- `contentType` variable was shadowed in loop, causing wrong section type in prompts - -**Location**: `subContentGenerator.py` line 676 - -**Fix Applied**: -- Renamed loop variable to `prevContentType` - -**Status**: ✅ Fixed - ---- - -### Issue 3: Missing 
`generation_hint` in Structure Response ✅ **FIXED** - -**Problem**: -- Structure generator creates generic hints like "Section heading" instead of meaningful hints -- AI generates same content for all headings because hints are identical - -**Location**: `subStructureGenerator.py` lines 242-269 - -**Fix Applied**: -- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs -- Example: `section_heading_current_state` → "Current State" - -**Status**: ✅ Fixed - ---- - -### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED** - -**Problem**: -- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays -- Template missing `generation_hint` and `complexity` fields -- Template showed `order: 0` but should start from 1 - -**Location**: `datamodelJson.py` - -**Fix Applied**: -- Updated template to show empty `elements: []` -- Added `generation_hint` to all sections -- Added `complexity` to all sections -- Changed `order` to start from 1 -- Added `title` to metadata - -**Status**: ✅ Fixed - ---- - -### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED** - -**Problem**: -- Prompt said "All sections must have empty elements arrays" but template showed filled arrays -- Prompt didn't explicitly require `generation_hint` and `complexity` fields - -**Location**: `subStructureGenerator.py` lines 181-190 - -**Fix Applied**: -- Enhanced prompt to explicitly require `generation_hint` and `complexity` -- Clarified that template examples show structure, but elements must be empty - -**Status**: ✅ Fixed - ---- - -## ⚠️ Remaining Issues & Gaps - -### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED** - -**Problem**: -- No validation that structure has required fields before content generation -- No check that all sections have `generation_hint` before generating content - -**Expected** (from Phase 6): -```python -# Validate structure before content generation -if not 
validateStructure(structure): - raise ValueError("Invalid structure") -``` - -**Current**: Validation happens in `_validateAndEnhanceStructure()` but only adds missing fields, doesn't validate - -**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better - -**Recommendation**: Add explicit validation method - ---- - -### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED** - -**Problem**: -- Previous sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing -- Should show `generation_hint` as fallback when elements not available - -**Location**: `subContentGenerator.py` lines 671-709 - -**Current Behavior**: -- Shows content preview if elements exist -- Shows nothing if elements don't exist - -**Expected Behavior**: -- Show content preview if elements exist -- Show `generation_hint` as fallback if elements don't exist - -**Impact**: Medium - Reduces context quality in parallel generation - -**Recommendation**: Add fallback to show `generation_hint` when elements not available - ---- - -### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED** - -**Problem**: -- Debug file writes `aiResponse.content` (raw AI response) before validation -- Can't verify if `generation_hint` was added by validation - -**Location**: `subStructureGenerator.py` lines 77-84 - -**Impact**: Low - Makes debugging harder but doesn't affect functionality - -**Recommendation**: Write validated structure to separate debug file - ---- - -### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED** - -**Problem**: -- No unit tests for any components (Phase 7 requirement) -- No tests for structure generation -- No tests for content generation -- No tests for integration - -**Impact**: High - No way to verify correctness or catch regressions - -**Recommendation**: Add comprehensive unit tests - ---- - -### Issue 10: Missing Integration Tests ⚠️ 
**NOT IMPLEMENTED** - -**Problem**: -- No end-to-end tests -- No tests with images -- No tests with long documents -- No error scenario tests - -**Impact**: High - No verification of complete flow - -**Recommendation**: Add integration tests - ---- - -### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED** - -**Problem**: -- Content is extracted and cached, but: - - No cache validation (check if documents changed) - - No cache reuse verification - - Content is passed to prompts but may not be formatted efficiently - -**Expected** (from Phase 5): -- Cache validation -- Efficient formatting -- Performance testing - -**Current**: Basic caching exists but not optimized - -**Impact**: Medium - Works but could be more efficient - -**Recommendation**: Add cache validation and optimization - ---- - -### Issue 12: Renderer Updates Not Verified ⚠️ **UNKNOWN** - -**Problem**: -- Implementation plan requires renderer updates for images -- HTML renderer should create separate image files -- PDF/XLSX/PPTX renderers should embed images -- **Status unknown** - need to verify renderers handle images correctly - -**Impact**: High - Images may not render correctly - -**Recommendation**: Verify all renderers handle images correctly - ---- - -## 📋 Architecture Compliance Check - -### Data Structure Compliance ✅ - -| Field | Required | Implemented | Status | -|-------|----------|-------------|--------| -| `metadata.title` | Yes | ✅ | ✅ | -| `metadata.split_strategy` | Yes | ✅ | ✅ | -| `sections[].id` | Yes | ✅ | ✅ | -| `sections[].content_type` | Yes | ✅ | ✅ | -| `sections[].complexity` | Yes | ✅ | ✅ | -| `sections[].generation_hint` | Yes | ✅ | ✅ | -| `sections[].order` | Yes | ✅ | ✅ | -| `sections[].elements` | Yes | ✅ | ✅ | -| `sections[].image_prompt` | Image only | ✅ | ✅ | - -### Component Method Compliance ✅ - -| Component | Method | Required | Implemented | Status | -|-----------|--------|----------|-------------|--------| -| StructureGenerator | 
`generateStructure()` | Yes | ✅ | ✅ | -| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ | -| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ | -| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ | -| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ | -| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ | -| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ | -| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ | -| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ | -| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ | -| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ | - ---- - -## 🎯 Priority Fixes Needed - -### Critical (Must Fix) -1. ✅ **Issue 2**: Variable shadowing bug - **FIXED** -2. ✅ **Issue 3**: Missing generation_hint - **FIXED** -3. ✅ **Issue 4**: JSON template mismatch - **FIXED** -4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED** -5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION** - -### High Priority (Should Fix) -6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION** -7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED** -8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED** - -### Medium Priority (Nice to Have) -9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED** -10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED** -11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED** -12. 
⚠️ **Issue 8**: Debug file improvements - **NOT IMPLEMENTED** - ---- - -## ✅ Summary - -### What Works -- Core infrastructure is implemented -- Image generation is integrated -- Parallel processing is implemented -- Error handling is in place -- Progress logging works - -### What's Fixed (This Session) -- Variable shadowing bug -- Missing generation_hint extraction -- JSON template architecture mismatch -- Prompt instructions clarity -- Previous sections tracking (needs verification) - -### What Needs Work -- Unit and integration tests -- Renderer verification -- Previous sections formatting fallback -- Cache optimization -- Structure validation - -### Overall Status -**Architecture**: ✅ **85% Compliant** -**Implementation**: ✅ **80% Complete** -**Testing**: ❌ **0% Complete** -**Production Ready**: ⚠️ **Not Yet** (needs testing and verification) - ---- - -## Next Steps - -1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode -2. **Verify Issue 12**: Test that all renderers handle images correctly -3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator) -4. **Add Integration Tests**: Test end-to-end flow with various scenarios -5. **Improve Previous Sections Formatting**: Add fallback to show generation_hint when elements not available -6. **Add Structure Validation**: Explicit validation before content generation -7. 
**Optimize Content Caching**: Add cache validation and efficient formatting - ---- - -**Analysis Complete**: 2025-12-22 - diff --git a/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md deleted file mode 100644 index d0a59e80..00000000 --- a/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md +++ /dev/null @@ -1,459 +0,0 @@ -# Concept: Hierarchical Document Generation with Image Integration - -## Executive Summary - -This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently. - -**Key Decisions**: -- ✅ **Performance**: Parallel processing with ChatLog progress messages -- ✅ **Error Handling**: Skip failed sections, show error messages -- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access) -- ✅ **Backward Compatibility**: Not needed - implement as new default - -**Renderer Status**: -- ✅ **Ready**: Text, Markdown, DOCX renderers -- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images) -- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support) - -## Problem Statement - -Currently, the document generation system has the following limitations: - -1. **No Image Integration**: Images are generated separately but cannot be embedded into document structures -2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters) -3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily -4. 
**No Structured Approach**: No mechanism to first define document structure, then populate sections - -## Current Architecture Analysis - -### Current Flow: -``` -User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document -``` - -### Issues: -- AI generates complete JSON structure in one pass -- Images are generated separately via `ai.generate` action -- No mechanism to integrate generated images into document structure -- JSON schema supports `image` content_type, but AI rarely generates it -- Content extraction happens per action, not cached/reused - -### Current Image Handling: -- Images can be rendered IF they exist in JSON structure (`content_type: "image"`) -- Image data expected as `base64Data` in elements -- Renderers support image rendering (Docx, PDF, HTML, etc.) -- But images are never generated WITHIN document generation - -## Proposed Solution: Hierarchical Document Generation - -### Core Concept - -**Three-Phase Approach:** -1. **Structure Generation Phase**: Generate document skeleton with section placeholders -2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts -3. 
**Integration Phase**: Merge all generated content into final document structure - -### Architecture Overview - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Phase 1: Structure Generation │ -│ - Generate document skeleton │ -│ - Identify sections (text, image, complex) │ -│ - Create section placeholders with metadata │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 2: Content Generation (Tree-like) │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 1: Heading (simple) │ │ -│ │ → Generate directly │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 2: Paragraph (simple) │ │ -│ │ → Generate directly │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 3: Image (complex) │ │ -│ │ → Sub-prompt: Generate image │ │ -│ │ → Store image data │ │ -│ │ → Create image section with base64Data │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 4: Long Chapter (complex) │ │ -│ │ → Sub-prompt: Generate chapter content │ │ -│ │ → Split into subsections if needed │ │ -│ └──────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 3: Integration │ -│ - Merge all generated content │ -│ - Replace placeholders with actual data │ -│ - Validate structure completeness │ -│ - Render to final format │ -└─────────────────────────────────────────────────────────────┘ -``` - -## Detailed Design - -### Phase 1: Structure Generation - -**Purpose**: Create document skeleton with section metadata - -**Process**: -1. AI generates document structure with sections -2. 
Each section includes: - - `id`: Unique identifier - - `content_type`: Type (heading, paragraph, image, table, etc.) - - `complexity`: "simple" or "complex" - - `generation_hint`: Instructions for content generation - - `order`: Section order - - `elements`: Empty or placeholder - -**Example Structure**: -```json -{ - "metadata": { - "title": "Children's Bedtime Story", - "split_strategy": "single_document" - }, - "documents": [{ - "id": "doc_1", - "sections": [ - { - "id": "section_title", - "content_type": "heading", - "complexity": "simple", - "generation_hint": "Story title", - "order": 1, - "elements": [] - }, - { - "id": "section_intro", - "content_type": "paragraph", - "complexity": "simple", - "generation_hint": "Introduction paragraph", - "order": 2, - "elements": [] - }, - { - "id": "section_image_1", - "content_type": "image", - "complexity": "complex", - "generation_hint": "Illustration: Rabbit meeting owl in moonlit forest", - "image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch", - "order": 3, - "elements": [] - }, - { - "id": "section_chapter_1", - "content_type": "paragraph", - "complexity": "complex", - "generation_hint": "First chapter: Rabbit's adventure begins", - "order": 4, - "elements": [] - } - ] - }] -} -``` - -### Phase 2: Content Generation - -**Purpose**: Generate actual content for each section - -**Process**: -1. Iterate through sections in order -2. 
For each section: - - **Simple sections** (heading, short paragraph): - - Generate content directly via AI - - Populate `elements` array - - **Complex sections** (image, long chapter): - - Create sub-prompt based on `generation_hint` and `image_prompt` - - Generate content via specialized action: - - Images: `ai.generate` with image generation - - Long text: `ai.process` with focused prompt - - Store generated content - - Populate `elements` array - -**Content Caching**: -- Extract content from source documents ONCE at the start -- Cache extracted content for reuse across all sections -- Pass cached content to sub-prompts to avoid re-extraction - -**Image Generation**: -- For `content_type: "image"` sections: - - Use `image_prompt` from structure - - Call `ai.generate` action with image generation - - Receive base64 image data - - Create image element: - ```json - { - "url": "data:image/png;base64,
`
- - Return multiple files: HTML file + image files
-
-4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE**
- - Currently: Shows placeholder `[Image: altText]`
- - **Required Change**: Embed images directly in PDF using reportlab
- - Implementation: Use `reportlab.platypus.Image()` with the base64-decoded bytes wrapped in `io.BytesIO`
-
-5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY**
- - Embeds images directly using `doc.add_picture()`
- - Adds captions below images
- - No changes needed
-
-6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION**
- - Currently: No image handling found
- - **Required Change**: Add image support using openpyxl
- - Implementation: Use `openpyxl.drawing.image.Image()` with `ws.add_image()` to anchor images to cells
- - Store images in worksheet cells or as floating images
-
-7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION**
- - Currently: No image handling found
- - **Required Change**: Add image support using python-pptx
- - Implementation: Use `slide.shapes.add_picture()` to add images to slides
-
-### Renderer Update Requirements:
-
-**Priority 1 (Critical for HTML output)**:
-- HTML Renderer: Create separate image files and link them
-
-**Priority 2 (Important for document formats)**:
-- PDF Renderer: Embed images using reportlab
-- XLSX Renderer: Add image embedding support
-- PPTX Renderer: Add image embedding support
-
-## Answers to Open Questions
-
-### 1. Performance: How to handle very large documents (100+ sections)?
-
-**Answer**: Use parallel processing where possible, with ChatLog progress messages.
-
-**Implementation Strategy**:
-- **Parallel Section Generation**: Generate independent sections in parallel using asyncio
-- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time)
-- **Progress Tracking**: Send ChatLog progress updates:
- - "Generating structure..." (Phase 1)
- - "Generating content for section X/Y..." (Phase 2)
- - "Generating image for section X..." (Phase 2 - images)
- - "Merging content..." (Phase 3)
- - "Rendering final document..." (Phase 3)
-- **Streaming**: For very large documents, consider streaming partial results
-
-**Example Progress Messages**:
-```
-Phase 1: Structure Generation (0% → 33%)
-Phase 2: Content Generation (33% → 90%)
- - Section 1/10: Heading (34%)
- - Section 2/10: Paragraph (40%)
- - Section 3/10: Image generation (50%)
- - Section 4/10: Chapter (60%)
- ...
-Phase 3: Integration & Rendering (90% → 100%)
-```
-
-### 2. Error Handling: What if one section fails?
-
-**Answer**: Skip failed sections, keep the section title and type, and show an error message in the section.
-
-**Implementation Strategy**:
-- **Graceful Degradation**: Continue processing remaining sections
-- **Error Section**: Create error placeholder section:
- ```json
- {
- "id": "section_failed_3",
- "content_type": "paragraph",
- "elements": [{
- "text": "[ERROR: Failed to generate content for this section. Error: