diff --git a/modules/services/serviceAi/README_MODULE_STRUCTURE.md b/modules/services/serviceAi/README_MODULE_STRUCTURE.md new file mode 100644 index 00000000..d2fca8f5 --- /dev/null +++ b/modules/services/serviceAi/README_MODULE_STRUCTURE.md @@ -0,0 +1,78 @@ +# Module Structure - serviceAi + +## Übersicht + +Das `mainServiceAi.py` Modul wurde in mehrere Submodule aufgeteilt, um die Übersichtlichkeit zu verbessern. + +## Modulstruktur + +### Hauptmodul +- **mainServiceAi.py** (~800 Zeilen) + - Initialisierung (`__init__`, `create`, `ensureAiObjectsInitialized`) + - Public API (`callAiPlanning`, `callAiContent`) + - Routing zu Submodulen + - Helper-Methoden + +### Submodule + +1. **subJsonResponseHandling.py** (bereits vorhanden) + - JSON Response Merging + - Section Merging + - Fragment Detection + +2. **subResponseParsing.py** (~200 Zeilen) + - `ResponseParser.extractSectionsFromResponse()` - Extrahiert Sections aus AI-Responses + - `ResponseParser.shouldContinueGeneration()` - Entscheidet ob Generation fortgesetzt werden soll + - `ResponseParser._isStuckInLoop()` - Loop-Detection + - `ResponseParser.extractDocumentMetadata()` - Extrahiert Metadaten + - `ResponseParser.buildFinalResultFromSections()` - Baut finales JSON + +3. **subDocumentIntents.py** (~300 Zeilen) + - `DocumentIntentAnalyzer.clarifyDocumentIntents()` - Analysiert Dokument-Intents + - `DocumentIntentAnalyzer.resolvePreExtractedDocument()` - Löst pre-extracted Dokumente auf + - `DocumentIntentAnalyzer._buildIntentAnalysisPrompt()` - Baut Intent-Analyse-Prompt + +4. **subContentExtraction.py** (~600 Zeilen) + - `ContentExtractor.extractAndPrepareContent()` - Extrahiert und bereitet Content vor + - `ContentExtractor.extractTextFromImage()` - Vision AI für Bilder + - `ContentExtractor.processTextContentWithAi()` - AI-Verarbeitung von Text + - `ContentExtractor._isBinary()` - Helper für Binary-Check + +5. 
**subStructureGeneration.py** (~200 Zeilen) + - `StructureGenerator.generateStructure()` - Generiert Dokument-Struktur + - `StructureGenerator._buildStructurePrompt()` - Baut Struktur-Prompt + +6. **subStructureFilling.py** (~400 Zeilen) + - `StructureFiller.fillStructure()` - Füllt Struktur mit Content + - `StructureFiller._buildSectionGenerationPrompt()` - Baut Section-Generation-Prompt + - `StructureFiller._findContentPartById()` - Helper für ContentPart-Suche + - `StructureFiller._needsAggregation()` - Entscheidet ob Aggregation nötig + +7. **subAiCallLooping.py** (~400 Zeilen) + - `AiCallLooper.callAiWithLooping()` - Haupt-Looping-Logik + - `AiCallLooper._defineKpisFromPrompt()` - KPI-Definition + +## Verwendung + +Alle Submodule werden über das Hauptmodul `AiService` verwendet: + +```python +# Initialisierung +aiService = await AiService.create(serviceCenter) + +# Submodule werden automatisch initialisiert +# aiService.responseParser +# aiService.intentAnalyzer +# aiService.contentExtractor +# etc. +``` + +## Migration + +Die öffentliche API bleibt unverändert. Interne Methoden wurden in Submodule verschoben: + +- `_extractSectionsFromResponse` → `responseParser.extractSectionsFromResponse` +- `_clarifyDocumentIntents` → `intentAnalyzer.clarifyDocumentIntents` +- `_extractAndPrepareContent` → `contentExtractor.extractAndPrepareContent` +- etc. + diff --git a/modules/services/serviceAi/REFACTORING_PLAN.md b/modules/services/serviceAi/REFACTORING_PLAN.md new file mode 100644 index 00000000..2ce7a717 --- /dev/null +++ b/modules/services/serviceAi/REFACTORING_PLAN.md @@ -0,0 +1,126 @@ +# Refactoring Plan für mainServiceAi.py + +## Ziel +Aufteilen des 3000-Zeilen-Moduls in überschaubare Submodule (~300-600 Zeilen pro Modul). + +## Vorgeschlagene Struktur + +### Bereits erstellt: +1. ✅ `subResponseParsing.py` - ResponseParser Klasse +2. ✅ `subDocumentIntents.py` - DocumentIntentAnalyzer Klasse + +### Noch zu erstellen: +3. 
`subContentExtraction.py` - ContentExtractor Klasse + - `extractAndPrepareContent()` (~490 Zeilen) + - `extractTextFromImage()` (~55 Zeilen) + - `processTextContentWithAi()` (~72 Zeilen) + - `_isBinary()` (~10 Zeilen) + +4. `subStructureGeneration.py` - StructureGenerator Klasse + - `generateStructure()` (~60 Zeilen) + - `_buildStructurePrompt()` (~130 Zeilen) + +5. `subStructureFilling.py` - StructureFiller Klasse + - `fillStructure()` (~290 Zeilen) + - `_buildSectionGenerationPrompt()` (~185 Zeilen) + - `_findContentPartById()` (~5 Zeilen) + - `_needsAggregation()` (~20 Zeilen) + +6. `subAiCallLooping.py` - AiCallLooper Klasse + - `callAiWithLooping()` (~405 Zeilen) + - `_defineKpisFromPrompt()` (~92 Zeilen) + +## Refactoring-Schritte für mainServiceAi.py + +### Schritt 1: Submodule-Initialisierung erweitern + +```python +def _initializeSubmodules(self): + """Initialize all submodules after aiObjects is ready.""" + if self.aiObjects is None: + raise RuntimeError("aiObjects must be initialized before initializing submodules") + + if self.extractionService is None: + logger.info("Initializing ExtractionService...") + self.extractionService = ExtractionService(self.services) + + # Neue Submodule initialisieren + from modules.services.serviceAi.subResponseParsing import ResponseParser + from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer + from modules.services.serviceAi.subContentExtraction import ContentExtractor + from modules.services.serviceAi.subStructureGeneration import StructureGenerator + from modules.services.serviceAi.subStructureFilling import StructureFiller + + if not hasattr(self, 'responseParser'): + self.responseParser = ResponseParser(self.services) + + if not hasattr(self, 'intentAnalyzer'): + self.intentAnalyzer = DocumentIntentAnalyzer(self.services, self) + + if not hasattr(self, 'contentExtractor'): + self.contentExtractor = ContentExtractor(self.services, self) + + if not hasattr(self, 'structureGenerator'): + 
self.structureGenerator = StructureGenerator(self.services, self) + + if not hasattr(self, 'structureFiller'): + self.structureFiller = StructureFiller(self.services, self) +``` + +### Schritt 2: Methoden durch Delegation ersetzen + +**Beispiel für Response Parsing:** +```python +# ALT: +def _extractSectionsFromResponse(self, ...): + # 100 Zeilen Code + ... + +# NEU: +def _extractSectionsFromResponse(self, ...): + return self.responseParser.extractSectionsFromResponse(...) +``` + +**Beispiel für Document Intents:** +```python +# ALT: +async def _clarifyDocumentIntents(self, ...): + # 100 Zeilen Code + ... + +# NEU: +async def _clarifyDocumentIntents(self, ...): + return await self.intentAnalyzer.clarifyDocumentIntents(...) +``` + +### Schritt 3: Helper-Methoden beibehalten + +Kleine Helper-Methoden bleiben im Hauptmodul: +- `_buildPromptWithPlaceholders()` +- `_getIntentForDocument()` +- `_shouldSkipContentPart()` +- `_determineDocumentName()` + +### Schritt 4: Public API unverändert lassen + +Die öffentliche API (`callAiPlanning`, `callAiContent`) bleibt unverändert. + +## Erwartete Ergebnis-Größen + +- `mainServiceAi.py`: ~800-1000 Zeilen (von 3016) +- `subResponseParsing.py`: ~200 Zeilen ✅ +- `subDocumentIntents.py`: ~300 Zeilen ✅ +- `subContentExtraction.py`: ~600 Zeilen +- `subStructureGeneration.py`: ~200 Zeilen +- `subStructureFilling.py`: ~400 Zeilen +- `subAiCallLooping.py`: ~500 Zeilen + +**Gesamt: ~3000 Zeilen** (gleich, aber besser organisiert) + +## Vorteile + +1. **Übersichtlichkeit**: Jedes Modul hat eine klare Verantwortlichkeit +2. **Wartbarkeit**: Änderungen sind lokalisiert +3. **Testbarkeit**: Module können einzeln getestet werden +4. 
**Wiederverwendbarkeit**: Module können in anderen Kontexten verwendet werden + diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 74b90346..f8ab4dad 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -50,6 +50,33 @@ class AiService: if self.extractionService is None: logger.info("Initializing ExtractionService...") self.extractionService = ExtractionService(self.services) + + # Initialize new submodules + from modules.services.serviceAi.subResponseParsing import ResponseParser + from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer + from modules.services.serviceAi.subContentExtraction import ContentExtractor + from modules.services.serviceAi.subStructureGeneration import StructureGenerator + from modules.services.serviceAi.subStructureFilling import StructureFiller + + if not hasattr(self, 'responseParser'): + logger.info("Initializing ResponseParser...") + self.responseParser = ResponseParser(self.services) + + if not hasattr(self, 'intentAnalyzer'): + logger.info("Initializing DocumentIntentAnalyzer...") + self.intentAnalyzer = DocumentIntentAnalyzer(self.services, self) + + if not hasattr(self, 'contentExtractor'): + logger.info("Initializing ContentExtractor...") + self.contentExtractor = ContentExtractor(self.services, self, self.intentAnalyzer) + + if not hasattr(self, 'structureGenerator'): + logger.info("Initializing StructureGenerator...") + self.structureGenerator = StructureGenerator(self.services, self) + + if not hasattr(self, 'structureFiller'): + logger.info("Initializing StructureFiller...") + self.structureFiller = StructureFiller(self.services, self) async def callAi(self, request: AiCallRequest, progressCallback=None): """Router: handles content parts via extractionService, text context via interface. 
@@ -684,6 +711,19 @@ If no trackable items can be identified, return: {{"kpis": []}} debugPrefix: str, allSections: List[Dict[str, Any]] = None, accumulationState: Optional[JsonAccumulationState] = None + ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]: + """Delegate to ResponseParser.""" + return self.responseParser.extractSectionsFromResponse( + result, iteration, debugPrefix, allSections, accumulationState + ) + + def _extractSectionsFromResponse_OLD( + self, + result: str, + iteration: int, + debugPrefix: str, + allSections: List[Dict[str, Any]] = None, + accumulationState: Optional[JsonAccumulationState] = None ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]: """ Extract sections from AI response, handling both valid and broken JSON. @@ -783,6 +823,18 @@ If no trackable items can be identified, return: {{"kpis": []}} iteration: int, wasJsonComplete: bool, rawResponse: str = None + ) -> bool: + """Delegate to ResponseParser.""" + return self.responseParser.shouldContinueGeneration( + allSections, iteration, wasJsonComplete, rawResponse + ) + + def _shouldContinueGeneration_OLD( + self, + allSections: List[Dict[str, Any]], + iteration: int, + wasJsonComplete: bool, + rawResponse: str = None ) -> bool: """ Determine if AI generation loop should continue. @@ -859,6 +911,13 @@ If no trackable items can be identified, return: {{"kpis": []}} def _extractDocumentMetadata( self, parsedResult: Dict[str, Any] + ) -> Optional[Dict[str, Any]]: + """Delegate to ResponseParser.""" + return self.responseParser.extractDocumentMetadata(parsedResult) + + def _extractDocumentMetadata_OLD( + self, + parsedResult: Dict[str, Any] ) -> Optional[Dict[str, Any]]: """ Extract document metadata (title, filename) from parsed AI response. 
@@ -885,6 +944,14 @@ If no trackable items can be identified, return: {{"kpis": []}} self, allSections: List[Dict[str, Any]], documentMetadata: Optional[Dict[str, Any]] = None + ) -> str: + """Delegate to ResponseParser.""" + return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata) + + def _buildFinalResultFromSections_OLD( + self, + allSections: List[Dict[str, Any]], + documentMetadata: Optional[Dict[str, Any]] = None ) -> str: """ Build final JSON result from accumulated sections. @@ -1090,6 +1157,18 @@ If no trackable items can be identified, return: {{"kpis": []}} userPrompt: str, actionParameters: Dict[str, Any], parentOperationId: str + ) -> List[DocumentIntent]: + """Delegate to DocumentIntentAnalyzer.""" + return await self.intentAnalyzer.clarifyDocumentIntents( + documents, userPrompt, actionParameters, parentOperationId + ) + + async def _clarifyDocumentIntents_OLD( + self, + documents: List[ChatDocument], + userPrompt: str, + actionParameters: Dict[str, Any], + parentOperationId: str ) -> List[DocumentIntent]: """ Phase 5A: Analysiert, welche Dokumente Extraktion vs Referenz benötigen. @@ -1189,6 +1268,7 @@ If no trackable items can be identified, return: {{"kpis": []}} Returns None wenn kein pre-extracted Format erkannt wird. 
""" if document.mimeType != "application/json": + logger.debug(f"Document {document.id} is not JSON (mimeType={document.mimeType}), skipping pre-extracted check") return None try: @@ -1208,10 +1288,14 @@ If no trackable items can be identified, return: {{"kpis": []}} validationMetadata = jsonData.get("validationMetadata", {}) actionType = validationMetadata.get("actionType") + logger.debug(f"JSON document {document.id}: validationMetadata.actionType={actionType}, keys={list(jsonData.keys())}") + if actionType == "context.extractContent": # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} documentData = jsonData.get("documentData") - logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}") + logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}, documentData keys: {list(documentData.keys()) if documentData else None}") + else: + logger.debug(f"JSON document {document.id} does not have actionType='context.extractContent' (got: {actionType})") if documentData: from modules.datamodels.datamodelExtraction import ContentExtracted @@ -1258,7 +1342,11 @@ If no trackable items can be identified, return: {{"kpis": []}} except Exception as parseError: logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}") logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}") + import traceback + logger.debug(f"Parse error traceback: {traceback.format_exc()}") return None + else: + logger.debug(f"JSON document {document.id} has no documentData (actionType={actionType})") return None except Exception as e: @@ -1277,26 +1365,122 @@ If no trackable items can be identified, return: {{"kpis": []}} Extrahierter Text oder None bei Fehler """ try: - from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + # 
Final extraction prompt + finalPrompt = extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting." + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + finalPrompt, + f"content_extraction_prompt_image_{imagePart.id}" + ) # Erstelle AI-Call-Request mit Image-Part request = AiCallRequest( - prompt=extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting.", + prompt=finalPrompt, context="", - options=AiCallOptions(operationType="extraction"), + options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE), contentParts=[imagePart] ) # Verwende AI-Service für Vision AI-Verarbeitung - response = await self.services.ai.call(request) + response = await self.services.ai.callAi(request) + + # Debug-Log für Response (harmonisiert) + if response and response.content: + self.services.utils.writeDebugFile( + response.content, + f"content_extraction_response_image_{imagePart.id}" + ) if response and response.content: return response.content.strip() - return None + # Kein Content zurückgegeben - return error message für Debugging + errorMsg = f"Vision AI extraction failed: No content returned for image {imagePart.id}" + logger.warning(errorMsg) + return f"[ERROR: {errorMsg}]" except Exception as e: - logger.warning(f"Error extracting text from image {imagePart.id}: {str(e)}") - return None + errorMsg = f"Vision AI extraction failed for image {imagePart.id}: {str(e)}" + logger.error(errorMsg) + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Return error message statt None für Debugging + return f"[ERROR: {errorMsg}]" + + async def _processTextContentWithAi(self, textPart: ContentPart, extractionPrompt: str) -> Optional[str]: + """ + Verarbeite Text-Content mit AI basierend auf extractionPrompt. + + WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Text + (z.B. 
aus PDF-Text-Layer). Wenn "extract" Intent vorhanden ist, muss dieser Text mit AI + verarbeitet werden (Transformation, Strukturierung, etc.) basierend auf extractionPrompt. + + Args: + textPart: ContentPart mit typeGroup="text" (oder anderer Text-basierter Typ) + extractionPrompt: Prompt für die AI-Verarbeitung des Textes + + Returns: + AI-verarbeiteter Text oder None bei Fehler + """ + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + # Final extraction prompt + finalPrompt = extractionPrompt or "Process and extract the key information from the following text content." + + # Debug-Log (harmonisiert) - log prompt with text preview + textPreview = textPart.data[:500] + "..." if textPart.data and len(textPart.data) > 500 else (textPart.data or "") + promptWithContext = f"{finalPrompt}\n\n--- Text Content (preview) ---\n{textPreview}" + self.services.utils.writeDebugFile( + promptWithContext, + f"content_extraction_prompt_text_{textPart.id}" + ) + + # Erstelle Text-ContentPart für AI-Verarbeitung + # Verwende den vorhandenen Text als Input + textContentPart = ContentPart( + id=textPart.id, + label=textPart.label, + typeGroup="text", + mimeType="text/plain", + data=textPart.data if textPart.data else "", + metadata=textPart.metadata.copy() if textPart.metadata else {} + ) + + # Erstelle AI-Call-Request mit Text-Part + request = AiCallRequest( + prompt=finalPrompt, + context="", + options=AiCallOptions(operationType=OperationTypeEnum.DATA_EXTRACT), + contentParts=[textContentPart] + ) + + # Verwende AI-Service für Text-Verarbeitung + response = await self.services.ai.callAi(request) + + # Debug-Log für Response (harmonisiert) + if response and response.content: + self.services.utils.writeDebugFile( + response.content, + f"content_extraction_response_text_{textPart.id}" + ) + + if response and response.content: + return response.content.strip() + + # Kein Content zurückgegeben - return error message für Debugging + 
errorMsg = f"AI text processing failed: No content returned for text part {textPart.id}" + logger.warning(errorMsg) + return f"[ERROR: {errorMsg}]" + except Exception as e: + errorMsg = f"AI text processing failed for text part {textPart.id}: {str(e)}" + logger.error(errorMsg) + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Return error message statt None für Debugging + return f"[ERROR: {errorMsg}]" def _buildIntentAnalysisPrompt( self, @@ -1390,6 +1574,17 @@ Return ONLY valid JSON following the structure above. documents: List[ChatDocument], documentIntents: List[DocumentIntent], parentOperationId: str + ) -> List[ContentPart]: + """Delegate to ContentExtractor.""" + return await self.contentExtractor.extractAndPrepareContent( + documents, documentIntents, parentOperationId, self._getIntentForDocument + ) + + async def _extractAndPrepareContent_OLD( + self, + documents: List[ChatDocument], + documentIntents: List[DocumentIntent], + parentOperationId: str ) -> List[ContentPart]: """ Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor. @@ -1425,12 +1620,25 @@ Return ONLY valid JSON following the structure above. 
for document in documents: # Check if document is already a ContentExtracted document (pre-extracted JSON) + logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content") preExtracted = self._resolvePreExtractedDocument(document) if preExtracted: + logger.info(f"✅ Found pre-extracted document: {document.fileName} -> Original: {preExtracted['originalDocument']['fileName']}") + logger.info(f" Pre-extracted document ID: {document.id}, Original document ID: {preExtracted['originalDocument']['id']}") + logger.info(f" ContentParts count: {len(preExtracted['contentExtracted'].parts) if preExtracted['contentExtracted'].parts else 0}") + # Verwende bereits extrahierte ContentParts direkt contentExtracted = preExtracted["contentExtracted"] + + # WICHTIG: Intent muss für das JSON-Dokument gefunden werden, nicht für das Original + # (Intent-Analyse mappt bereits zurück zu JSON-Dokument-ID) intent = self._getIntentForDocument(document.id, documentIntents) + logger.info(f" Intent lookup for document {document.id}: found={intent is not None}") + if intent: + logger.info(f" Intent: {intent.intents}, extractionPrompt: {intent.extractionPrompt[:100] if intent.extractionPrompt else None}...") + else: + logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}") if contentExtracted.parts: for part in contentExtracted.parts: @@ -1445,96 +1653,267 @@ Return ONLY valid JSON following the structure above. 
# Ensure metadata is complete if "documentId" not in part.metadata: part.metadata["documentId"] = document.id - if "contentFormat" not in part.metadata: - part.metadata["contentFormat"] = "extracted" # WICHTIG: Prüfe Intent für dieses Part partIntent = intent.intents if intent else ["extract"] - # Wenn Intent "render" für Images hat, erstelle auch object Part - if "render" in partIntent and part.typeGroup == "image" and part.data: - # Image-Part mit render Intent: Erstelle sowohl extracted als auch object Part - # 1. Extracted Part - prüfe ob "extract" Intent vorhanden ist - if "extract" in partIntent: - # Image hat sowohl extract als auch render Intent - # Extracted Part: Wird mit Vision AI verarbeitet (skipExtraction=False) - part.metadata["intent"] = "extract" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = False # WICHTIG: Vision AI-Verarbeitung nötig! - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) - else: - # Nur render Intent - kein Text-Extraktion nötig - part.metadata["intent"] = "render" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = True - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) - - # 2. 
Object Part für Rendering (base64 data ist bereits im extracted Part) - objectPart = ContentPart( - id=f"obj_{document.id}_{part.id}", - label=f"Object: {part.label or 'Image'}", - typeGroup="image", - mimeType=part.mimeType or "image/jpeg", - data=part.data, # Base64 data ist bereits vorhanden + # Debug-Logging für Intent-Verarbeitung + logger.debug(f"Processing part {part.id}: typeGroup={part.typeGroup}, intents={partIntent}, hasData={bool(part.data)}, dataLength={len(str(part.data)) if part.data else 0}") + + # WICHTIG: Ein Part kann mehrere Intents haben - erstelle für jeden Intent einen ContentPart + # Generische Intent-Verarbeitung für ALLE Content-Typen + hasReferenceIntent = "reference" in partIntent + hasRenderIntent = "render" in partIntent + hasExtractIntent = "extract" in partIntent + hasPartData = bool(part.data) and (not isinstance(part.data, str) or len(part.data.strip()) > 0) + + logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}") + + # Track ob der originale Part bereits hinzugefügt wurde + originalPartAdded = False + + # 1. 
Reference Intent: Erstelle Reference ContentPart + if hasReferenceIntent: + referencePart = ContentPart( + id=f"ref_{document.id}_{part.id}", + label=f"Reference: {part.label or 'Content'}", + typeGroup="reference", + mimeType=part.mimeType or "application/octet-stream", + data="", # Leer - nur Referenz metadata={ - "contentFormat": "object", + "contentFormat": "reference", "documentId": document.id, - "intent": "render", - "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedExtractedPartId": part.id + "documentReference": f"docItem:{document.id}:{preExtracted['originalDocument']['fileName']}", + "intent": "reference", + "usageHint": f"Reference: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"] } ) - allContentParts.append(objectPart) - elif part.typeGroup == "image" and "extract" in partIntent: - # Image mit extract Intent: Vision AI-Verarbeitung nötig - # Verarbeite Image mit Vision AI, um Text zu extrahieren - try: - extractedText = await self._extractTextFromImage(part, intent.extractionPrompt if intent else "Extract all text content from this image") - if extractedText: - # Erstelle neuen Text-Part mit extrahiertem Text - textPart = ContentPart( - id=f"extracted_{part.id}", - label=f"Extracted text from {part.label or 'Image'}", - typeGroup="text", - mimeType="text/plain", - data=extractedText, - metadata={ - "contentFormat": "extracted", - "documentId": document.id, - "intent": "extract", - "originalFileName": preExtracted["originalDocument"]["fileName"], - "relatedImagePartId": part.id, - "extractionPrompt": intent.extractionPrompt if intent else "Extract all text content from this image" - } - ) - allContentParts.append(textPart) - logger.info(f"✅ Extracted text from image {part.id} using Vision AI") + allContentParts.append(referencePart) + logger.debug(f"✅ Created reference 
ContentPart for {part.id}") + + # 2. Render Intent: Erstelle Object ContentPart (für Binary/Image Rendering) + if hasRenderIntent and hasPartData: + # Prüfe ob es ein Binary/Image ist (kann gerendert werden) + isRenderable = ( + part.typeGroup == "image" or + part.typeGroup == "binary" or + (part.mimeType and ( + part.mimeType.startswith("image/") or + part.mimeType.startswith("video/") or + part.mimeType.startswith("audio/") or + self._isBinary(part.mimeType) + )) + ) + + if isRenderable: + objectPart = ContentPart( + id=f"obj_{document.id}_{part.id}", + label=f"Object: {part.label or 'Content'}", + typeGroup=part.typeGroup, + mimeType=part.mimeType or "application/octet-stream", + data=part.data, # Base64/Binary data ist bereits vorhanden + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedExtractedPartId": f"extracted_{document.id}_{part.id}" if hasExtractIntent else None + } + ) + allContentParts.append(objectPart) + logger.debug(f"✅ Created object ContentPart for {part.id} (render intent)") + else: + logger.warning(f"⚠️ Part {part.id} has render intent but is not renderable (typeGroup={part.typeGroup}, mimeType={part.mimeType})") + elif hasRenderIntent and not hasPartData: + logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part") + + # 3. Extract Intent: Erstelle Extracted ContentPart (möglicherweise mit zusätzlicher Verarbeitung) + if hasExtractIntent: + # Spezielle Behandlung für Images: Vision AI für Text-Extraktion + if part.typeGroup == "image" and hasPartData: + logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)") + try: + extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. 
Return only the extracted text, no additional formatting." + extractedText = await self._extractTextFromImage(part, extractionPrompt) + if extractedText: + # Prüfe ob es ein Error-Message ist + isError = extractedText.startswith("[ERROR:") + + # Erstelle neuen Text-Part mit extrahiertem Text oder Error-Message + textPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=extractedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": extractionPrompt, + "extractionMethod": "vision", + "isError": isError + } + ) + allContentParts.append(textPart) + if isError: + logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}") + else: + logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars") + else: + # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) + errorMsg = f"Vision AI extraction failed: Unexpected empty response for image {part.id}" + logger.error(errorMsg) + errorPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Error extracting from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=f"[ERROR: {errorMsg}]", + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "extractionPrompt": extractionPrompt, + "extractionMethod": "vision", + "isError": True + } + ) + allContentParts.append(errorPart) + except Exception as e: + logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}") + import traceback + 
logger.debug(f"Traceback: {traceback.format_exc()}") + # Kein Fallback: Wenn render Intent vorhanden, haben wir bereits object Part + # Wenn nur extract Intent: Original Part ist kein Text, daher nicht als extracted hinzufügen + if not hasRenderIntent: + logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available") + else: + # Für alle anderen Content-Typen: Prüfe ob AI-Verarbeitung benötigt wird + # WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Content + # (z.B. Text aus PDF-Text-Layer, Tabellen, etc.). Wenn "extract" Intent vorhanden ist, + # muss dieser Content mit AI verarbeitet werden basierend auf extractionPrompt. - # Wenn auch render Intent vorhanden, füge Image-Part hinzu - if "render" in partIntent: - part.metadata["intent"] = "render" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = True - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) - except Exception as e: - logger.warning(f"Failed to extract text from image {part.id}: {str(e)}, adding image as-is") - # Fallback: Füge Image-Part hinzu ohne Text-Extraktion - part.metadata["intent"] = "extract" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = False - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] - allContentParts.append(part) - else: - # Normales extracted Part (kein Image oder kein extract Intent) - part.metadata["intent"] = partIntent[0] if partIntent else "extract" - part.metadata["fromExtractContent"] = True - part.metadata["skipExtraction"] = True # Bereits extrahiert - part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + # Prüfe ob Part Text-Content hat (kann mit AI verarbeitet werden) + isTextContent = ( + part.typeGroup == "text" or + part.typeGroup == "table" or + (part.data and isinstance(part.data, str) and 
len(part.data.strip()) > 0) + ) + + if isTextContent and intent and intent.extractionPrompt: + # Text-Content mit extractionPrompt: Verarbeite mit AI + logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)") + try: + extractionPrompt = intent.extractionPrompt + processedText = await self._processTextContentWithAi(part, extractionPrompt) + if processedText: + # Prüfe ob es ein Error-Message ist + isError = processedText.startswith("[ERROR:") + + # Erstelle neuen Text-Part mit AI-verarbeitetem Text oder Error-Message + processedPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}", + typeGroup="text", + mimeType="text/plain", + data=processedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": extractionPrompt, + "extractionMethod": "ai", + "sourcePartId": part.id, + "fromExtractContent": True, + "isError": isError + } + ) + allContentParts.append(processedPart) + originalPartAdded = True + if isError: + logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}") + else: + logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars") + else: + # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) + errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}" + logger.error(errorMsg) + errorPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Error processing {part.label or 'Content'}", + typeGroup="text", + mimeType="text/plain", + data=f"[ERROR: {errorMsg}]", + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": 
preExtracted["originalDocument"]["fileName"], + "extractionPrompt": extractionPrompt, + "extractionMethod": "ai", + "sourcePartId": part.id, + "isError": True + } + ) + allContentParts.append(errorPart) + originalPartAdded = True + except Exception as e: + logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}") + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Fallback: Verwende Original-Part + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None + }) + allContentParts.append(part) + originalPartAdded = True + else: + # Kein extractionPrompt oder kein Text-Content: Verwende Part direkt als extracted + # (Content ist bereits extrahiert von context.extractContent, keine weitere AI-Verarbeitung nötig) + # WICHTIG: Nur hinzufügen wenn noch nicht hinzugefügt (z.B. durch render Intent) + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, # Bereits extrahiert + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None + }) + # Stelle sicher dass contentFormat gesetzt ist + if "contentFormat" not in part.metadata: + part.metadata["contentFormat"] = "extracted" + allContentParts.append(part) + originalPartAdded = True + logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)") + + # 4. 
Fallback: Wenn kein Intent vorhanden oder Part wurde noch nicht hinzugefügt + # (sollte normalerweise nicht vorkommen, da default "extract" ist) + if not hasReferenceIntent and not hasRenderIntent and not hasExtractIntent and not originalPartAdded: + logger.warning(f"⚠️ Part {part.id} has no recognized intents, adding as extracted by default") + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, + "originalFileName": preExtracted["originalDocument"]["fileName"] + }) allContentParts.append(part) + originalPartAdded = True logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}") logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}") @@ -1715,6 +2094,18 @@ Return ONLY valid JSON following the structure above. contentParts: List[ContentPart], outputFormat: str, parentOperationId: str + ) -> Dict[str, Any]: + """Delegate to StructureGenerator.""" + return await self.structureGenerator.generateStructure( + userPrompt, contentParts, outputFormat, parentOperationId + ) + + async def _generateStructure_OLD( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str, + parentOperationId: str ) -> Dict[str, Any]: """ Phase 5C: Generiert Dokument-Struktur mit Sections. @@ -1782,17 +2173,36 @@ Return ONLY valid JSON following the structure above. 
# Baue ContentParts-Index - filtere leere Parts heraus contentPartsIndex = "" validParts = [] + filteredParts = [] + for part in contentParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + + # WICHTIG: Reference Parts haben absichtlich leere Daten - immer einschließen + if contentFormat == "reference": + validParts.append(part) + logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)") + continue + # Überspringe leere Parts (keine Daten oder nur Container ohne Inhalt) + # ABER: Reference Parts wurden bereits oben behandelt if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): # Überspringe Container-Parts ohne Daten if part.typeGroup == "container" and not part.data: + filteredParts.append((part.id, "container without data")) continue - # Überspringe andere leere Parts + # Überspringe andere leere Parts (aber nicht Reference, die wurden bereits behandelt) if not part.data: + filteredParts.append((part.id, f"no data (format: {contentFormat})")) continue validParts.append(part) + logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}") + + if filteredParts: + logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}") + + logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)") # Baue Index nur für gültige Parts for i, part in enumerate(validParts, 1): @@ -1825,11 +2235,14 @@ Return ONLY valid JSON following the structure above. elif contentFormat == "reference": dataPreview = part.metadata.get("documentReference", "reference") + originalFileName = part.metadata.get('originalFileName', 'N/A') + contentPartsIndex += f"\n{i}. 
ContentPart ID: {part.id}\n" contentPartsIndex += f" Format: {contentFormat}\n" contentPartsIndex += f" Type: {part.typeGroup}\n" contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n" contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n" + contentPartsIndex += f" Original file name: {originalFileName}\n" contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n" contentPartsIndex += f" Data preview: {dataPreview}\n" @@ -1896,6 +2309,18 @@ Return ONLY valid JSON following the structure above. contentParts: List[ContentPart], userPrompt: str, parentOperationId: str + ) -> Dict[str, Any]: + """Delegate to StructureFiller.""" + return await self.structureFiller.fillStructure( + structure, contentParts, userPrompt, parentOperationId + ) + + async def _fillStructure_OLD( + self, + structure: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + parentOperationId: str ) -> Dict[str, Any]: """ Phase 5D: Füllt Struktur mit tatsächlichem Content. @@ -1935,105 +2360,244 @@ Return ONLY valid JSON following the structure above. 
# Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) sections_to_process = [] + all_sections_list = [] # Für Kontext-Informationen for doc in filledStructure.get("documents", []): - for section in doc.get("sections", []): + doc_sections = doc.get("sections", []) + all_sections_list.extend(doc_sections) + for section in doc_sections: sections_to_process.append((doc, section)) # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) - for doc, section in sections_to_process: + for sectionIndex, (doc, section) in enumerate(sections_to_process): sectionId = section.get("id") contentPartIds = section.get("contentPartIds", []) contentFormats = section.get("contentFormats", {}) generationHint = section.get("generation_hint") + contentType = section.get("content_type", "paragraph") elements = [] - # Verarbeite ContentParts - for partId in contentPartIds: - part = self._findContentPartById(partId, contentParts) - if not part: - continue - - contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) - - if contentFormat == "reference": - # Füge Dokument-Referenz hinzu - elements.append({ - "type": "reference", - "documentReference": part.metadata.get("documentReference"), - "label": part.metadata.get("usageHint", part.label) - }) - - elif contentFormat == "object": - # Füge base64 Object hinzu - elements.append({ - "type": part.typeGroup, # "image", "binary", etc. 
- "base64Data": part.data, - "mimeType": part.mimeType, - "altText": part.metadata.get("usageHint", part.label) - }) - - elif contentFormat == "extracted": - # Füge extrahierten Text hinzu (kann in AI-Generierungs-Prompt verwendet werden) - elements.append({ - "type": "extracted_text", - "content": part.data, - "source": part.metadata.get("documentId"), - "extractionPrompt": part.metadata.get("extractionPrompt") - }) + # Prüfe ob Aggregation nötig ist + needsAggregation = self._needsAggregation( + contentType=contentType, + contentPartCount=len(contentPartIds) + ) - # Generiere AI-Content wenn nötig - if generationHint: - generationPrompt = self._buildSectionGenerationPrompt( - section=section, - contentParts=[self._findContentPartById(pid, contentParts) for pid in contentPartIds], - userPrompt=userPrompt, - generationHint=generationHint - ) + if needsAggregation and generationHint: + # Aggregation: Alle Parts zusammen verarbeiten + sectionParts = [ + self._findContentPartById(pid, contentParts) + for pid in contentPartIds + ] + sectionParts = [p for p in sectionParts if p is not None] - # Erstelle Operation-ID für Section-Generierung - # Debug-Logs werden bereits von callAiPlanning geschrieben - sectionOperationId = f"{fillOperationId}_section_{sectionId}" - - # Starte ChatLog mit Parent-Referenz - self.services.chat.progressLogStart( - sectionOperationId, - "Section Generation", - "Section", - f"Generating section {sectionId}", - parentOperationId=fillOperationId - ) - - try: - # Generiere Content (verwende callAiPlanning für einfache JSON-Responses) - # Debug-Logs werden bereits von callAiPlanning geschrieben - aiResponse = await self.callAiPlanning( - prompt=generationPrompt, - debugType=f"section_generation_{sectionId}" - ) + if sectionParts: + # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) + extractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" + ] 
+ nonExtractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + ] - # Parse und füge zu elements hinzu - generatedElements = json.loads( - self.services.utils.jsonExtractString(aiResponse) - ) - if isinstance(generatedElements, list): - elements.extend(generatedElements) - elif isinstance(generatedElements, dict) and "elements" in generatedElements: - elements.extend(generatedElements["elements"]) + # Verarbeite non-extracted Parts separat (reference, object) + for part in nonExtractedParts: + contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + elif contentFormat == "object": + elements.append({ + "type": part.typeGroup, + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) - # ChatLog abschließen - self.services.chat.progressLogFinish(sectionOperationId, True) - - except Exception as e: - # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) - self.services.chat.progressLogFinish(sectionOperationId, False) - elements.append({ - "type": "error", - "message": f"Error generating section {sectionId}: {str(e)}", - "sectionId": sectionId - }) - logger.error(f"Error generating section {sectionId}: {str(e)}") - # NICHT raise - Section wird mit Fehlermeldung gerendert + # Aggregiere extracted Parts mit AI + if extractedParts: + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=extractedParts, # ALLE PARTS für Aggregation! 
+ userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=True + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation (Aggregation)", + "Section", + f"Generating section {sectionId} with {len(extractedParts)} parts", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) + request = AiCallRequest( + prompt=generationPrompt, + contentParts=extractedParts, # ALLE PARTS! + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) 
+ self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + + else: + # Einzelverarbeitung: Jeder Part einzeln + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if not part: + continue + + contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + # Füge Dokument-Referenz hinzu + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "object": + # Füge base64 Object hinzu + elements.append({ + "type": part.typeGroup, # "image", "binary", etc. + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "extracted": + if generationHint: + # AI-Call mit einzelnen ContentPart + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[part], # EIN PART + userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=False + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation", + "Section", + f"Generating section {sectionId}", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + # Verwende callAi für ContentParts-Unterstützung + request = AiCallRequest( + prompt=generationPrompt, + 
contentParts=[part], + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) + self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + else: + # Füge extrahierten Text direkt hinzu (kein AI-Call) + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) section["elements"] = elements @@ -2052,35 +2616,185 @@ Return ONLY valid JSON following the structure above. 
section: Dict[str, Any], contentParts: List[Optional[ContentPart]], userPrompt: str, - generationHint: str + generationHint: str, + allSections: Optional[List[Dict[str, Any]]] = None, + sectionIndex: Optional[int] = None, + isAggregation: bool = False ) -> str: - """Baue Prompt für Section-Generierung.""" + """Baue Prompt für Section-Generierung mit vollständigem Kontext.""" # Filtere None-Werte validParts = [p for p in contentParts if p is not None] - contentPartsText = "" - for part in validParts: - contentFormat = part.metadata.get("contentFormat", "unknown") - contentPartsText += f"\n- ContentPart {part.id}:\n" - contentPartsText += f" Format: {contentFormat}\n" - if contentFormat == "extracted": - contentPartsText += f" Content: {part.data[:500]}...\n" if len(part.data) > 500 else f" Content: {part.data}\n" - elif contentFormat == "reference": - contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" - elif contentFormat == "object": - contentPartsText += f" Object: {part.typeGroup} ({part.mimeType})\n" + # Section-Metadaten + sectionId = section.get("id", "unknown") + contentType = section.get("content_type", "paragraph") - prompt = f"""USER REQUEST: + # Baue ContentParts-Beschreibung + contentPartsText = "" + if isAggregation: + # Aggregation: Zeige nur Metadaten, nicht Previews + contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n" + contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n" + contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n" + contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n" + contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. 
eine Tabelle)\n\n" + contentPartsText += f"ContentPart IDs:\n" + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}" + if part.metadata.get("originalFileName"): + contentPartsText += f", Source: {part.metadata.get('originalFileName')}" + contentPartsText += ")\n" + else: + # Einzelverarbeitung: Zeige Previews + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f"\n- ContentPart {part.id}:\n" + contentPartsText += f" Format: {contentFormat}\n" + contentPartsText += f" Type: {part.typeGroup}\n" + if part.metadata.get("originalFileName"): + contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n" + + if contentFormat == "extracted": + # Zeige Preview von extrahiertem Text (länger für besseren Kontext) + previewLength = 1000 + if part.data: + preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data + contentPartsText += f" Content preview:\n```\n{preview}\n```\n" + else: + contentPartsText += f" Content: (empty)\n" + elif contentFormat == "reference": + contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + contentPartsText += f" Object type: {part.typeGroup}\n" + contentPartsText += f" MIME type: {part.mimeType}\n" + contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" + + # Baue Section-Kontext (vorherige und nachfolgende Sections) + contextText = "" + if allSections and sectionIndex is not None: + prevSections = [] + nextSections = [] + + if sectionIndex > 0: + for i in range(max(0, 
sectionIndex - 2), sectionIndex): + prevSection = allSections[i] + prevSections.append({ + "id": prevSection.get("id"), + "content_type": prevSection.get("content_type"), + "generation_hint": prevSection.get("generation_hint", "")[:100] + }) + + if sectionIndex < len(allSections) - 1: + for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)): + nextSection = allSections[i] + nextSections.append({ + "id": nextSection.get("id"), + "content_type": nextSection.get("content_type"), + "generation_hint": nextSection.get("generation_hint", "")[:100] + }) + + if prevSections or nextSections: + contextText = "\n## DOCUMENT CONTEXT\n" + if prevSections: + contextText += "\nPrevious sections:\n" + for prev in prevSections: + contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n" + if nextSections: + contextText += "\nFollowing sections:\n" + for next in nextSections: + contextText += f"- {next['id']} ({next['content_type']}): {next['generation_hint']}\n" + + if isAggregation: + prompt = f"""# TASK: Generate Section Content (Aggregation) + +## SECTION METADATA +- Section ID: {sectionId} +- Content Type: {contentType} +- Generation Hint: {generationHint} +{contextText} + +## USER REQUEST (for context) +``` {userPrompt} +``` -SECTION TO GENERATE: -{generationHint} +## AVAILABLE CONTENT FOR THIS SECTION +{contentPartsText if contentPartsText else "(No content parts specified for this section)"} -AVAILABLE CONTENT FOR THIS SECTION: -{contentPartsText} +## INSTRUCTIONS +1. Generate content for section "{sectionId}" based on the generation hint above +2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) +3. For table content_type: Create a single table with headers and rows from all ContentParts +4. For bullet_list content_type: Create a single list with items from all ContentParts +5. Format appropriately based on content_type ({contentType}) +6. 
Ensure the generated content fits logically between previous and following sections +7. Return ONLY a JSON object with an "elements" array +8. Each element should match the content_type: {contentType} -CRITICAL: Return ONLY a JSON object with an "elements" array. -Jedes Element sollte dem content_type der Section entsprechen. +## OUTPUT FORMAT +Return a JSON object with this structure: +```json +{{ + "elements": [ + {{ + "type": "{contentType}", + "headers": [...], // if table + "rows": [...], // if table + "items": [...], // if bullet_list + "content": "..." // if paragraph + }} + ] +}} +``` + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +""" + else: + prompt = f"""# TASK: Generate Section Content + +## SECTION METADATA +- Section ID: {sectionId} +- Content Type: {contentType} +- Generation Hint: {generationHint} +{contextText} + +## USER REQUEST (for context) +``` +{userPrompt} +``` + +## AVAILABLE CONTENT FOR THIS SECTION +{contentPartsText if contentPartsText else "(No content parts specified for this section)"} + +## INSTRUCTIONS +1. Generate content for section "{sectionId}" based on the generation hint above +2. Use the available content parts to populate this section +3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data +4. For extracted text: Format appropriately based on content_type ({contentType}) +5. Ensure the generated content fits logically between previous and following sections +6. Return ONLY a JSON object with an "elements" array +7. Each element should match the content_type: {contentType} + +## OUTPUT FORMAT +Return a JSON object with this structure: +```json +{{ + "elements": [ + {{ + "type": "{contentType}", + "content": "..." + }} + ] +}} +``` + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. """ return prompt @@ -2091,6 +2805,35 @@ Jedes Element sollte dem content_type der Section entsprechen. 
return part return None + def _needsAggregation( + self, + contentType: str, + contentPartCount: int + ) -> bool: + """ + Bestimmt ob mehrere ContentParts aggregiert werden müssen. + + Aggregation nötig wenn: + - content_type erfordert Aggregation (table, bullet_list) + - UND mehrere ContentParts vorhanden sind (> 1) + + Args: + contentType: Section content_type + contentPartCount: Anzahl der ContentParts in dieser Section + + Returns: + True wenn Aggregation nötig, False sonst + """ + aggregationTypes = ["table", "bullet_list"] + + if contentType in aggregationTypes and contentPartCount > 1: + return True + + # Optional: Auch für paragraph wenn mehrere Parts vorhanden + # (z.B. Vergleich mehrerer Dokumente) + # Standard: Keine Aggregation für paragraph + return False + async def _renderResult( self, filledStructure: Dict[str, Any], diff --git a/modules/services/serviceAi/subContentExtraction.py b/modules/services/serviceAi/subContentExtraction.py new file mode 100644 index 00000000..229587f8 --- /dev/null +++ b/modules/services/serviceAi/subContentExtraction.py @@ -0,0 +1,670 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+""" +Content Extraction Module + +Handles content extraction and preparation, including: +- Extracting content from documents based on intents +- Processing pre-extracted documents +- Vision AI for image text extraction +- AI processing of text content +""" +import json +import logging +import base64 +from typing import Dict, Any, List, Optional + +from modules.datamodels.datamodelChat import ChatDocument +from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent + +logger = logging.getLogger(__name__) + + +class ContentExtractor: + """Handles content extraction and preparation.""" + + def __init__(self, services, aiService, intentAnalyzer): + """Initialize ContentExtractor with service center, AI service, and intent analyzer access.""" + self.services = services + self.aiService = aiService + self.intentAnalyzer = intentAnalyzer + + async def extractAndPrepareContent( + self, + documents: List[ChatDocument], + documentIntents: List[DocumentIntent], + parentOperationId: str, + getIntentForDocument: callable + ) -> List[ContentPart]: + """ + Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor. + Gibt Liste von ContentParts im passenden Format zurück. + + WICHTIG: Ein Dokument kann mehrere ContentParts erzeugen, wenn mehrere Intents vorhanden sind. + Beispiel: Bild mit intents=["extract", "render"] erzeugt: + - ContentPart(contentFormat="object", ...) für Rendering + - ContentPart(contentFormat="extracted", ...) 
für Text-Analyse + + Args: + documents: Liste der zu verarbeitenden Dokumente + documentIntents: Liste von DocumentIntent-Objekten + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + getIntentForDocument: Callable to get intent for document ID + + Returns: + Liste von ContentParts mit vollständigen Metadaten + """ + # Erstelle Operation-ID für Extraktion + extractionOperationId = f"{parentOperationId}_content_extraction" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + extractionOperationId, + "Content Extraction", + "Extraction", + f"Extracting from {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + allContentParts = [] + + for document in documents: + # Check if document is already a ContentExtracted document (pre-extracted JSON) + logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content") + preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(document) + + if preExtracted: + logger.info(f"✅ Found pre-extracted document: {document.fileName} -> Original: {preExtracted['originalDocument']['fileName']}") + logger.info(f" Pre-extracted document ID: {document.id}, Original document ID: {preExtracted['originalDocument']['id']}") + logger.info(f" ContentParts count: {len(preExtracted['contentExtracted'].parts) if preExtracted['contentExtracted'].parts else 0}") + + # Verwende bereits extrahierte ContentParts direkt + contentExtracted = preExtracted["contentExtracted"] + + # WICHTIG: Intent muss für das JSON-Dokument gefunden werden, nicht für das Original + # (Intent-Analyse mappt bereits zurück zu JSON-Dokument-ID) + intent = getIntentForDocument(document.id, documentIntents) + logger.info(f" Intent lookup for document {document.id}: found={intent is not None}") + if intent: + logger.info(f" Intent: {intent.intents}, extractionPrompt: {intent.extractionPrompt[:100] if intent.extractionPrompt else None}...") + else: 
+ logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}") + + if contentExtracted.parts: + for part in contentExtracted.parts: + # Überspringe leere Parts (Container ohne Daten) + if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + if part.typeGroup == "container": + continue # Überspringe leere Container + + if not part.metadata: + part.metadata = {} + + # Ensure metadata is complete + if "documentId" not in part.metadata: + part.metadata["documentId"] = document.id + + # WICHTIG: Prüfe Intent für dieses Part + partIntent = intent.intents if intent else ["extract"] + + # Debug-Logging für Intent-Verarbeitung + logger.debug(f"Processing part {part.id}: typeGroup={part.typeGroup}, intents={partIntent}, hasData={bool(part.data)}, dataLength={len(str(part.data)) if part.data else 0}") + + # WICHTIG: Ein Part kann mehrere Intents haben - erstelle für jeden Intent einen ContentPart + # Generische Intent-Verarbeitung für ALLE Content-Typen + hasReferenceIntent = "reference" in partIntent + hasRenderIntent = "render" in partIntent + hasExtractIntent = "extract" in partIntent + hasPartData = bool(part.data) and (not isinstance(part.data, str) or len(part.data.strip()) > 0) + + logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}") + + # Track ob der originale Part bereits hinzugefügt wurde + originalPartAdded = False + + # 1. 
Reference Intent: Erstelle Reference ContentPart + if hasReferenceIntent: + referencePart = ContentPart( + id=f"ref_{document.id}_{part.id}", + label=f"Reference: {part.label or 'Content'}", + typeGroup="reference", + mimeType=part.mimeType or "application/octet-stream", + data="", # Leer - nur Referenz + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{preExtracted['originalDocument']['fileName']}", + "intent": "reference", + "usageHint": f"Reference: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"] + } + ) + allContentParts.append(referencePart) + logger.debug(f"✅ Created reference ContentPart for {part.id}") + + # 2. Render Intent: Erstelle Object ContentPart (für Binary/Image Rendering) + if hasRenderIntent and hasPartData: + # Prüfe ob es ein Binary/Image ist (kann gerendert werden) + isRenderable = ( + part.typeGroup == "image" or + part.typeGroup == "binary" or + (part.mimeType and ( + part.mimeType.startswith("image/") or + part.mimeType.startswith("video/") or + part.mimeType.startswith("audio/") or + self._isBinary(part.mimeType) + )) + ) + + if isRenderable: + objectPart = ContentPart( + id=f"obj_{document.id}_{part.id}", + label=f"Object: {part.label or 'Content'}", + typeGroup=part.typeGroup, + mimeType=part.mimeType or "application/octet-stream", + data=part.data, # Base64/Binary data ist bereits vorhanden + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedExtractedPartId": f"extracted_{document.id}_{part.id}" if hasExtractIntent else None + } + ) + allContentParts.append(objectPart) + logger.debug(f"✅ Created object ContentPart for {part.id} (render intent)") + else: + logger.warning(f"⚠️ Part {part.id} has 
render intent but is not renderable (typeGroup={part.typeGroup}, mimeType={part.mimeType})") + elif hasRenderIntent and not hasPartData: + logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part") + + # 3. Extract Intent: Erstelle Extracted ContentPart (möglicherweise mit zusätzlicher Verarbeitung) + if hasExtractIntent: + # Spezielle Behandlung für Images: Vision AI für Text-Extraktion + if part.typeGroup == "image" and hasPartData: + logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)") + try: + extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. Return only the extracted text, no additional formatting." + extractedText = await self.extractTextFromImage(part, extractionPrompt) + if extractedText: + # Prüfe ob es ein Error-Message ist + isError = extractedText.startswith("[ERROR:") + + # Erstelle neuen Text-Part mit extrahiertem Text oder Error-Message + textPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=extractedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": extractionPrompt, + "extractionMethod": "vision", + "isError": isError + } + ) + allContentParts.append(textPart) + if isError: + logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}") + else: + logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars") + else: + # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) + errorMsg = f"Vision AI extraction failed: Unexpected empty 
response for image {part.id}" + logger.error(errorMsg) + errorPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Error extracting from {part.label or 'Image'}", + typeGroup="text", + mimeType="text/plain", + data=f"[ERROR: {errorMsg}]", + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "extractionPrompt": extractionPrompt, + "extractionMethod": "vision", + "isError": True + } + ) + allContentParts.append(errorPart) + except Exception as e: + logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}") + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Kein Fallback: Wenn render Intent vorhanden, haben wir bereits object Part + # Wenn nur extract Intent: Original Part ist kein Text, daher nicht als extracted hinzufügen + if not hasRenderIntent: + logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available") + else: + # Für alle anderen Content-Typen: Prüfe ob AI-Verarbeitung benötigt wird + # WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Content + # (z.B. Text aus PDF-Text-Layer, Tabellen, etc.). Wenn "extract" Intent vorhanden ist, + # muss dieser Content mit AI verarbeitet werden basierend auf extractionPrompt. 
+ + # Prüfe ob Part Text-Content hat (kann mit AI verarbeitet werden) + isTextContent = ( + part.typeGroup == "text" or + part.typeGroup == "table" or + (part.data and isinstance(part.data, str) and len(part.data.strip()) > 0) + ) + + if isTextContent and intent and intent.extractionPrompt: + # Text-Content mit extractionPrompt: Verarbeite mit AI + logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)") + try: + extractionPrompt = intent.extractionPrompt + processedText = await self.processTextContentWithAi(part, extractionPrompt) + if processedText: + # Prüfe ob es ein Error-Message ist + isError = processedText.startswith("[ERROR:") + + # Erstelle neuen Text-Part mit AI-verarbeitetem Text oder Error-Message + processedPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}", + typeGroup="text", + mimeType="text/plain", + data=processedText, + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None, + "extractionPrompt": extractionPrompt, + "extractionMethod": "ai", + "sourcePartId": part.id, + "fromExtractContent": True, + "isError": isError + } + ) + allContentParts.append(processedPart) + originalPartAdded = True + if isError: + logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}") + else: + logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars") + else: + # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück) + errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}" + logger.error(errorMsg) + errorPart = ContentPart( + id=f"extracted_{document.id}_{part.id}", + label=f"Error processing {part.label or 'Content'}", 
+ typeGroup="text", + mimeType="text/plain", + data=f"[ERROR: {errorMsg}]", + metadata={ + "contentFormat": "extracted", + "documentId": document.id, + "intent": "extract", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "extractionPrompt": extractionPrompt, + "extractionMethod": "ai", + "sourcePartId": part.id, + "isError": True + } + ) + allContentParts.append(errorPart) + originalPartAdded = True + except Exception as e: + logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}") + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Fallback: Verwende Original-Part + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None + }) + allContentParts.append(part) + originalPartAdded = True + else: + # Kein extractionPrompt oder kein Text-Content: Verwende Part direkt als extracted + # (Content ist bereits extrahiert von context.extractContent, keine weitere AI-Verarbeitung nötig) + # WICHTIG: Nur hinzufügen wenn noch nicht hinzugefügt (z.B. durch render Intent) + if not originalPartAdded: + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, # Bereits extrahiert + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None + }) + # Stelle sicher dass contentFormat gesetzt ist + if "contentFormat" not in part.metadata: + part.metadata["contentFormat"] = "extracted" + allContentParts.append(part) + originalPartAdded = True + logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)") + + # 4. 
Fallback: Wenn kein Intent vorhanden oder Part wurde noch nicht hinzugefügt + # (sollte normalerweise nicht vorkommen, da default "extract" ist) + if not hasReferenceIntent and not hasRenderIntent and not hasExtractIntent and not originalPartAdded: + logger.warning(f"⚠️ Part {part.id} has no recognized intents, adding as extracted by default") + part.metadata.update({ + "contentFormat": "extracted", + "intent": "extract", + "fromExtractContent": True, + "skipExtraction": True, + "originalFileName": preExtracted["originalDocument"]["fileName"] + }) + allContentParts.append(part) + originalPartAdded = True + + logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}") + logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}") + continue # Skip normal extraction for this document + + # Check if it's standardized JSON format (has "documents" or "sections") + if document.mimeType == "application/json": + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if docBytes: + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): + logger.info(f"Document is already in standardized JSON format, using as reference") + # Create reference ContentPart for structured JSON + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="structure", + mimeType="application/json", + data=docData, + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "skipExtraction": True, + "intent": "reference" + } + ) + allContentParts.append(contentPart) + logger.info(f"✅ Using JSON document directly without extraction") + continue # Skip normal extraction for this document + except 
Exception as e: + logger.warning(f"Could not parse JSON document {document.fileName}, will extract normally: {str(e)}") + # Continue with normal extraction + + # Normal extraction path + intent = getIntentForDocument(document.id, documentIntents) + + if not intent: + # Default: extract für alle Dokumente ohne Intent + logger.warning(f"No intent found for document {document.id}, using default 'extract'") + intent = DocumentIntent( + documentId=document.id, + intents=["extract"], + extractionPrompt="Extract all content from the document", + reasoning="Default intent: no specific intent found" + ) + + # WICHTIG: Prüfe alle Intents - ein Dokument kann mehrere ContentParts erzeugen + + if "reference" in intent.intents: + # Erstelle Reference ContentPart + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="reference", + mimeType=document.mimeType, + data="", + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "intent": "reference", + "usageHint": f"Reference document: {document.fileName}" + } + ) + allContentParts.append(contentPart) + + # WICHTIG: "render" und "extract" können beide vorhanden sein! 
+ # In diesem Fall erzeugen wir BEIDE ContentParts + + if "render" in intent.intents: + # Für Images/Binary: extrahiere als Object + if document.mimeType.startswith("image/") or self._isBinary(document.mimeType): + try: + # Lade Binary-Daten (getFileData ist nicht async - keine await nötig) + binaryData = self.services.interfaceDbComponent.getFileData(document.fileId) + if not binaryData: + logger.warning(f"No binary data found for document {document.id}") + continue + base64Data = base64.b64encode(binaryData).decode('utf-8') + + contentPart = ContentPart( + id=f"obj_{document.id}", + label=f"Object: {document.fileName}", + typeGroup="image" if document.mimeType.startswith("image/") else "binary", + mimeType=document.mimeType, + data=base64Data, + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {document.fileName}", + "originalFileName": document.fileName, + # Verknüpfung zu extracted Part (falls vorhanden) + "relatedExtractedPartId": f"ext_{document.id}" if "extract" in intent.intents else None + } + ) + allContentParts.append(contentPart) + except Exception as e: + logger.error(f"Failed to load binary data for document {document.id}: {str(e)}") + + if "extract" in intent.intents: + # Extrahiere Content mit Extraction Service + extractionPrompt = intent.extractionPrompt or "Extract all content from the document" + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + extractionPrompt, + f"content_extraction_prompt_{document.id}" + ) + + # Führe Extraktion aus + from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy() + ) + + # extractContent ist nicht async - keine await nötig + extractedResults = self.services.extraction.extractContent( + [document], + extractionOptions, + operationId=extractionOperationId, + 
parentOperationId=extractionOperationId + ) + + # Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten + for extracted in extractedResults: + for part in extracted.parts: + # Markiere als extracted Format + part.metadata.update({ + "contentFormat": "extracted", + "documentId": document.id, + "extractionPrompt": extractionPrompt, + "intent": "extract", + "usageHint": f"Use extracted content from {document.fileName}", + # Verknüpfung zu object Part (falls vorhanden) + "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None + }) + # Stelle sicher, dass ID eindeutig ist (falls object Part existiert) + if "render" in intent.intents: + part.id = f"ext_{document.id}_{part.id}" + allContentParts.append(part) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps([part.dict() for part in allContentParts], indent=2, default=str), + "content_extraction_result" + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(extractionOperationId, True) + + return allContentParts + + except Exception as e: + self.services.chat.progressLogFinish(extractionOperationId, False) + logger.error(f"Error in extractAndPrepareContent: {str(e)}") + raise + + async def extractTextFromImage(self, imagePart: ContentPart, extractionPrompt: str) -> Optional[str]: + """ + Extrahiere Text aus einem Image-Part mit Vision AI. + + Args: + imagePart: ContentPart mit typeGroup="image" + extractionPrompt: Prompt für die Text-Extraktion + + Returns: + Extrahierter Text oder None bei Fehler + """ + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + # Final extraction prompt + finalPrompt = extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting." 
+ + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + finalPrompt, + f"content_extraction_prompt_image_{imagePart.id}" + ) + + # Erstelle AI-Call-Request mit Image-Part + request = AiCallRequest( + prompt=finalPrompt, + context="", + options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE), + contentParts=[imagePart] + ) + + # Verwende AI-Service für Vision AI-Verarbeitung + response = await self.aiService.callAi(request) + + # Debug-Log für Response (harmonisiert) + if response and response.content: + self.services.utils.writeDebugFile( + response.content, + f"content_extraction_response_image_{imagePart.id}" + ) + + if response and response.content: + return response.content.strip() + + # Kein Content zurückgegeben - return error message für Debugging + errorMsg = f"Vision AI extraction failed: No content returned for image {imagePart.id}" + logger.warning(errorMsg) + return f"[ERROR: {errorMsg}]" + except Exception as e: + errorMsg = f"Vision AI extraction failed for image {imagePart.id}: {str(e)}" + logger.error(errorMsg) + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Return error message statt None für Debugging + return f"[ERROR: {errorMsg}]" + + async def processTextContentWithAi(self, textPart: ContentPart, extractionPrompt: str) -> Optional[str]: + """ + Verarbeite Text-Content mit AI basierend auf extractionPrompt. + + WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Text + (z.B. aus PDF-Text-Layer). Wenn "extract" Intent vorhanden ist, muss dieser Text mit AI + verarbeitet werden (Transformation, Strukturierung, etc.) basierend auf extractionPrompt. 
+ + Args: + textPart: ContentPart mit typeGroup="text" (oder anderer Text-basierter Typ) + extractionPrompt: Prompt für die AI-Verarbeitung des Textes + + Returns: + AI-verarbeiteter Text oder None bei Fehler + """ + try: + from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum + + # Final extraction prompt + finalPrompt = extractionPrompt or "Process and extract the key information from the following text content." + + # Debug-Log (harmonisiert) - log prompt with text preview + textPreview = textPart.data[:500] + "..." if textPart.data and len(textPart.data) > 500 else (textPart.data or "") + promptWithContext = f"{finalPrompt}\n\n--- Text Content (preview) ---\n{textPreview}" + self.services.utils.writeDebugFile( + promptWithContext, + f"content_extraction_prompt_text_{textPart.id}" + ) + + # Erstelle Text-ContentPart für AI-Verarbeitung + # Verwende den vorhandenen Text als Input + textContentPart = ContentPart( + id=textPart.id, + label=textPart.label, + typeGroup="text", + mimeType="text/plain", + data=textPart.data if textPart.data else "", + metadata=textPart.metadata.copy() if textPart.metadata else {} + ) + + # Erstelle AI-Call-Request mit Text-Part + request = AiCallRequest( + prompt=finalPrompt, + context="", + options=AiCallOptions(operationType=OperationTypeEnum.DATA_EXTRACT), + contentParts=[textContentPart] + ) + + # Verwende AI-Service für Text-Verarbeitung + response = await self.aiService.callAi(request) + + # Debug-Log für Response (harmonisiert) + if response and response.content: + self.services.utils.writeDebugFile( + response.content, + f"content_extraction_response_text_{textPart.id}" + ) + + if response and response.content: + return response.content.strip() + + # Kein Content zurückgegeben - return error message für Debugging + errorMsg = f"AI text processing failed: No content returned for text part {textPart.id}" + logger.warning(errorMsg) + return f"[ERROR: {errorMsg}]" + except Exception as e: + 
errorMsg = f"AI text processing failed for text part {textPart.id}: {str(e)}" + logger.error(errorMsg) + import traceback + logger.debug(f"Traceback: {traceback.format_exc()}") + # Return error message statt None für Debugging + return f"[ERROR: {errorMsg}]" + + def _isBinary(self, mimeType: str) -> bool: + """Prüfe ob MIME-Type binary ist.""" + binaryTypes = [ + "application/octet-stream", + "application/pdf", + "application/zip", + "application/x-zip-compressed" + ] + return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/") + diff --git a/modules/services/serviceAi/subDocumentIntents.py b/modules/services/serviceAi/subDocumentIntents.py new file mode 100644 index 00000000..c1faba39 --- /dev/null +++ b/modules/services/serviceAi/subDocumentIntents.py @@ -0,0 +1,302 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Document Intent Analysis Module + +Handles analysis of document intents, including: +- Clarifying which documents need extraction vs reference +- Resolving pre-extracted documents +- Building intent analysis prompts +""" +import json +import logging +from typing import Dict, Any, List, Optional + +from modules.datamodels.datamodelChat import ChatDocument +from modules.datamodels.datamodelExtraction import DocumentIntent + +logger = logging.getLogger(__name__) + + +class DocumentIntentAnalyzer: + """Handles document intent analysis and resolution.""" + + def __init__(self, services, aiService): + """Initialize DocumentIntentAnalyzer with service center and AI service access.""" + self.services = services + self.aiService = aiService + + async def clarifyDocumentIntents( + self, + documents: List[ChatDocument], + userPrompt: str, + actionParameters: Dict[str, Any], + parentOperationId: str + ) -> List[DocumentIntent]: + """ + Phase 5A: Analysiert, welche Dokumente Extraktion vs Referenz benötigen. + Gibt DocumentIntent für jedes Dokument zurück. 
+ + Args: + documents: Liste der zu verarbeitenden Dokumente + userPrompt: User-Anfrage + actionParameters: Action-spezifische Parameter (z.B. resultType, outputFormat) + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Liste von DocumentIntent-Objekten + """ + # Erstelle Operation-ID für Intent-Analyse + intentOperationId = f"{parentOperationId}_intent_analysis" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + intentOperationId, + "Document Intent Analysis", + "Intent Analysis", + f"Analyzing {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + # Mappe pre-extracted JSONs zu ursprünglichen Dokument-IDs für Intent-Analyse + documentMapping = {} # Maps original doc ID -> JSON doc ID + resolvedDocuments = [] + + for doc in documents: + preExtracted = self.resolvePreExtractedDocument(doc) + if preExtracted: + originalDocId = preExtracted["originalDocument"]["id"] + documentMapping[originalDocId] = doc.id + # Erstelle temporäres ChatDocument für ursprüngliches Dokument + originalDoc = ChatDocument( + id=originalDocId, + fileName=preExtracted["originalDocument"]["fileName"], + mimeType=preExtracted["originalDocument"]["mimeType"], + fileSize=preExtracted["originalDocument"].get("fileSize", doc.fileSize), + fileId=doc.fileId, # Behalte fileId vom JSON + messageId=doc.messageId if hasattr(doc, 'messageId') else None # Behalte messageId falls vorhanden + ) + resolvedDocuments.append(originalDoc) + else: + resolvedDocuments.append(doc) + + # Baue Intent-Analyse-Prompt mit ursprünglichen Dokumenten + intentPrompt = self._buildIntentAnalysisPrompt(userPrompt, resolvedDocuments, actionParameters) + + # AI-Call (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.aiService.callAiPlanning( + prompt=intentPrompt, + debugType="document_intent_analysis" + ) + + # Parse Result und mappe zurück zu 
JSON-Dokument-IDs falls nötig + intentsData = json.loads(self.services.utils.jsonExtractString(aiResponse)) + documentIntents = [] + for intent in intentsData.get("intents", []): + docId = intent.get("documentId") + # Wenn Intent für ursprüngliches Dokument, mappe zurück zu JSON-Dokument-ID + if docId in documentMapping: + intent["documentId"] = documentMapping[docId] + documentIntents.append(DocumentIntent(**intent)) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps([intent.dict() for intent in documentIntents], indent=2), + "document_intent_analysis_result" + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(intentOperationId, True) + + return documentIntents + + except Exception as e: + self.services.chat.progressLogFinish(intentOperationId, False) + logger.error(f"Error in clarifyDocumentIntents: {str(e)}") + raise + + def resolvePreExtractedDocument(self, document: ChatDocument) -> Optional[Dict[str, Any]]: + """ + Prüft ob ein JSON-Dokument bereits extrahierte ContentParts enthält. + Gibt Dict zurück mit: + - originalDocument: ChatDocument-Info des ursprünglichen Dokuments + - contentExtracted: ContentExtracted-Objekt mit Parts + - parts: Liste der ContentParts + + Returns None wenn kein pre-extracted Format erkannt wird. 
+ """ + if document.mimeType != "application/json": + logger.debug(f"Document {document.id} is not JSON (mimeType={document.mimeType}), skipping pre-extracted check") + return None + + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if not docBytes: + return None + + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if not isinstance(jsonData, dict): + return None + + # Check for ContentExtracted format + # Nur Format 1 (ActionDocument-Format mit validationMetadata) wird unterstützt + documentData = None + + validationMetadata = jsonData.get("validationMetadata", {}) + actionType = validationMetadata.get("actionType") + logger.debug(f"JSON document {document.id}: validationMetadata.actionType={actionType}, keys={list(jsonData.keys())}") + + if actionType == "context.extractContent": + # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} + documentData = jsonData.get("documentData") + logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}, documentData keys: {list(documentData.keys()) if documentData else None}") + else: + logger.debug(f"JSON document {document.id} does not have actionType='context.extractContent' (got: {actionType})") + + if documentData: + from modules.datamodels.datamodelExtraction import ContentExtracted + + try: + # Stelle sicher, dass "id" vorhanden ist + if "id" not in documentData: + documentData["id"] = document.id + + contentExtracted = ContentExtracted(**documentData) + + if contentExtracted.parts: + # Extrahiere ursprüngliche Dokument-Info aus den Parts + originalDocId = None + originalFileName = None + originalMimeType = None + + for part in contentExtracted.parts: + if part.metadata: + # Versuche ursprüngliche Dokument-Info zu finden + if not originalDocId and part.metadata.get("documentId"): + originalDocId = part.metadata.get("documentId") + if not originalFileName and 
part.metadata.get("originalFileName"): + originalFileName = part.metadata.get("originalFileName") + if not originalMimeType and part.metadata.get("documentMimeType"): + originalMimeType = part.metadata.get("documentMimeType") + + # Falls nicht gefunden, versuche aus documentName zu extrahieren + if not originalFileName: + # Versuche aus documentName zu extrahieren (z.B. "B2025-02c_28_extracted_...json" -> "B2025-02c_28.pdf") + if document.fileName and "_extracted_" in document.fileName: + originalFileName = document.fileName.split("_extracted_")[0] + ".pdf" + + return { + "originalDocument": { + "id": originalDocId or document.id, + "fileName": originalFileName or document.fileName, + "mimeType": originalMimeType or "application/pdf", + "fileSize": document.fileSize + }, + "contentExtracted": contentExtracted, + "parts": contentExtracted.parts + } + except Exception as parseError: + logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}") + logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}") + import traceback + logger.debug(f"Parse error traceback: {traceback.format_exc()}") + return None + else: + logger.debug(f"JSON document {document.id} has no documentData (actionType={actionType})") + + return None + except Exception as e: + logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}") + return None + + def _buildIntentAnalysisPrompt( + self, + userPrompt: str, + documents: List[ChatDocument], + actionParameters: Dict[str, Any] + ) -> str: + """Baue Prompt für Intent-Analyse.""" + # Baue Dokument-Liste - zeige ursprüngliche Dokumente für pre-extracted JSONs + docListText = "" + for i, doc in enumerate(documents, 1): + # Prüfe ob es ein pre-extracted JSON ist + preExtracted = self.resolvePreExtractedDocument(doc) + + if preExtracted: + # Zeige ursprüngliches Dokument statt JSON + originalDoc = preExtracted["originalDocument"] + partsInfo = f" (contains 
{len(preExtracted['parts'])} pre-extracted parts: {', '.join([p.typeGroup for p in preExtracted['parts'] if p.data and len(str(p.data)) > 0])})" + docListText += f"\n{i}. Document ID: {originalDoc['id']}\n" + docListText += f" File Name: {originalDoc['fileName']}{partsInfo}\n" + docListText += f" MIME Type: {originalDoc['mimeType']}\n" + docListText += f" File Size: {originalDoc.get('fileSize', doc.fileSize)} bytes\n" + else: + # Normales Dokument + docListText += f"\n{i}. Document ID: {doc.id}\n" + docListText += f" File Name: {doc.fileName}\n" + docListText += f" MIME Type: {doc.mimeType}\n" + docListText += f" File Size: {doc.fileSize} bytes\n" + + outputFormat = actionParameters.get("outputFormat", "txt") + + prompt = f"""USER REQUEST: +{userPrompt} + +DOCUMENTS TO ANALYZE: +{docListText} + +TASK: For each document, determine its intents (can be multiple): +- "extract": Content extraction needed (text, structure, OCR, etc.) +- "render": Image/binary should be rendered as-is (visual element) +- "reference": Document reference/attachment (no extraction, just reference) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "intents": [ + {{ + "documentId": "doc_1", + "intents": ["extract"], # Array - can contain multiple! + "extractionPrompt": "Extract all text content, preserving structure", + "reasoning": "User needs text content for document generation" + }}, + {{ + "documentId": "doc_2", + "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering + "extractionPrompt": "Extract text content from image using vision AI", + "reasoning": "Image contains text that needs extraction, but also should be rendered visually" + }}, + {{ + "documentId": "doc_3", + "intents": ["reference"], + "extractionPrompt": null, + "reasoning": "Document is only used as reference, no extraction needed" + }} + ] +}} + +CRITICAL RULES: +1. 
For images (mimeType starts with "image/"): + - If user wants to "include" or "show" images → add "render" + - If user wants to "analyze", "read text", or "extract text" from images → add "extract" + - Can have BOTH "extract" and "render" if image needs both text extraction and visual rendering + +2. For text documents: + - If user mentions "template" or "structure" → "reference" or "extract" based on context + - If user mentions "reference" or "context" → "reference" + - Default → "extract" + +3. Consider output format: + - For formats like PDF, DOCX, PPTX: images usually need "render" + - For formats like CSV, JSON: usually "extract" only + - For HTML: can have both "extract" and "render" + +Return ONLY valid JSON following the structure above. +""" + return prompt + diff --git a/modules/services/serviceAi/subResponseParsing.py b/modules/services/serviceAi/subResponseParsing.py new file mode 100644 index 00000000..a2d568d9 --- /dev/null +++ b/modules/services/serviceAi/subResponseParsing.py @@ -0,0 +1,275 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+""" +Response Parsing Module + +Handles parsing of AI responses, including: +- Section extraction from responses +- JSON completeness detection +- Loop detection +- Document metadata extraction +- Final result building +""" +import json +import logging +from typing import Dict, Any, List, Optional, Tuple + +from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument +from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler +from modules.datamodels.datamodelAi import JsonAccumulationState + +logger = logging.getLogger(__name__) + + +class ResponseParser: + """Handles parsing of AI responses and completion detection.""" + + def __init__(self, services): + """Initialize ResponseParser with service center access.""" + self.services = services + + def extractSectionsFromResponse( + self, + result: str, + iteration: int, + debugPrefix: str, + allSections: List[Dict[str, Any]] = None, + accumulationState: Optional[JsonAccumulationState] = None + ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]: + """ + Extract sections from AI response, handling both valid and broken JSON. 
+ + NEW BEHAVIOR: + - First iteration: Check if complete, if not start accumulation + - Subsequent iterations: Accumulate strings, parse when complete + + Returns: + Tuple of: + - sections: Extracted sections + - wasJsonComplete: True if JSON is complete + - parsedResult: Parsed JSON object + - updatedAccumulationState: Updated accumulation state (None if not in accumulation mode) + """ + if allSections is None: + allSections = [] + + if iteration == 1: + # First iteration - check if complete + parsed = None + try: + extracted = extractJsonString(result) + parsed = json.loads(extracted) + + # Check completeness + if JsonResponseHandler.isJsonComplete(parsed): + # Complete JSON - no accumulation needed + sections = extractSectionsFromDocument(parsed) + logger.info(f"Iteration 1: Complete JSON detected, no accumulation needed") + return sections, True, parsed, None # No accumulation + except Exception: + pass + + # Incomplete - try to extract partial sections from broken JSON + logger.info(f"Iteration 1: Incomplete JSON detected, attempting to extract partial sections") + + partialSections = [] + if parsed: + # Try to extract sections from parsed (even if incomplete) + partialSections = extractSectionsFromDocument(parsed) + else: + # Try to repair broken JSON and extract sections + try: + repaired = repairBrokenJson(result) + if repaired: + partialSections = extractSectionsFromDocument(repaired) + parsed = repaired # Use repaired version for accumulation state + except Exception: + pass # If repair fails, continue with empty sections + + + # Define KPIs (async call - need to handle this) + # For now, create accumulation state without KPIs, will be updated after async call + accumulationState = JsonAccumulationState( + accumulatedJsonString=result, + isAccumulationMode=True, + lastParsedResult=parsed, + allSections=partialSections, + kpis=[] + ) + + # Note: KPI definition will be done in the caller (async context) + return partialSections, False, parsed, 
accumulationState + + else: + # Subsequent iterations - accumulate + if accumulationState and accumulationState.isAccumulationMode: + accumulated, sections, isComplete, parsedResult = \ + JsonResponseHandler.accumulateAndParseJsonFragments( + accumulationState.accumulatedJsonString, + result, + allSections, + iteration + ) + + # Update accumulation state + accumulationState.accumulatedJsonString = accumulated + accumulationState.lastParsedResult = parsedResult + accumulationState.allSections = allSections + sections if sections else allSections + accumulationState.isAccumulationMode = not isComplete + + # Log accumulated JSON for debugging + if parsedResult: + accumulated_json_str = json.dumps(parsedResult, indent=2, ensure_ascii=False) + self.services.utils.writeDebugFile(accumulated_json_str, f"{debugPrefix}_accumulated_json_iteration_{iteration}.json") + + return sections, isComplete, parsedResult, accumulationState + else: + # No accumulation mode - process normally (shouldn't happen) + logger.warning(f"Iteration {iteration}: No accumulation state but iteration > 1") + return [], False, None, None + + def shouldContinueGeneration( + self, + allSections: List[Dict[str, Any]], + iteration: int, + wasJsonComplete: bool, + rawResponse: str = None + ) -> bool: + """ + Determine if AI generation loop should continue. + + CRITICAL: This is ONLY about AI Loop Completion, NOT Action DoD! + Action DoD is checked AFTER the AI Loop completes in _refineDecide. + + Simple logic: + - If JSON parsing failed or incomplete → continue (needs more content) + - If JSON parses successfully and is complete → stop (all content delivered) + - Loop detection prevents infinite loops + + CRITICAL: JSON completeness is determined by parsing, NOT by last character check! + Returns True if we should continue, False if AI Loop is done. 
+ """ + if len(allSections) == 0: + return True # No sections yet, continue + + # CRITERION 1: If JSON was incomplete/broken (parsing failed or incomplete) - continue to repair/complete + if not wasJsonComplete: + logger.info(f"Iteration {iteration}: JSON incomplete/broken - continuing to complete") + return True + + # CRITERION 2: JSON is complete (parsed successfully) - check for loop detection + if self._isStuckInLoop(allSections, iteration): + logger.warning(f"Iteration {iteration}: Detected potential infinite loop - stopping AI loop") + return False + + # JSON is complete and not stuck in loop - done + logger.info(f"Iteration {iteration}: JSON complete - AI loop done") + return False + + def _isStuckInLoop( + self, + allSections: List[Dict[str, Any]], + iteration: int + ) -> bool: + """ + Detect if we're stuck in a loop (same content being repeated). + + Generic approach: Check if recent iterations are adding minimal or duplicate content. + """ + if iteration < 3: + return False # Need at least 3 iterations to detect a loop + + if len(allSections) == 0: + return False + + # Check if last section is very small (might be stuck) + lastSection = allSections[-1] + elements = lastSection.get("elements", []) + + if isinstance(elements, list) and elements: + lastElem = elements[-1] if elements else {} + else: + lastElem = elements if isinstance(elements, dict) else {} + + # Check content size of last section + lastSectionSize = 0 + if isinstance(lastElem, dict): + for key, value in lastElem.items(): + if isinstance(value, str): + lastSectionSize += len(value) + elif isinstance(value, list): + lastSectionSize += len(str(value)) + + # If last section is very small and we've done many iterations, might be stuck + if lastSectionSize < 100 and iteration > 10: + logger.warning(f"Potential loop detected: iteration {iteration}, last section size {lastSectionSize}") + return True + + return False + + def extractDocumentMetadata( + self, + parsedResult: Dict[str, Any] + ) -> 
Optional[Dict[str, Any]]: + """ + Extract document metadata (title, filename) from parsed AI response. + Returns dict with 'title' and 'filename' keys if found, None otherwise. + """ + if not isinstance(parsedResult, dict): + return None + + # Try to get from documents array (preferred structure) + if "documents" in parsedResult and isinstance(parsedResult["documents"], list) and len(parsedResult["documents"]) > 0: + firstDoc = parsedResult["documents"][0] + if isinstance(firstDoc, dict): + title = firstDoc.get("title") + filename = firstDoc.get("filename") + if title or filename: + return { + "title": title, + "filename": filename + } + + return None + + def buildFinalResultFromSections( + self, + allSections: List[Dict[str, Any]], + documentMetadata: Optional[Dict[str, Any]] = None + ) -> str: + """ + Build final JSON result from accumulated sections. + Uses AI-provided metadata (title, filename) if available. + """ + if not allSections: + return "" + + # Extract metadata from AI response if available + title = "Generated Document" + filename = "document.json" + if documentMetadata: + if documentMetadata.get("title"): + title = documentMetadata["title"] + if documentMetadata.get("filename"): + filename = documentMetadata["filename"] + + # Build documents structure + # Assuming single document for now + documents = [{ + "id": "doc_1", + "title": title, + "filename": filename, + "sections": allSections + }] + + result = { + "metadata": { + "split_strategy": "single_document", + "source_documents": [], + "extraction_method": "ai_generation" + }, + "documents": documents + } + + return json.dumps(result, indent=2) + diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py new file mode 100644 index 00000000..cc45b099 --- /dev/null +++ b/modules/services/serviceAi/subStructureFilling.py @@ -0,0 +1,546 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. 
+""" +Structure Filling Module + +Handles filling document structure with content, including: +- Filling sections with content parts +- Building section generation prompts +- Aggregation logic +""" +import json +import logging +import copy +from typing import Dict, Any, List, Optional + +from modules.datamodels.datamodelExtraction import ContentPart +from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum + +logger = logging.getLogger(__name__) + + +class StructureFiller: + """Handles filling document structure with content.""" + + def __init__(self, services, aiService): + """Initialize StructureFiller with service center and AI service access.""" + self.services = services + self.aiService = aiService + + async def fillStructure( + self, + structure: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5D: Füllt Struktur mit tatsächlichem Content. 
+ Für jede Section: + - Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format + - Wenn generation_hint spezifiziert: Generiere AI-Content + + **Implementierungsdetails:** + - Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung) + - Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses) + + Args: + structure: Struktur-Dict mit documents und sections + contentParts: Alle vorbereiteten ContentParts + userPrompt: User-Anfrage + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Gefüllte Struktur mit elements in jeder Section + """ + # Erstelle Operation-ID für Struktur-Abfüllen + fillOperationId = f"{parentOperationId}_structure_filling" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + fillOperationId, + "Structure Filling", + "Filling", + f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", + parentOperationId=parentOperationId + ) + + try: + filledStructure = copy.deepcopy(structure) + + # Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) + sections_to_process = [] + all_sections_list = [] # Für Kontext-Informationen + for doc in filledStructure.get("documents", []): + doc_sections = doc.get("sections", []) + all_sections_list.extend(doc_sections) + for section in doc_sections: + sections_to_process.append((doc, section)) + + # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) + for sectionIndex, (doc, section) in enumerate(sections_to_process): + sectionId = section.get("id") + contentPartIds = section.get("contentPartIds", []) + contentFormats = section.get("contentFormats", {}) + generationHint = section.get("generation_hint") + contentType = section.get("content_type", "paragraph") + + elements = [] + + # Prüfe ob Aggregation nötig ist + needsAggregation = self._needsAggregation( + contentType=contentType, + 
contentPartCount=len(contentPartIds) + ) + + if needsAggregation and generationHint: + # Aggregation: Alle Parts zusammen verarbeiten + sectionParts = [ + self._findContentPartById(pid, contentParts) + for pid in contentPartIds + ] + sectionParts = [p for p in sectionParts if p is not None] + + if sectionParts: + # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt) + extractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted" + ] + nonExtractedParts = [ + p for p in sectionParts + if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted" + ] + + # Verarbeite non-extracted Parts separat (reference, object) + for part in nonExtractedParts: + contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + elif contentFormat == "object": + elements.append({ + "type": part.typeGroup, + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + # Aggregiere extracted Parts mit AI + if extractedParts: + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=extractedParts, # ALLE PARTS für Aggregation! 
+ userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=True + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation (Aggregation)", + "Section", + f"Generating section {sectionId} with {len(extractedParts)} parts", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!) + request = AiCallRequest( + prompt=generationPrompt, + contentParts=extractedParts, # ALLE PARTS! + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) 
+ self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + + else: + # Einzelverarbeitung: Jeder Part einzeln + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if not part: + continue + + contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + # Füge Dokument-Referenz hinzu + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "object": + # Füge base64 Object hinzu + elements.append({ + "type": part.typeGroup, # "image", "binary", etc. + "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "extracted": + if generationHint: + # AI-Call mit einzelnen ContentPart + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[part], # EIN PART + userPrompt=userPrompt, + generationHint=generationHint, + allSections=all_sections_list, + sectionIndex=sectionIndex, + isAggregation=False + ) + + # Erstelle Operation-ID für Section-Generierung + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation", + "Section", + f"Generating section {sectionId}", + parentOperationId=fillOperationId + ) + + try: + # Debug: Log Prompt + self.services.utils.writeDebugFile( + generationPrompt, + f"section_content_{sectionId}_prompt" + ) + + # Verwende callAi für ContentParts-Unterstützung + request = AiCallRequest( + prompt=generationPrompt, + 
contentParts=[part], + options=AiCallOptions( + operationType=OperationTypeEnum.DATA_ANALYSE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED + ) + ) + aiResponse = await self.aiService.callAi(request) + + # Debug: Log Response + self.services.utils.writeDebugFile( + aiResponse.content, + f"section_content_{sectionId}_response" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse.content) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) + self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + else: + # Füge extrahierten Text direkt hinzu (kein AI-Call) + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) + + section["elements"] = elements + + # ChatLog abschließen + self.services.chat.progressLogFinish(fillOperationId, True) + + return filledStructure + + except Exception as e: + self.services.chat.progressLogFinish(fillOperationId, False) + logger.error(f"Error in fillStructure: {str(e)}") + raise + + def _buildSectionGenerationPrompt( + self, + section: Dict[str, Any], + contentParts: List[Optional[ContentPart]], + userPrompt: str, + generationHint: str, + allSections: Optional[List[Dict[str, Any]]] = None, + sectionIndex: 
Optional[int] = None, + isAggregation: bool = False + ) -> str: + """Baue Prompt für Section-Generierung mit vollständigem Kontext.""" + # Filtere None-Werte + validParts = [p for p in contentParts if p is not None] + + # Section-Metadaten + sectionId = section.get("id", "unknown") + contentType = section.get("content_type", "paragraph") + + # Baue ContentParts-Beschreibung + contentPartsText = "" + if isAggregation: + # Aggregation: Zeige nur Metadaten, nicht Previews + contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n" + contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n" + contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n" + contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n" + contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n" + contentPartsText += f"ContentPart IDs:\n" + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}" + if part.metadata.get("originalFileName"): + contentPartsText += f", Source: {part.metadata.get('originalFileName')}" + contentPartsText += ")\n" + else: + # Einzelverarbeitung: Zeige Previews + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f"\n- ContentPart {part.id}:\n" + contentPartsText += f" Format: {contentFormat}\n" + contentPartsText += f" Type: {part.typeGroup}\n" + if part.metadata.get("originalFileName"): + contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n" + + if contentFormat == "extracted": + # Zeige Preview von extrahiertem Text (länger für besseren Kontext) + previewLength = 1000 + if part.data: + preview = part.data[:previewLength] + "..." 
if len(part.data) > previewLength else part.data + contentPartsText += f" Content preview:\n```\n{preview}\n```\n" + else: + contentPartsText += f" Content: (empty)\n" + elif contentFormat == "reference": + contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + contentPartsText += f" Object type: {part.typeGroup}\n" + contentPartsText += f" MIME type: {part.mimeType}\n" + contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n" + if part.metadata.get("usageHint"): + contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n" + + # Baue Section-Kontext (vorherige und nachfolgende Sections) + contextText = "" + if allSections and sectionIndex is not None: + prevSections = [] + nextSections = [] + + if sectionIndex > 0: + for i in range(max(0, sectionIndex - 2), sectionIndex): + prevSection = allSections[i] + prevSections.append({ + "id": prevSection.get("id"), + "content_type": prevSection.get("content_type"), + "generation_hint": prevSection.get("generation_hint", "")[:100] + }) + + if sectionIndex < len(allSections) - 1: + for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)): + nextSection = allSections[i] + nextSections.append({ + "id": nextSection.get("id"), + "content_type": nextSection.get("content_type"), + "generation_hint": nextSection.get("generation_hint", "")[:100] + }) + + if prevSections or nextSections: + contextText = "\n## DOCUMENT CONTEXT\n" + if prevSections: + contextText += "\nPrevious sections:\n" + for prev in prevSections: + contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n" + if nextSections: + contextText += "\nFollowing sections:\n" + for next in nextSections: + contextText += f"- {next['id']} ({next['content_type']}): 
{next['generation_hint']}\n" + + if isAggregation: + prompt = f"""# TASK: Generate Section Content (Aggregation) + +## SECTION METADATA +- Section ID: {sectionId} +- Content Type: {contentType} +- Generation Hint: {generationHint} +{contextText} + +## USER REQUEST (for context) +``` +{userPrompt} +``` + +## AVAILABLE CONTENT FOR THIS SECTION +{contentPartsText if contentPartsText else "(No content parts specified for this section)"} + +## INSTRUCTIONS +1. Generate content for section "{sectionId}" based on the generation hint above +2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data) +3. For table content_type: Create a single table with headers and rows from all ContentParts +4. For bullet_list content_type: Create a single list with items from all ContentParts +5. Format appropriately based on content_type ({contentType}) +6. Ensure the generated content fits logically between previous and following sections +7. Return ONLY a JSON object with an "elements" array +8. Each element should match the content_type: {contentType} + +## OUTPUT FORMAT +Return a JSON object with this structure: +```json +{{ + "elements": [ + {{ + "type": "{contentType}", + "headers": [...], // if table + "rows": [...], // if table + "items": [...], // if bullet_list + "content": "..." // if paragraph + }} + ] +}} +``` + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +""" + else: + prompt = f"""# TASK: Generate Section Content + +## SECTION METADATA +- Section ID: {sectionId} +- Content Type: {contentType} +- Generation Hint: {generationHint} +{contextText} + +## USER REQUEST (for context) +``` +{userPrompt} +``` + +## AVAILABLE CONTENT FOR THIS SECTION +{contentPartsText if contentPartsText else "(No content parts specified for this section)"} + +## INSTRUCTIONS +1. Generate content for section "{sectionId}" based on the generation hint above +2. 
Use the available content parts to populate this section +3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data +4. For extracted text: Format appropriately based on content_type ({contentType}) +5. Ensure the generated content fits logically between previous and following sections +6. Return ONLY a JSON object with an "elements" array +7. Each element should match the content_type: {contentType} + +## OUTPUT FORMAT +Return a JSON object with this structure: +```json +{{ + "elements": [ + {{ + "type": "{contentType}", + "content": "..." + }} + ] +}} +``` + +CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON. +""" + return prompt + + def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]: + """Finde ContentPart nach ID.""" + for part in contentParts: + if part.id == partId: + return part + return None + + def _needsAggregation( + self, + contentType: str, + contentPartCount: int + ) -> bool: + """ + Bestimmt ob mehrere ContentParts aggregiert werden müssen. + + Aggregation nötig wenn: + - content_type erfordert Aggregation (table, bullet_list) + - UND mehrere ContentParts vorhanden sind (> 1) + + Args: + contentType: Section content_type + contentPartCount: Anzahl der ContentParts in dieser Section + + Returns: + True wenn Aggregation nötig, False sonst + """ + aggregationTypes = ["table", "bullet_list"] + + if contentType in aggregationTypes and contentPartCount > 1: + return True + + # Optional: Auch für paragraph wenn mehrere Parts vorhanden + # (z.B. 
Vergleich mehrerer Dokumente) + # Standard: Keine Aggregation für paragraph + return False + diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py new file mode 100644 index 00000000..eb39fdd6 --- /dev/null +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -0,0 +1,229 @@ +# Copyright (c) 2025 Patrick Motsch +# All rights reserved. +""" +Structure Generation Module + +Handles document structure generation, including: +- Generating document structure with sections +- Building structure prompts +""" +import json +import logging +from typing import Dict, Any, List + +from modules.datamodels.datamodelExtraction import ContentPart + +logger = logging.getLogger(__name__) + + +class StructureGenerator: + """Handles document structure generation.""" + + def __init__(self, services, aiService): + """Initialize StructureGenerator with service center and AI service access.""" + self.services = services + self.aiService = aiService + + async def generateStructure( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5C: Generiert Dokument-Struktur mit Sections. + Jede Section spezifiziert: + - Welcher Content sollte in dieser Section sein + - Welche ContentParts zu verwenden sind + - Format für jeden ContentPart + + Args: + userPrompt: User-Anfrage + contentParts: Alle vorbereiteten ContentParts mit Metadaten + outputFormat: Ziel-Format (html, docx, pdf, etc.) 
+ parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Struktur-Dict mit documents und sections + """ + # Erstelle Operation-ID für Struktur-Generierung + structureOperationId = f"{parentOperationId}_structure_generation" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + structureOperationId, + "Structure Generation", + "Structure", + f"Generating structure for {outputFormat}", + parentOperationId=parentOperationId + ) + + try: + # Baue Struktur-Prompt mit Content-Index + structurePrompt = self._buildStructurePrompt( + userPrompt=userPrompt, + contentParts=contentParts, + outputFormat=outputFormat + ) + + # AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.aiService.callAiPlanning( + prompt=structurePrompt, + debugType="document_generation_structure" + ) + + # Parse Struktur + structure = json.loads(self.services.utils.jsonExtractString(aiResponse)) + + # ChatLog abschließen + self.services.chat.progressLogFinish(structureOperationId, True) + + return structure + + except Exception as e: + self.services.chat.progressLogFinish(structureOperationId, False) + logger.error(f"Error in generateStructure: {str(e)}") + raise + + def _buildStructurePrompt( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str + ) -> str: + """Baue Prompt für Struktur-Generierung.""" + # Baue ContentParts-Index - filtere leere Parts heraus + contentPartsIndex = "" + validParts = [] + filteredParts = [] + + for part in contentParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + + # WICHTIG: Reference Parts haben absichtlich leere Daten - immer einschließen + if contentFormat == "reference": + validParts.append(part) + logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)") + continue + + # Überspringe leere Parts (keine Daten oder nur 
Container ohne Inhalt) + # ABER: Reference Parts wurden bereits oben behandelt + if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + # Überspringe Container-Parts ohne Daten + if part.typeGroup == "container" and not part.data: + filteredParts.append((part.id, "container without data")) + continue + # Überspringe andere leere Parts (aber nicht Reference, die wurden bereits behandelt) + if not part.data: + filteredParts.append((part.id, f"no data (format: {contentFormat})")) + continue + + validParts.append(part) + logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}") + + if filteredParts: + logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}") + + logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)") + + # Baue Index nur für gültige Parts + for i, part in enumerate(validParts, 1): + contentFormat = part.metadata.get("contentFormat", "unknown") + dataPreview = "" + + if contentFormat == "extracted": + # Für Image-Parts: Zeige dass es ein Image ist + if part.typeGroup == "image": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "image" + dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content" + elif part.typeGroup == "container": + # Container ohne Daten überspringen wir bereits oben + dataPreview = "Container structure (no text content)" + else: + # Zeige Preview von extrahiertem Text + if part.data: + preview = part.data[:200] + "..." 
if len(part.data) > 200 else part.data + dataPreview = preview + else: + dataPreview = "(empty)" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "binary" + if part.typeGroup == "image": + dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)" + else: + dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)" + elif contentFormat == "reference": + dataPreview = part.metadata.get("documentReference", "reference") + + originalFileName = part.metadata.get('originalFileName', 'N/A') + + contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n" + contentPartsIndex += f" Format: {contentFormat}\n" + contentPartsIndex += f" Type: {part.typeGroup}\n" + contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n" + contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n" + contentPartsIndex += f" Original file name: {originalFileName}\n" + contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n" + contentPartsIndex += f" Data preview: {dataPreview}\n" + + if not contentPartsIndex: + contentPartsIndex = "\n(No content parts available)" + + prompt = f"""USER REQUEST: +{userPrompt} + +AVAILABLE CONTENT PARTS: +{contentPartsIndex} + +TASK: Generiere Dokument-Struktur mit Sections. +Für jede Section, spezifiziere: +- section id +- content_type (heading, paragraph, image, table, etc.) 
+- contentPartIds: [Liste von ContentPart-IDs zu verwenden] +- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist +- generation_hint: Was AI für diese Section generieren soll +- elements: [] (leer, wird in nächster Phase gefüllt) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "metadata": {{ + "title": "Document Title", + "language": "de" + }}, + "documents": [{{ + "id": "doc_1", + "title": "Document Title", + "filename": "document.{outputFormat}", + "sections": [ + {{ + "id": "section_1", + "content_type": "heading", + "generation_hint": "Main title", + "contentPartIds": [], + "contentFormats": {{}}, + "elements": [] + }}, + {{ + "id": "section_2", + "content_type": "paragraph", + "generation_hint": "Introduction paragraph", + "contentPartIds": ["part_ext_1"], + "contentFormats": {{ + "part_ext_1": "extracted" + }}, + "elements": [] + }} + ] + }}] +}} + +Return ONLY valid JSON following the structure above. +""" + return prompt + diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index a2972453..ba4bfb69 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -856,7 +856,10 @@ class ExtractionService: merged_parts = applyMerging(content_parts, merge_strategy) # Phase 6: Enhanced format with metadata preservation - # CRITICAL: For generation responses (JSON), don't add SOURCE markers - they interfere with JSON parsing + # CRITICAL: Don't add SOURCE markers for internal use - metadata is already preserved in ContentPart objects + # SOURCE markers should ONLY be added when content is returned directly to user for display/debugging + # For extraction content used in generation pipelines, metadata is in ContentPart.metadata, not in text markers + # Check if this is a generation response by looking at operationType or content structure 
isGenerationResponse = False if options and hasattr(options, 'operationType'): @@ -880,23 +883,14 @@ class ExtractionService: except: pass + # ROOT CAUSE FIX: Never add SOURCE markers - metadata is preserved in ContentPart.metadata + # SOURCE markers pollute content and cause issues when content is used in generation pipelines + # If traceability is needed, use ContentPart.metadata fields (documentId, documentMimeType, label, etc.) content_sections = [] for part in merged_parts: - if isGenerationResponse: - # For generation responses, return JSON directly without SOURCE markers - content_sections.append(part.data) - else: - # For extraction responses, include metadata in section header for traceability - doc_id = part.metadata.get("documentId", "unknown") - doc_mime = part.metadata.get("documentMimeType", "unknown") - label = part.label or "content" - - section = f""" -[SOURCE: documentId={doc_id}, mimeType={doc_mime}, label={label}] -{part.data} -[END SOURCE] -""" - content_sections.append(section) + # Always return clean content without SOURCE markers + # Metadata is available in ContentPart.metadata for traceability + content_sections.append(part.data if part.data else "") final_content = "\n\n".join(content_sections) diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 54c7e64b..213bf641 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -299,36 +299,14 @@ class RendererHtml(BaseRenderer): def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: """Render a single JSON section to HTML using AI-generated styles. Supports three content formats: reference, object (base64), extracted_text. + WICHTIG: Respektiert sectionType (content_type) für korrekte Rendering-Logik. 
""" try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) - # Check for three content formats from Phase 5D in elements - if isinstance(sectionData, list): - htmlParts = [] - for element in sectionData: - element_type = element.get("type", "") if isinstance(element, dict) else "" - - # Support three content formats from Phase 5D - if element_type == "reference": - # Document reference format - doc_ref = element.get("documentReference", "") - label = element.get("label", "Reference") - htmlParts.append(f'
<div class="reference">[Reference: {label}]</div>
') - continue - elif element_type == "extracted_text": - # Extracted text format - content = element.get("content", "") - source = element.get("source", "") - if content: - source_text = f' (Source: {source})' if source else '' - htmlParts.append(f'{content}{source_text}
') - continue - - # If we processed reference/extracted_text elements, return them - if htmlParts: - return '\n'.join(htmlParts) + # WICHTIG: Respektiere sectionType (content_type) ZUERST, dann process elements entsprechend + # Process elements according to section's content_type, not just element types if sectionType == "table": # Process the section data to extract table structure @@ -339,8 +317,58 @@ class RendererHtml(BaseRenderer): processedData = self._processSectionByType(section) return self._renderJsonBulletList(processedData, styles) elif sectionType == "heading": + # Extract text from elements for heading rendering + if isinstance(sectionData, list): + # Extract text from heading elements + headingText = "" + for element in sectionData: + if isinstance(element, dict): + element_type = element.get("type", "") + if element_type == "heading": + headingText = element.get("content", element.get("text", "")) + break + elif element_type == "extracted_text": + # Use extracted text as heading if no heading element found + content = element.get("content", "") + if content and not headingText: + # Extract first line or title from extracted text + headingText = content.split('\n')[0].strip() + # Remove markdown formatting + headingText = headingText.replace('#', '').replace('**', '').strip() + break + elif "text" in element: + headingText = element.get("text", "") + break + if headingText: + return self._renderJsonHeading({"text": headingText, "level": 2}, styles) return self._renderJsonHeading(sectionData, styles) elif sectionType == "paragraph": + # Process paragraph elements, including extracted_text + if isinstance(sectionData, list): + htmlParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + if element_type == "reference": + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + htmlParts.append(f'[Reference: {label}]
') + elif element_type == "extracted_text": + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f' (Source: {source})' if source else '' + htmlParts.append(f'{content}{source_text}
') + elif isinstance(element, dict): + # Regular paragraph element + text = element.get("text", element.get("content", "")) + if text: + htmlParts.append(f'{text}
') + elif isinstance(element, str): + htmlParts.append(f'{element}
') + + if htmlParts: + return '\n'.join(htmlParts) return self._renderJsonParagraph(sectionData, styles) elif sectionType == "code_block": # Process the section data to extract code block structure @@ -351,6 +379,25 @@ class RendererHtml(BaseRenderer): processedData = self._processSectionByType(section) return self._renderJsonImage(processedData, styles) else: + # Fallback: Check for special element types first + if isinstance(sectionData, list): + htmlParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + if element_type == "reference": + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + htmlParts.append(f'[Reference: {label}]
') + elif element_type == "extracted_text": + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f' (Source: {source})' if source else '' + htmlParts.append(f'{content}{source_text}
') + + if htmlParts: + return '\n'.join(htmlParts) # Fallback to paragraph for unknown types return self._renderJsonParagraph(sectionData, styles) diff --git a/tests/functional/test09_document_generation_formats.py b/tests/functional/test09_document_generation_formats.py index 49860665..3e33c996 100644 --- a/tests/functional/test09_document_generation_formats.py +++ b/tests/functional/test09_document_generation_formats.py @@ -214,14 +214,14 @@ class DocumentGenerationFormatsTester: self.workflow = workflow print(f"Workflow started: {workflow.id}") - # Wait for workflow completion + # Wait for workflow completion (no timeout - wait indefinitely) print(f"Waiting for workflow completion...") - completed = await self.waitForWorkflowCompletion(timeout=300) # 5 minute timeout + completed = await self.waitForWorkflowCompletion(timeout=None) if not completed: return { "success": False, - "error": "Workflow did not complete within timeout", + "error": "Workflow did not complete", "workflowId": workflow.id, "status": workflow.status if workflow else "unknown" } @@ -243,7 +243,7 @@ class DocumentGenerationFormatsTester: "results": results } - async def waitForWorkflowCompletion(self, timeout: int = 300, checkInterval: int = 2) -> bool: + async def waitForWorkflowCompletion(self, timeout: Optional[int] = None, checkInterval: int = 2) -> bool: """Wait for workflow to complete.""" if not self.workflow: return False @@ -253,9 +253,12 @@ class DocumentGenerationFormatsTester: interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser) + if timeout is None: + print("Waiting indefinitely (no timeout)") + while True: - # Check timeout - if time.time() - startTime > timeout: + # Check timeout only if specified + if timeout is not None and time.time() - startTime > timeout: print(f"\n⏱️ Timeout after {timeout} seconds") return False @@ -455,13 +458,13 @@ class DocumentGenerationFormatsTester: self.workflow = workflow print(f"Workflow started: {workflow.id}") - # Wait for 
workflow completion - completed = await self.waitForWorkflowCompletion(timeout=300) + # Wait for workflow completion (no timeout - wait indefinitely) + completed = await self.waitForWorkflowCompletion(timeout=None) if not completed: results[testType] = { "success": False, - "error": "Workflow did not complete within timeout", + "error": "Workflow did not complete", "workflowId": workflow.id } continue