From 60a0543e865b212f0ecfe4ff07ae3a481c2a9042 Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Thu, 25 Dec 2025 23:51:47 +0100
Subject: [PATCH] refactored ai service with chapter generation
---
.../serviceAi/README_MODULE_STRUCTURE.md | 78 ++
.../services/serviceAi/REFACTORING_PLAN.md | 126 ++
modules/services/serviceAi/mainServiceAi.py | 1129 ++++++++++++++---
.../serviceAi/subContentExtraction.py | 670 ++++++++++
.../services/serviceAi/subDocumentIntents.py | 302 +++++
.../services/serviceAi/subResponseParsing.py | 275 ++++
.../services/serviceAi/subStructureFilling.py | 546 ++++++++
.../serviceAi/subStructureGeneration.py | 229 ++++
.../mainServiceExtraction.py | 26 +-
.../renderers/rendererHtml.py | 97 +-
.../test09_document_generation_formats.py | 21 +-
11 files changed, 3256 insertions(+), 243 deletions(-)
create mode 100644 modules/services/serviceAi/README_MODULE_STRUCTURE.md
create mode 100644 modules/services/serviceAi/REFACTORING_PLAN.md
create mode 100644 modules/services/serviceAi/subContentExtraction.py
create mode 100644 modules/services/serviceAi/subDocumentIntents.py
create mode 100644 modules/services/serviceAi/subResponseParsing.py
create mode 100644 modules/services/serviceAi/subStructureFilling.py
create mode 100644 modules/services/serviceAi/subStructureGeneration.py
diff --git a/modules/services/serviceAi/README_MODULE_STRUCTURE.md b/modules/services/serviceAi/README_MODULE_STRUCTURE.md
new file mode 100644
index 00000000..d2fca8f5
--- /dev/null
+++ b/modules/services/serviceAi/README_MODULE_STRUCTURE.md
@@ -0,0 +1,78 @@
+# Module Structure - serviceAi
+
+## Übersicht
+
+Das `mainServiceAi.py` Modul wurde in mehrere Submodule aufgeteilt, um die Übersichtlichkeit zu verbessern.
+
+## Modulstruktur
+
+### Hauptmodul
+- **mainServiceAi.py** (~800-1000 Zeilen)
+ - Initialisierung (`__init__`, `create`, `ensureAiObjectsInitialized`)
+ - Public API (`callAiPlanning`, `callAiContent`)
+ - Routing zu Submodulen
+ - Helper-Methoden
+
+### Submodule
+
+1. **subJsonResponseHandling.py** (bereits vorhanden)
+ - JSON Response Merging
+ - Section Merging
+ - Fragment Detection
+
+2. **subResponseParsing.py** (~200 Zeilen)
+ - `ResponseParser.extractSectionsFromResponse()` - Extrahiert Sections aus AI-Responses
+ - `ResponseParser.shouldContinueGeneration()` - Entscheidet ob Generation fortgesetzt werden soll
+ - `ResponseParser._isStuckInLoop()` - Loop-Detection
+ - `ResponseParser.extractDocumentMetadata()` - Extrahiert Metadaten
+ - `ResponseParser.buildFinalResultFromSections()` - Baut finales JSON
+
+3. **subDocumentIntents.py** (~300 Zeilen)
+ - `DocumentIntentAnalyzer.clarifyDocumentIntents()` - Analysiert Dokument-Intents
+ - `DocumentIntentAnalyzer.resolvePreExtractedDocument()` - Löst pre-extracted Dokumente auf
+ - `DocumentIntentAnalyzer._buildIntentAnalysisPrompt()` - Baut Intent-Analyse-Prompt
+
+4. **subContentExtraction.py** (~600 Zeilen)
+ - `ContentExtractor.extractAndPrepareContent()` - Extrahiert und bereitet Content vor
+ - `ContentExtractor.extractTextFromImage()` - Vision AI für Bilder
+ - `ContentExtractor.processTextContentWithAi()` - AI-Verarbeitung von Text
+ - `ContentExtractor._isBinary()` - Helper für Binary-Check
+
+5. **subStructureGeneration.py** (~200 Zeilen)
+ - `StructureGenerator.generateStructure()` - Generiert Dokument-Struktur
+ - `StructureGenerator._buildStructurePrompt()` - Baut Struktur-Prompt
+
+6. **subStructureFilling.py** (~400 Zeilen)
+ - `StructureFiller.fillStructure()` - Füllt Struktur mit Content
+ - `StructureFiller._buildSectionGenerationPrompt()` - Baut Section-Generation-Prompt
+ - `StructureFiller._findContentPartById()` - Helper für ContentPart-Suche
+ - `StructureFiller._needsAggregation()` - Entscheidet ob Aggregation nötig
+
+7. **subAiCallLooping.py** (~500 Zeilen)
+ - `AiCallLooper.callAiWithLooping()` - Haupt-Looping-Logik
+ - `AiCallLooper._defineKpisFromPrompt()` - KPI-Definition
+
+## Verwendung
+
+Alle Submodule werden über das Hauptmodul `AiService` verwendet:
+
+```python
+# Initialisierung
+aiService = await AiService.create(serviceCenter)
+
+# Submodule werden automatisch initialisiert
+# aiService.responseParser
+# aiService.intentAnalyzer
+# aiService.contentExtractor
+# etc.
+```
+
+## Migration
+
+Die öffentliche API bleibt unverändert. Interne Methoden wurden in Submodule verschoben:
+
+- `_extractSectionsFromResponse` → `responseParser.extractSectionsFromResponse`
+- `_clarifyDocumentIntents` → `intentAnalyzer.clarifyDocumentIntents`
+- `_extractAndPrepareContent` → `contentExtractor.extractAndPrepareContent`
+- etc.
+
diff --git a/modules/services/serviceAi/REFACTORING_PLAN.md b/modules/services/serviceAi/REFACTORING_PLAN.md
new file mode 100644
index 00000000..2ce7a717
--- /dev/null
+++ b/modules/services/serviceAi/REFACTORING_PLAN.md
@@ -0,0 +1,126 @@
+# Refactoring Plan für mainServiceAi.py
+
+## Ziel
+Aufteilen des 3000-Zeilen-Moduls in überschaubare Submodule (~300-600 Zeilen pro Modul).
+
+## Vorgeschlagene Struktur
+
+### Bereits erstellt:
+1. ✅ `subResponseParsing.py` - ResponseParser Klasse
+2. ✅ `subDocumentIntents.py` - DocumentIntentAnalyzer Klasse
+
+### Noch zu erstellen:
+3. `subContentExtraction.py` - ContentExtractor Klasse
+ - `extractAndPrepareContent()` (~490 Zeilen)
+ - `extractTextFromImage()` (~55 Zeilen)
+ - `processTextContentWithAi()` (~72 Zeilen)
+ - `_isBinary()` (~10 Zeilen)
+
+4. `subStructureGeneration.py` - StructureGenerator Klasse
+ - `generateStructure()` (~60 Zeilen)
+ - `_buildStructurePrompt()` (~130 Zeilen)
+
+5. `subStructureFilling.py` - StructureFiller Klasse
+ - `fillStructure()` (~290 Zeilen)
+ - `_buildSectionGenerationPrompt()` (~185 Zeilen)
+ - `_findContentPartById()` (~5 Zeilen)
+ - `_needsAggregation()` (~20 Zeilen)
+
+6. `subAiCallLooping.py` - AiCallLooper Klasse
+ - `callAiWithLooping()` (~405 Zeilen)
+ - `_defineKpisFromPrompt()` (~92 Zeilen)
+
+## Refactoring-Schritte für mainServiceAi.py
+
+### Schritt 1: Submodule-Initialisierung erweitern
+
+```python
+def _initializeSubmodules(self):
+ """Initialize all submodules after aiObjects is ready."""
+ if self.aiObjects is None:
+ raise RuntimeError("aiObjects must be initialized before initializing submodules")
+
+ if self.extractionService is None:
+ logger.info("Initializing ExtractionService...")
+ self.extractionService = ExtractionService(self.services)
+
+ # Neue Submodule initialisieren
+ from modules.services.serviceAi.subResponseParsing import ResponseParser
+ from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer
+ from modules.services.serviceAi.subContentExtraction import ContentExtractor
+ from modules.services.serviceAi.subStructureGeneration import StructureGenerator
+ from modules.services.serviceAi.subStructureFilling import StructureFiller
+
+ if not hasattr(self, 'responseParser'):
+ self.responseParser = ResponseParser(self.services)
+
+ if not hasattr(self, 'intentAnalyzer'):
+ self.intentAnalyzer = DocumentIntentAnalyzer(self.services, self)
+
+ if not hasattr(self, 'contentExtractor'):
+ self.contentExtractor = ContentExtractor(self.services, self)
+
+ if not hasattr(self, 'structureGenerator'):
+ self.structureGenerator = StructureGenerator(self.services, self)
+
+ if not hasattr(self, 'structureFiller'):
+ self.structureFiller = StructureFiller(self.services, self)
+```
+
+### Schritt 2: Methoden durch Delegation ersetzen
+
+**Beispiel für Response Parsing:**
+```python
+# ALT:
+def _extractSectionsFromResponse(self, ...):
+ # 100 Zeilen Code
+ ...
+
+# NEU:
+def _extractSectionsFromResponse(self, ...):
+ return self.responseParser.extractSectionsFromResponse(...)
+```
+
+**Beispiel für Document Intents:**
+```python
+# ALT:
+async def _clarifyDocumentIntents(self, ...):
+ # 100 Zeilen Code
+ ...
+
+# NEU:
+async def _clarifyDocumentIntents(self, ...):
+ return await self.intentAnalyzer.clarifyDocumentIntents(...)
+```
+
+### Schritt 3: Helper-Methoden beibehalten
+
+Kleine Helper-Methoden bleiben im Hauptmodul:
+- `_buildPromptWithPlaceholders()`
+- `_getIntentForDocument()`
+- `_shouldSkipContentPart()`
+- `_determineDocumentName()`
+
+### Schritt 4: Public API unverändert lassen
+
+Die öffentliche API (`callAiPlanning`, `callAiContent`) bleibt unverändert.
+
+## Erwartete Ergebnis-Größen
+
+- `mainServiceAi.py`: ~800-1000 Zeilen (von 3016)
+- `subResponseParsing.py`: ~200 Zeilen ✅
+- `subDocumentIntents.py`: ~300 Zeilen ✅
+- `subContentExtraction.py`: ~600 Zeilen
+- `subStructureGeneration.py`: ~200 Zeilen
+- `subStructureFilling.py`: ~400 Zeilen
+- `subAiCallLooping.py`: ~500 Zeilen
+
+**Gesamt: ~3000 Zeilen** (gleich, aber besser organisiert)
+
+## Vorteile
+
+1. **Übersichtlichkeit**: Jedes Modul hat eine klare Verantwortlichkeit
+2. **Wartbarkeit**: Änderungen sind lokalisiert
+3. **Testbarkeit**: Module können einzeln getestet werden
+4. **Wiederverwendbarkeit**: Module können in anderen Kontexten verwendet werden
+
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 74b90346..f8ab4dad 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -50,6 +50,33 @@ class AiService:
if self.extractionService is None:
logger.info("Initializing ExtractionService...")
self.extractionService = ExtractionService(self.services)
+
+ # Initialize new submodules
+ from modules.services.serviceAi.subResponseParsing import ResponseParser
+ from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer
+ from modules.services.serviceAi.subContentExtraction import ContentExtractor
+ from modules.services.serviceAi.subStructureGeneration import StructureGenerator
+ from modules.services.serviceAi.subStructureFilling import StructureFiller
+
+ if not hasattr(self, 'responseParser'):
+ logger.info("Initializing ResponseParser...")
+ self.responseParser = ResponseParser(self.services)
+
+ if not hasattr(self, 'intentAnalyzer'):
+ logger.info("Initializing DocumentIntentAnalyzer...")
+ self.intentAnalyzer = DocumentIntentAnalyzer(self.services, self)
+
+ if not hasattr(self, 'contentExtractor'):
+ logger.info("Initializing ContentExtractor...")
+ self.contentExtractor = ContentExtractor(self.services, self, self.intentAnalyzer)
+
+ if not hasattr(self, 'structureGenerator'):
+ logger.info("Initializing StructureGenerator...")
+ self.structureGenerator = StructureGenerator(self.services, self)
+
+ if not hasattr(self, 'structureFiller'):
+ logger.info("Initializing StructureFiller...")
+ self.structureFiller = StructureFiller(self.services, self)
async def callAi(self, request: AiCallRequest, progressCallback=None):
"""Router: handles content parts via extractionService, text context via interface.
@@ -684,6 +711,19 @@ If no trackable items can be identified, return: {{"kpis": []}}
debugPrefix: str,
allSections: List[Dict[str, Any]] = None,
accumulationState: Optional[JsonAccumulationState] = None
+ ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]:
+ """Delegate to ResponseParser."""
+ return self.responseParser.extractSectionsFromResponse(
+ result, iteration, debugPrefix, allSections, accumulationState
+ )
+
+ def _extractSectionsFromResponse_OLD(
+ self,
+ result: str,
+ iteration: int,
+ debugPrefix: str,
+ allSections: List[Dict[str, Any]] = None,
+ accumulationState: Optional[JsonAccumulationState] = None
) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]:
"""
Extract sections from AI response, handling both valid and broken JSON.
@@ -783,6 +823,18 @@ If no trackable items can be identified, return: {{"kpis": []}}
iteration: int,
wasJsonComplete: bool,
rawResponse: str = None
+ ) -> bool:
+ """Delegate to ResponseParser."""
+ return self.responseParser.shouldContinueGeneration(
+ allSections, iteration, wasJsonComplete, rawResponse
+ )
+
+ def _shouldContinueGeneration_OLD(
+ self,
+ allSections: List[Dict[str, Any]],
+ iteration: int,
+ wasJsonComplete: bool,
+ rawResponse: str = None
) -> bool:
"""
Determine if AI generation loop should continue.
@@ -859,6 +911,13 @@ If no trackable items can be identified, return: {{"kpis": []}}
def _extractDocumentMetadata(
self,
parsedResult: Dict[str, Any]
+ ) -> Optional[Dict[str, Any]]:
+ """Delegate to ResponseParser."""
+ return self.responseParser.extractDocumentMetadata(parsedResult)
+
+ def _extractDocumentMetadata_OLD(
+ self,
+ parsedResult: Dict[str, Any]
) -> Optional[Dict[str, Any]]:
"""
Extract document metadata (title, filename) from parsed AI response.
@@ -885,6 +944,14 @@ If no trackable items can be identified, return: {{"kpis": []}}
self,
allSections: List[Dict[str, Any]],
documentMetadata: Optional[Dict[str, Any]] = None
+ ) -> str:
+ """Delegate to ResponseParser."""
+ return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)
+
+ def _buildFinalResultFromSections_OLD(
+ self,
+ allSections: List[Dict[str, Any]],
+ documentMetadata: Optional[Dict[str, Any]] = None
) -> str:
"""
Build final JSON result from accumulated sections.
@@ -1090,6 +1157,18 @@ If no trackable items can be identified, return: {{"kpis": []}}
userPrompt: str,
actionParameters: Dict[str, Any],
parentOperationId: str
+ ) -> List[DocumentIntent]:
+ """Delegate to DocumentIntentAnalyzer."""
+ return await self.intentAnalyzer.clarifyDocumentIntents(
+ documents, userPrompt, actionParameters, parentOperationId
+ )
+
+ async def _clarifyDocumentIntents_OLD(
+ self,
+ documents: List[ChatDocument],
+ userPrompt: str,
+ actionParameters: Dict[str, Any],
+ parentOperationId: str
) -> List[DocumentIntent]:
"""
Phase 5A: Analysiert, welche Dokumente Extraktion vs Referenz benötigen.
@@ -1189,6 +1268,7 @@ If no trackable items can be identified, return: {{"kpis": []}}
Returns None wenn kein pre-extracted Format erkannt wird.
"""
if document.mimeType != "application/json":
+ logger.debug(f"Document {document.id} is not JSON (mimeType={document.mimeType}), skipping pre-extracted check")
return None
try:
@@ -1208,10 +1288,14 @@ If no trackable items can be identified, return: {{"kpis": []}}
validationMetadata = jsonData.get("validationMetadata", {})
actionType = validationMetadata.get("actionType")
+ logger.debug(f"JSON document {document.id}: validationMetadata.actionType={actionType}, keys={list(jsonData.keys())}")
+
if actionType == "context.extractContent":
# Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}}
documentData = jsonData.get("documentData")
- logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}")
+ logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}, documentData keys: {list(documentData.keys()) if documentData else None}")
+ else:
+ logger.debug(f"JSON document {document.id} does not have actionType='context.extractContent' (got: {actionType})")
if documentData:
from modules.datamodels.datamodelExtraction import ContentExtracted
@@ -1258,7 +1342,11 @@ If no trackable items can be identified, return: {{"kpis": []}}
except Exception as parseError:
logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}")
logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}")
+ import traceback
+ logger.debug(f"Parse error traceback: {traceback.format_exc()}")
return None
+ else:
+ logger.debug(f"JSON document {document.id} has no documentData (actionType={actionType})")
return None
except Exception as e:
@@ -1277,26 +1365,122 @@ If no trackable items can be identified, return: {{"kpis": []}}
Extrahierter Text oder None bei Fehler
"""
try:
- from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions
+ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
+
+ # Final extraction prompt
+ finalPrompt = extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting."
+
+ # Debug-Log (harmonisiert)
+ self.services.utils.writeDebugFile(
+ finalPrompt,
+ f"content_extraction_prompt_image_{imagePart.id}"
+ )
# Erstelle AI-Call-Request mit Image-Part
request = AiCallRequest(
- prompt=extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting.",
+ prompt=finalPrompt,
context="",
- options=AiCallOptions(operationType="extraction"),
+ options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE),
contentParts=[imagePart]
)
# Verwende AI-Service für Vision AI-Verarbeitung
- response = await self.services.ai.call(request)
+ response = await self.services.ai.callAi(request)
+
+ # Debug-Log für Response (harmonisiert)
+ if response and response.content:
+ self.services.utils.writeDebugFile(
+ response.content,
+ f"content_extraction_response_image_{imagePart.id}"
+ )
if response and response.content:
return response.content.strip()
- return None
+ # Kein Content zurückgegeben - return error message für Debugging
+ errorMsg = f"Vision AI extraction failed: No content returned for image {imagePart.id}"
+ logger.warning(errorMsg)
+ return f"[ERROR: {errorMsg}]"
except Exception as e:
- logger.warning(f"Error extracting text from image {imagePart.id}: {str(e)}")
- return None
+ errorMsg = f"Vision AI extraction failed for image {imagePart.id}: {str(e)}"
+ logger.error(errorMsg)
+ import traceback
+ logger.debug(f"Traceback: {traceback.format_exc()}")
+ # Return error message statt None für Debugging
+ return f"[ERROR: {errorMsg}]"
+
+ async def _processTextContentWithAi(self, textPart: ContentPart, extractionPrompt: str) -> Optional[str]:
+ """
+ Verarbeite Text-Content mit AI basierend auf extractionPrompt.
+
+ WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Text
+ (z.B. aus PDF-Text-Layer). Wenn "extract" Intent vorhanden ist, muss dieser Text mit AI
+ verarbeitet werden (Transformation, Strukturierung, etc.) basierend auf extractionPrompt.
+
+ Args:
+ textPart: ContentPart mit typeGroup="text" (oder anderer Text-basierter Typ)
+ extractionPrompt: Prompt für die AI-Verarbeitung des Textes
+
+ Returns:
+ AI-verarbeiteter Text oder None bei Fehler
+ """
+ try:
+ from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum
+
+ # Final extraction prompt
+ finalPrompt = extractionPrompt or "Process and extract the key information from the following text content."
+
+ # Debug-Log (harmonisiert) - log prompt with text preview
+ textPreview = textPart.data[:500] + "..." if textPart.data and len(textPart.data) > 500 else (textPart.data or "")
+ promptWithContext = f"{finalPrompt}\n\n--- Text Content (preview) ---\n{textPreview}"
+ self.services.utils.writeDebugFile(
+ promptWithContext,
+ f"content_extraction_prompt_text_{textPart.id}"
+ )
+
+ # Erstelle Text-ContentPart für AI-Verarbeitung
+ # Verwende den vorhandenen Text als Input
+ textContentPart = ContentPart(
+ id=textPart.id,
+ label=textPart.label,
+ typeGroup="text",
+ mimeType="text/plain",
+ data=textPart.data if textPart.data else "",
+ metadata=textPart.metadata.copy() if textPart.metadata else {}
+ )
+
+ # Erstelle AI-Call-Request mit Text-Part
+ request = AiCallRequest(
+ prompt=finalPrompt,
+ context="",
+ options=AiCallOptions(operationType=OperationTypeEnum.DATA_EXTRACT),
+ contentParts=[textContentPart]
+ )
+
+ # Verwende AI-Service für Text-Verarbeitung
+ response = await self.services.ai.callAi(request)
+
+ # Debug-Log für Response (harmonisiert)
+ if response and response.content:
+ self.services.utils.writeDebugFile(
+ response.content,
+ f"content_extraction_response_text_{textPart.id}"
+ )
+
+ if response and response.content:
+ return response.content.strip()
+
+ # Kein Content zurückgegeben - return error message für Debugging
+ errorMsg = f"AI text processing failed: No content returned for text part {textPart.id}"
+ logger.warning(errorMsg)
+ return f"[ERROR: {errorMsg}]"
+ except Exception as e:
+ errorMsg = f"AI text processing failed for text part {textPart.id}: {str(e)}"
+ logger.error(errorMsg)
+ import traceback
+ logger.debug(f"Traceback: {traceback.format_exc()}")
+ # Return error message statt None für Debugging
+ return f"[ERROR: {errorMsg}]"
def _buildIntentAnalysisPrompt(
self,
@@ -1390,6 +1574,17 @@ Return ONLY valid JSON following the structure above.
documents: List[ChatDocument],
documentIntents: List[DocumentIntent],
parentOperationId: str
+ ) -> List[ContentPart]:
+ """Delegate to ContentExtractor."""
+ return await self.contentExtractor.extractAndPrepareContent(
+ documents, documentIntents, parentOperationId, self._getIntentForDocument
+ )
+
+ async def _extractAndPrepareContent_OLD(
+ self,
+ documents: List[ChatDocument],
+ documentIntents: List[DocumentIntent],
+ parentOperationId: str
) -> List[ContentPart]:
"""
Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor.
@@ -1425,12 +1620,25 @@ Return ONLY valid JSON following the structure above.
for document in documents:
# Check if document is already a ContentExtracted document (pre-extracted JSON)
+ logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content")
preExtracted = self._resolvePreExtractedDocument(document)
if preExtracted:
+ logger.info(f"✅ Found pre-extracted document: {document.fileName} -> Original: {preExtracted['originalDocument']['fileName']}")
+ logger.info(f" Pre-extracted document ID: {document.id}, Original document ID: {preExtracted['originalDocument']['id']}")
+ logger.info(f" ContentParts count: {len(preExtracted['contentExtracted'].parts) if preExtracted['contentExtracted'].parts else 0}")
+
# Verwende bereits extrahierte ContentParts direkt
contentExtracted = preExtracted["contentExtracted"]
+
+ # WICHTIG: Intent muss für das JSON-Dokument gefunden werden, nicht für das Original
+ # (Intent-Analyse mappt bereits zurück zu JSON-Dokument-ID)
intent = self._getIntentForDocument(document.id, documentIntents)
+ logger.info(f" Intent lookup for document {document.id}: found={intent is not None}")
+ if intent:
+ logger.info(f" Intent: {intent.intents}, extractionPrompt: {intent.extractionPrompt[:100] if intent.extractionPrompt else None}...")
+ else:
+ logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}")
if contentExtracted.parts:
for part in contentExtracted.parts:
@@ -1445,96 +1653,267 @@ Return ONLY valid JSON following the structure above.
# Ensure metadata is complete
if "documentId" not in part.metadata:
part.metadata["documentId"] = document.id
- if "contentFormat" not in part.metadata:
- part.metadata["contentFormat"] = "extracted"
# WICHTIG: Prüfe Intent für dieses Part
partIntent = intent.intents if intent else ["extract"]
- # Wenn Intent "render" für Images hat, erstelle auch object Part
- if "render" in partIntent and part.typeGroup == "image" and part.data:
- # Image-Part mit render Intent: Erstelle sowohl extracted als auch object Part
- # 1. Extracted Part - prüfe ob "extract" Intent vorhanden ist
- if "extract" in partIntent:
- # Image hat sowohl extract als auch render Intent
- # Extracted Part: Wird mit Vision AI verarbeitet (skipExtraction=False)
- part.metadata["intent"] = "extract"
- part.metadata["fromExtractContent"] = True
- part.metadata["skipExtraction"] = False # WICHTIG: Vision AI-Verarbeitung nötig!
- part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
- allContentParts.append(part)
- else:
- # Nur render Intent - kein Text-Extraktion nötig
- part.metadata["intent"] = "render"
- part.metadata["fromExtractContent"] = True
- part.metadata["skipExtraction"] = True
- part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
- allContentParts.append(part)
-
- # 2. Object Part für Rendering (base64 data ist bereits im extracted Part)
- objectPart = ContentPart(
- id=f"obj_{document.id}_{part.id}",
- label=f"Object: {part.label or 'Image'}",
- typeGroup="image",
- mimeType=part.mimeType or "image/jpeg",
- data=part.data, # Base64 data ist bereits vorhanden
+ # Debug-Logging für Intent-Verarbeitung
+ logger.debug(f"Processing part {part.id}: typeGroup={part.typeGroup}, intents={partIntent}, hasData={bool(part.data)}, dataLength={len(str(part.data)) if part.data else 0}")
+
+ # WICHTIG: Ein Part kann mehrere Intents haben - erstelle für jeden Intent einen ContentPart
+ # Generische Intent-Verarbeitung für ALLE Content-Typen
+ hasReferenceIntent = "reference" in partIntent
+ hasRenderIntent = "render" in partIntent
+ hasExtractIntent = "extract" in partIntent
+ hasPartData = bool(part.data) and (not isinstance(part.data, str) or len(part.data.strip()) > 0)
+
+ logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}")
+
+ # Track ob der originale Part bereits hinzugefügt wurde
+ originalPartAdded = False
+
+ # 1. Reference Intent: Erstelle Reference ContentPart
+ if hasReferenceIntent:
+ referencePart = ContentPart(
+ id=f"ref_{document.id}_{part.id}",
+ label=f"Reference: {part.label or 'Content'}",
+ typeGroup="reference",
+ mimeType=part.mimeType or "application/octet-stream",
+ data="", # Leer - nur Referenz
metadata={
- "contentFormat": "object",
+ "contentFormat": "reference",
"documentId": document.id,
- "intent": "render",
- "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}",
- "originalFileName": preExtracted["originalDocument"]["fileName"],
- "relatedExtractedPartId": part.id
+ "documentReference": f"docItem:{document.id}:{preExtracted['originalDocument']['fileName']}",
+ "intent": "reference",
+ "usageHint": f"Reference: {preExtracted['originalDocument']['fileName']}",
+ "originalFileName": preExtracted["originalDocument"]["fileName"]
}
)
- allContentParts.append(objectPart)
- elif part.typeGroup == "image" and "extract" in partIntent:
- # Image mit extract Intent: Vision AI-Verarbeitung nötig
- # Verarbeite Image mit Vision AI, um Text zu extrahieren
- try:
- extractedText = await self._extractTextFromImage(part, intent.extractionPrompt if intent else "Extract all text content from this image")
- if extractedText:
- # Erstelle neuen Text-Part mit extrahiertem Text
- textPart = ContentPart(
- id=f"extracted_{part.id}",
- label=f"Extracted text from {part.label or 'Image'}",
- typeGroup="text",
- mimeType="text/plain",
- data=extractedText,
- metadata={
- "contentFormat": "extracted",
- "documentId": document.id,
- "intent": "extract",
- "originalFileName": preExtracted["originalDocument"]["fileName"],
- "relatedImagePartId": part.id,
- "extractionPrompt": intent.extractionPrompt if intent else "Extract all text content from this image"
- }
- )
- allContentParts.append(textPart)
- logger.info(f"✅ Extracted text from image {part.id} using Vision AI")
+ allContentParts.append(referencePart)
+ logger.debug(f"✅ Created reference ContentPart for {part.id}")
+
+ # 2. Render Intent: Erstelle Object ContentPart (für Binary/Image Rendering)
+ if hasRenderIntent and hasPartData:
+ # Prüfe ob es ein Binary/Image ist (kann gerendert werden)
+ isRenderable = (
+ part.typeGroup == "image" or
+ part.typeGroup == "binary" or
+ (part.mimeType and (
+ part.mimeType.startswith("image/") or
+ part.mimeType.startswith("video/") or
+ part.mimeType.startswith("audio/") or
+ self._isBinary(part.mimeType)
+ ))
+ )
+
+ if isRenderable:
+ objectPart = ContentPart(
+ id=f"obj_{document.id}_{part.id}",
+ label=f"Object: {part.label or 'Content'}",
+ typeGroup=part.typeGroup,
+ mimeType=part.mimeType or "application/octet-stream",
+ data=part.data, # Base64/Binary data ist bereits vorhanden
+ metadata={
+ "contentFormat": "object",
+ "documentId": document.id,
+ "intent": "render",
+ "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}",
+ "originalFileName": preExtracted["originalDocument"]["fileName"],
+ "relatedExtractedPartId": f"extracted_{document.id}_{part.id}" if hasExtractIntent else None
+ }
+ )
+ allContentParts.append(objectPart)
+ logger.debug(f"✅ Created object ContentPart for {part.id} (render intent)")
+ else:
+ logger.warning(f"⚠️ Part {part.id} has render intent but is not renderable (typeGroup={part.typeGroup}, mimeType={part.mimeType})")
+ elif hasRenderIntent and not hasPartData:
+ logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part")
+
+ # 3. Extract Intent: Erstelle Extracted ContentPart (möglicherweise mit zusätzlicher Verarbeitung)
+ if hasExtractIntent:
+ # Spezielle Behandlung für Images: Vision AI für Text-Extraktion
+ if part.typeGroup == "image" and hasPartData:
+ logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)")
+ try:
+ extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. Return only the extracted text, no additional formatting."
+ extractedText = await self._extractTextFromImage(part, extractionPrompt)
+ if extractedText:
+ # Prüfe ob es ein Error-Message ist
+ isError = extractedText.startswith("[ERROR:")
+
+ # Erstelle neuen Text-Part mit extrahiertem Text oder Error-Message
+ textPart = ContentPart(
+ id=f"extracted_{document.id}_{part.id}",
+ label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}",
+ typeGroup="text",
+ mimeType="text/plain",
+ data=extractedText,
+ metadata={
+ "contentFormat": "extracted",
+ "documentId": document.id,
+ "intent": "extract",
+ "originalFileName": preExtracted["originalDocument"]["fileName"],
+ "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None,
+ "extractionPrompt": extractionPrompt,
+ "extractionMethod": "vision",
+ "isError": isError
+ }
+ )
+ allContentParts.append(textPart)
+ if isError:
+ logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}")
+ else:
+ logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars")
+ else:
+ # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück)
+ errorMsg = f"Vision AI extraction failed: Unexpected empty response for image {part.id}"
+ logger.error(errorMsg)
+ errorPart = ContentPart(
+ id=f"extracted_{document.id}_{part.id}",
+ label=f"Error extracting from {part.label or 'Image'}",
+ typeGroup="text",
+ mimeType="text/plain",
+ data=f"[ERROR: {errorMsg}]",
+ metadata={
+ "contentFormat": "extracted",
+ "documentId": document.id,
+ "intent": "extract",
+ "originalFileName": preExtracted["originalDocument"]["fileName"],
+ "extractionPrompt": extractionPrompt,
+ "extractionMethod": "vision",
+ "isError": True
+ }
+ )
+ allContentParts.append(errorPart)
+ except Exception as e:
+ logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}")
+ import traceback
+ logger.debug(f"Traceback: {traceback.format_exc()}")
+ # Kein Fallback: Wenn render Intent vorhanden, haben wir bereits object Part
+ # Wenn nur extract Intent: Original Part ist kein Text, daher nicht als extracted hinzufügen
+ if not hasRenderIntent:
+ logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available")
+ else:
+ # Für alle anderen Content-Typen: Prüfe ob AI-Verarbeitung benötigt wird
+ # WICHTIG: Pre-extracted ContentParts von context.extractContent enthalten RAW extrahierten Content
+ # (z.B. Text aus PDF-Text-Layer, Tabellen, etc.). Wenn "extract" Intent vorhanden ist,
+ # muss dieser Content mit AI verarbeitet werden basierend auf extractionPrompt.
- # Wenn auch render Intent vorhanden, füge Image-Part hinzu
- if "render" in partIntent:
- part.metadata["intent"] = "render"
- part.metadata["fromExtractContent"] = True
- part.metadata["skipExtraction"] = True
- part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
- allContentParts.append(part)
- except Exception as e:
- logger.warning(f"Failed to extract text from image {part.id}: {str(e)}, adding image as-is")
- # Fallback: Füge Image-Part hinzu ohne Text-Extraktion
- part.metadata["intent"] = "extract"
- part.metadata["fromExtractContent"] = True
- part.metadata["skipExtraction"] = False
- part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
- allContentParts.append(part)
- else:
- # Normales extracted Part (kein Image oder kein extract Intent)
- part.metadata["intent"] = partIntent[0] if partIntent else "extract"
- part.metadata["fromExtractContent"] = True
- part.metadata["skipExtraction"] = True # Bereits extrahiert
- part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"]
+ # Prüfe ob Part Text-Content hat (kann mit AI verarbeitet werden)
+ isTextContent = (
+ part.typeGroup == "text" or
+ part.typeGroup == "table" or
+ (part.data and isinstance(part.data, str) and len(part.data.strip()) > 0)
+ )
+
+ if isTextContent and intent and intent.extractionPrompt:
+ # Text-Content mit extractionPrompt: Verarbeite mit AI
+ logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)")
+ try:
+ extractionPrompt = intent.extractionPrompt
+ processedText = await self._processTextContentWithAi(part, extractionPrompt)
+ if processedText:
+ # Prüfe ob es ein Error-Message ist
+ isError = processedText.startswith("[ERROR:")
+
+ # Erstelle neuen Text-Part mit AI-verarbeitetem Text oder Error-Message
+ processedPart = ContentPart(
+ id=f"extracted_{document.id}_{part.id}",
+ label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}",
+ typeGroup="text",
+ mimeType="text/plain",
+ data=processedText,
+ metadata={
+ "contentFormat": "extracted",
+ "documentId": document.id,
+ "intent": "extract",
+ "originalFileName": preExtracted["originalDocument"]["fileName"],
+ "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None,
+ "extractionPrompt": extractionPrompt,
+ "extractionMethod": "ai",
+ "sourcePartId": part.id,
+ "fromExtractContent": True,
+ "isError": isError
+ }
+ )
+ allContentParts.append(processedPart)
+ originalPartAdded = True
+ if isError:
+ logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}")
+ else:
+ logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars")
+ else:
+ # Sollte nicht vorkommen (Funktion gibt jetzt immer Error-Message zurück)
+ errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}"
+ logger.error(errorMsg)
+ errorPart = ContentPart(
+ id=f"extracted_{document.id}_{part.id}",
+ label=f"Error processing {part.label or 'Content'}",
+ typeGroup="text",
+ mimeType="text/plain",
+ data=f"[ERROR: {errorMsg}]",
+ metadata={
+ "contentFormat": "extracted",
+ "documentId": document.id,
+ "intent": "extract",
+ "originalFileName": preExtracted["originalDocument"]["fileName"],
+ "extractionPrompt": extractionPrompt,
+ "extractionMethod": "ai",
+ "sourcePartId": part.id,
+ "isError": True
+ }
+ )
+ allContentParts.append(errorPart)
+ originalPartAdded = True
+ except Exception as e:
+ logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}")
+ import traceback
+ logger.debug(f"Traceback: {traceback.format_exc()}")
+ # Fallback: Verwende Original-Part
+ if not originalPartAdded:
+ part.metadata.update({
+ "contentFormat": "extracted",
+ "intent": "extract",
+ "fromExtractContent": True,
+ "skipExtraction": True,
+ "originalFileName": preExtracted["originalDocument"]["fileName"],
+ "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None
+ })
+ allContentParts.append(part)
+ originalPartAdded = True
+ else:
+ # Kein extractionPrompt oder kein Text-Content: Verwende Part direkt als extracted
+ # (Content ist bereits extrahiert von context.extractContent, keine weitere AI-Verarbeitung nötig)
+ # WICHTIG: Nur hinzufügen wenn noch nicht hinzugefügt (z.B. durch render Intent)
+ if not originalPartAdded:
+ part.metadata.update({
+ "contentFormat": "extracted",
+ "intent": "extract",
+ "fromExtractContent": True,
+ "skipExtraction": True, # Bereits extrahiert
+ "originalFileName": preExtracted["originalDocument"]["fileName"],
+ "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None
+ })
+ # Stelle sicher dass contentFormat gesetzt ist
+ if "contentFormat" not in part.metadata:
+ part.metadata["contentFormat"] = "extracted"
+ allContentParts.append(part)
+ originalPartAdded = True
+ logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)")
+
+ # 4. Fallback: Wenn kein Intent vorhanden oder Part wurde noch nicht hinzugefügt
+ # (sollte normalerweise nicht vorkommen, da default "extract" ist)
+ if not hasReferenceIntent and not hasRenderIntent and not hasExtractIntent and not originalPartAdded:
+ logger.warning(f"⚠️ Part {part.id} has no recognized intents, adding as extracted by default")
+ part.metadata.update({
+ "contentFormat": "extracted",
+ "intent": "extract",
+ "fromExtractContent": True,
+ "skipExtraction": True,
+ "originalFileName": preExtracted["originalDocument"]["fileName"]
+ })
allContentParts.append(part)
+ originalPartAdded = True
logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}")
logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}")
@@ -1715,6 +2094,18 @@ Return ONLY valid JSON following the structure above.
contentParts: List[ContentPart],
outputFormat: str,
parentOperationId: str
+ ) -> Dict[str, Any]:
+ """Delegate to StructureGenerator."""
+ return await self.structureGenerator.generateStructure(
+ userPrompt, contentParts, outputFormat, parentOperationId
+ )
+
+ async def _generateStructure_OLD(
+ self,
+ userPrompt: str,
+ contentParts: List[ContentPart],
+ outputFormat: str,
+ parentOperationId: str
) -> Dict[str, Any]:
"""
Phase 5C: Generiert Dokument-Struktur mit Sections.
@@ -1782,17 +2173,36 @@ Return ONLY valid JSON following the structure above.
# Baue ContentParts-Index - filtere leere Parts heraus
contentPartsIndex = ""
validParts = []
+ filteredParts = []
+
for part in contentParts:
+ contentFormat = part.metadata.get("contentFormat", "unknown")
+
+ # WICHTIG: Reference Parts haben absichtlich leere Daten - immer einschließen
+ if contentFormat == "reference":
+ validParts.append(part)
+ logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)")
+ continue
+
# Überspringe leere Parts (keine Daten oder nur Container ohne Inhalt)
+ # ABER: Reference Parts wurden bereits oben behandelt
if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0):
# Überspringe Container-Parts ohne Daten
if part.typeGroup == "container" and not part.data:
+ filteredParts.append((part.id, "container without data"))
continue
- # Überspringe andere leere Parts
+ # Überspringe andere leere Parts (aber nicht Reference, die wurden bereits behandelt)
if not part.data:
+ filteredParts.append((part.id, f"no data (format: {contentFormat})"))
continue
validParts.append(part)
+ logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}")
+
+ if filteredParts:
+ logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}")
+
+ logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)")
# Baue Index nur für gültige Parts
for i, part in enumerate(validParts, 1):
@@ -1825,11 +2235,14 @@ Return ONLY valid JSON following the structure above.
elif contentFormat == "reference":
dataPreview = part.metadata.get("documentReference", "reference")
+ originalFileName = part.metadata.get('originalFileName', 'N/A')
+
contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n"
contentPartsIndex += f" Format: {contentFormat}\n"
contentPartsIndex += f" Type: {part.typeGroup}\n"
contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n"
contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n"
+ contentPartsIndex += f" Original file name: {originalFileName}\n"
contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
contentPartsIndex += f" Data preview: {dataPreview}\n"
@@ -1896,6 +2309,18 @@ Return ONLY valid JSON following the structure above.
contentParts: List[ContentPart],
userPrompt: str,
parentOperationId: str
+ ) -> Dict[str, Any]:
+ """Delegate to StructureFiller."""
+ return await self.structureFiller.fillStructure(
+ structure, contentParts, userPrompt, parentOperationId
+ )
+
+ async def _fillStructure_OLD(
+ self,
+ structure: Dict[str, Any],
+ contentParts: List[ContentPart],
+ userPrompt: str,
+ parentOperationId: str
) -> Dict[str, Any]:
"""
Phase 5D: Füllt Struktur mit tatsächlichem Content.
@@ -1935,105 +2360,244 @@ Return ONLY valid JSON following the structure above.
# Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden)
sections_to_process = []
+ all_sections_list = [] # Für Kontext-Informationen
for doc in filledStructure.get("documents", []):
- for section in doc.get("sections", []):
+ doc_sections = doc.get("sections", [])
+ all_sections_list.extend(doc_sections)
+ for section in doc_sections:
sections_to_process.append((doc, section))
# Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden)
- for doc, section in sections_to_process:
+ for sectionIndex, (doc, section) in enumerate(sections_to_process):
sectionId = section.get("id")
contentPartIds = section.get("contentPartIds", [])
contentFormats = section.get("contentFormats", {})
generationHint = section.get("generation_hint")
+ contentType = section.get("content_type", "paragraph")
elements = []
- # Verarbeite ContentParts
- for partId in contentPartIds:
- part = self._findContentPartById(partId, contentParts)
- if not part:
- continue
-
- contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat"))
-
- if contentFormat == "reference":
- # Füge Dokument-Referenz hinzu
- elements.append({
- "type": "reference",
- "documentReference": part.metadata.get("documentReference"),
- "label": part.metadata.get("usageHint", part.label)
- })
-
- elif contentFormat == "object":
- # Füge base64 Object hinzu
- elements.append({
- "type": part.typeGroup, # "image", "binary", etc.
- "base64Data": part.data,
- "mimeType": part.mimeType,
- "altText": part.metadata.get("usageHint", part.label)
- })
-
- elif contentFormat == "extracted":
- # Füge extrahierten Text hinzu (kann in AI-Generierungs-Prompt verwendet werden)
- elements.append({
- "type": "extracted_text",
- "content": part.data,
- "source": part.metadata.get("documentId"),
- "extractionPrompt": part.metadata.get("extractionPrompt")
- })
+ # Prüfe ob Aggregation nötig ist
+ needsAggregation = self._needsAggregation(
+ contentType=contentType,
+ contentPartCount=len(contentPartIds)
+ )
- # Generiere AI-Content wenn nötig
- if generationHint:
- generationPrompt = self._buildSectionGenerationPrompt(
- section=section,
- contentParts=[self._findContentPartById(pid, contentParts) for pid in contentPartIds],
- userPrompt=userPrompt,
- generationHint=generationHint
- )
+ if needsAggregation and generationHint:
+ # Aggregation: Alle Parts zusammen verarbeiten
+ sectionParts = [
+ self._findContentPartById(pid, contentParts)
+ for pid in contentPartIds
+ ]
+ sectionParts = [p for p in sectionParts if p is not None]
- # Erstelle Operation-ID für Section-Generierung
- # Debug-Logs werden bereits von callAiPlanning geschrieben
- sectionOperationId = f"{fillOperationId}_section_{sectionId}"
-
- # Starte ChatLog mit Parent-Referenz
- self.services.chat.progressLogStart(
- sectionOperationId,
- "Section Generation",
- "Section",
- f"Generating section {sectionId}",
- parentOperationId=fillOperationId
- )
-
- try:
- # Generiere Content (verwende callAiPlanning für einfache JSON-Responses)
- # Debug-Logs werden bereits von callAiPlanning geschrieben
- aiResponse = await self.callAiPlanning(
- prompt=generationPrompt,
- debugType=f"section_generation_{sectionId}"
- )
+ if sectionParts:
+ # Filtere nur extracted Parts für Aggregation (reference/object werden separat behandelt)
+ extractedParts = [
+ p for p in sectionParts
+ if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted"
+ ]
+ nonExtractedParts = [
+ p for p in sectionParts
+ if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted"
+ ]
- # Parse und füge zu elements hinzu
- generatedElements = json.loads(
- self.services.utils.jsonExtractString(aiResponse)
- )
- if isinstance(generatedElements, list):
- elements.extend(generatedElements)
- elif isinstance(generatedElements, dict) and "elements" in generatedElements:
- elements.extend(generatedElements["elements"])
+ # Verarbeite non-extracted Parts separat (reference, object)
+ for part in nonExtractedParts:
+ contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat"))
+
+ if contentFormat == "reference":
+ elements.append({
+ "type": "reference",
+ "documentReference": part.metadata.get("documentReference"),
+ "label": part.metadata.get("usageHint", part.label)
+ })
+ elif contentFormat == "object":
+ elements.append({
+ "type": part.typeGroup,
+ "base64Data": part.data,
+ "mimeType": part.mimeType,
+ "altText": part.metadata.get("usageHint", part.label)
+ })
- # ChatLog abschließen
- self.services.chat.progressLogFinish(sectionOperationId, True)
-
- except Exception as e:
- # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!)
- self.services.chat.progressLogFinish(sectionOperationId, False)
- elements.append({
- "type": "error",
- "message": f"Error generating section {sectionId}: {str(e)}",
- "sectionId": sectionId
- })
- logger.error(f"Error generating section {sectionId}: {str(e)}")
- # NICHT raise - Section wird mit Fehlermeldung gerendert
+ # Aggregiere extracted Parts mit AI
+ if extractedParts:
+ generationPrompt = self._buildSectionGenerationPrompt(
+ section=section,
+ contentParts=extractedParts, # ALLE PARTS für Aggregation!
+ userPrompt=userPrompt,
+ generationHint=generationHint,
+ allSections=all_sections_list,
+ sectionIndex=sectionIndex,
+ isAggregation=True
+ )
+
+ # Erstelle Operation-ID für Section-Generierung
+ sectionOperationId = f"{fillOperationId}_section_{sectionId}"
+
+ # Starte ChatLog mit Parent-Referenz
+ self.services.chat.progressLogStart(
+ sectionOperationId,
+ "Section Generation (Aggregation)",
+ "Section",
+ f"Generating section {sectionId} with {len(extractedParts)} parts",
+ parentOperationId=fillOperationId
+ )
+
+ try:
+ # Debug: Log Prompt
+ self.services.utils.writeDebugFile(
+ generationPrompt,
+ f"section_content_{sectionId}_prompt"
+ )
+
+ # Verwende callAi für ContentParts-Unterstützung (nicht callAiPlanning!)
+ request = AiCallRequest(
+ prompt=generationPrompt,
+ contentParts=extractedParts, # ALLE PARTS!
+ options=AiCallOptions(
+ operationType=OperationTypeEnum.DATA_ANALYSE,
+ priority=PriorityEnum.BALANCED,
+ processingMode=ProcessingModeEnum.DETAILED
+ )
+ )
+ aiResponse = await self.callAi(request)
+
+ # Debug: Log Response
+ self.services.utils.writeDebugFile(
+ aiResponse.content,
+ f"section_content_{sectionId}_response"
+ )
+
+ # Parse und füge zu elements hinzu
+ generatedElements = json.loads(
+ self.services.utils.jsonExtractString(aiResponse.content)
+ )
+ if isinstance(generatedElements, list):
+ elements.extend(generatedElements)
+ elif isinstance(generatedElements, dict) and "elements" in generatedElements:
+ elements.extend(generatedElements["elements"])
+
+ # ChatLog abschließen
+ self.services.chat.progressLogFinish(sectionOperationId, True)
+
+ except Exception as e:
+ # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!)
+ self.services.chat.progressLogFinish(sectionOperationId, False)
+ elements.append({
+ "type": "error",
+ "message": f"Error generating section {sectionId}: {str(e)}",
+ "sectionId": sectionId
+ })
+ logger.error(f"Error generating section {sectionId}: {str(e)}")
+ # NICHT raise - Section wird mit Fehlermeldung gerendert
+
+ else:
+ # Einzelverarbeitung: Jeder Part einzeln
+ for partId in contentPartIds:
+ part = self._findContentPartById(partId, contentParts)
+ if not part:
+ continue
+
+ contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat"))
+
+ if contentFormat == "reference":
+ # Füge Dokument-Referenz hinzu
+ elements.append({
+ "type": "reference",
+ "documentReference": part.metadata.get("documentReference"),
+ "label": part.metadata.get("usageHint", part.label)
+ })
+
+ elif contentFormat == "object":
+ # Füge base64 Object hinzu
+ elements.append({
+ "type": part.typeGroup, # "image", "binary", etc.
+ "base64Data": part.data,
+ "mimeType": part.mimeType,
+ "altText": part.metadata.get("usageHint", part.label)
+ })
+
+ elif contentFormat == "extracted":
+ if generationHint:
+ # AI-Call mit einzelnen ContentPart
+ generationPrompt = self._buildSectionGenerationPrompt(
+ section=section,
+ contentParts=[part], # EIN PART
+ userPrompt=userPrompt,
+ generationHint=generationHint,
+ allSections=all_sections_list,
+ sectionIndex=sectionIndex,
+ isAggregation=False
+ )
+
+ # Erstelle Operation-ID für Section-Generierung
+ sectionOperationId = f"{fillOperationId}_section_{sectionId}"
+
+ # Starte ChatLog mit Parent-Referenz
+ self.services.chat.progressLogStart(
+ sectionOperationId,
+ "Section Generation",
+ "Section",
+ f"Generating section {sectionId}",
+ parentOperationId=fillOperationId
+ )
+
+ try:
+ # Debug: Log Prompt
+ self.services.utils.writeDebugFile(
+ generationPrompt,
+ f"section_content_{sectionId}_prompt"
+ )
+
+ # Verwende callAi für ContentParts-Unterstützung
+ request = AiCallRequest(
+ prompt=generationPrompt,
+ contentParts=[part],
+ options=AiCallOptions(
+ operationType=OperationTypeEnum.DATA_ANALYSE,
+ priority=PriorityEnum.BALANCED,
+ processingMode=ProcessingModeEnum.DETAILED
+ )
+ )
+ aiResponse = await self.callAi(request)
+
+ # Debug: Log Response
+ self.services.utils.writeDebugFile(
+ aiResponse.content,
+ f"section_content_{sectionId}_response"
+ )
+
+ # Parse und füge zu elements hinzu
+ generatedElements = json.loads(
+ self.services.utils.jsonExtractString(aiResponse.content)
+ )
+ if isinstance(generatedElements, list):
+ elements.extend(generatedElements)
+ elif isinstance(generatedElements, dict) and "elements" in generatedElements:
+ elements.extend(generatedElements["elements"])
+
+ # ChatLog abschließen
+ self.services.chat.progressLogFinish(sectionOperationId, True)
+
+ except Exception as e:
+ # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!)
+ self.services.chat.progressLogFinish(sectionOperationId, False)
+ elements.append({
+ "type": "error",
+ "message": f"Error generating section {sectionId}: {str(e)}",
+ "sectionId": sectionId
+ })
+ logger.error(f"Error generating section {sectionId}: {str(e)}")
+ # NICHT raise - Section wird mit Fehlermeldung gerendert
+ else:
+ # Füge extrahierten Text direkt hinzu (kein AI-Call)
+ elements.append({
+ "type": "extracted_text",
+ "content": part.data,
+ "source": part.metadata.get("documentId"),
+ "extractionPrompt": part.metadata.get("extractionPrompt")
+ })
section["elements"] = elements
@@ -2052,35 +2616,185 @@ Return ONLY valid JSON following the structure above.
section: Dict[str, Any],
contentParts: List[Optional[ContentPart]],
userPrompt: str,
- generationHint: str
+ generationHint: str,
+ allSections: Optional[List[Dict[str, Any]]] = None,
+ sectionIndex: Optional[int] = None,
+ isAggregation: bool = False
) -> str:
- """Baue Prompt für Section-Generierung."""
+ """Baue Prompt für Section-Generierung mit vollständigem Kontext."""
# Filtere None-Werte
validParts = [p for p in contentParts if p is not None]
- contentPartsText = ""
- for part in validParts:
- contentFormat = part.metadata.get("contentFormat", "unknown")
- contentPartsText += f"\n- ContentPart {part.id}:\n"
- contentPartsText += f" Format: {contentFormat}\n"
- if contentFormat == "extracted":
- contentPartsText += f" Content: {part.data[:500]}...\n" if len(part.data) > 500 else f" Content: {part.data}\n"
- elif contentFormat == "reference":
- contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n"
- elif contentFormat == "object":
- contentPartsText += f" Object: {part.typeGroup} ({part.mimeType})\n"
+ # Section-Metadaten
+ sectionId = section.get("id", "unknown")
+ contentType = section.get("content_type", "paragraph")
- prompt = f"""USER REQUEST:
+ # Baue ContentParts-Beschreibung
+ contentPartsText = ""
+ if isAggregation:
+ # Aggregation: Zeige nur Metadaten, nicht Previews
+ contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n"
+ contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n"
+ contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n"
+ contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n"
+ contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n"
+ contentPartsText += f"ContentPart IDs:\n"
+ for part in validParts:
+ contentFormat = part.metadata.get("contentFormat", "unknown")
+ contentPartsText += f" - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}"
+ if part.metadata.get("originalFileName"):
+ contentPartsText += f", Source: {part.metadata.get('originalFileName')}"
+ contentPartsText += ")\n"
+ else:
+ # Einzelverarbeitung: Zeige Previews
+ for part in validParts:
+ contentFormat = part.metadata.get("contentFormat", "unknown")
+ contentPartsText += f"\n- ContentPart {part.id}:\n"
+ contentPartsText += f" Format: {contentFormat}\n"
+ contentPartsText += f" Type: {part.typeGroup}\n"
+ if part.metadata.get("originalFileName"):
+ contentPartsText += f" Source file: {part.metadata.get('originalFileName')}\n"
+
+ if contentFormat == "extracted":
+ # Zeige Preview von extrahiertem Text (länger für besseren Kontext)
+ previewLength = 1000
+ if part.data:
+ preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data
+ contentPartsText += f" Content preview:\n```\n{preview}\n```\n"
+ else:
+ contentPartsText += f" Content: (empty)\n"
+ elif contentFormat == "reference":
+ contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n"
+ if part.metadata.get("usageHint"):
+ contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n"
+ elif contentFormat == "object":
+ dataLength = len(part.data) if part.data else 0
+ contentPartsText += f" Object type: {part.typeGroup}\n"
+ contentPartsText += f" MIME type: {part.mimeType}\n"
+ contentPartsText += f" Data size: {dataLength} chars (base64 encoded)\n"
+ if part.metadata.get("usageHint"):
+ contentPartsText += f" Usage hint: {part.metadata.get('usageHint')}\n"
+
+ # Baue Section-Kontext (vorherige und nachfolgende Sections)
+ contextText = ""
+ if allSections and sectionIndex is not None:
+ prevSections = []
+ nextSections = []
+
+ if sectionIndex > 0:
+ for i in range(max(0, sectionIndex - 2), sectionIndex):
+ prevSection = allSections[i]
+ prevSections.append({
+ "id": prevSection.get("id"),
+ "content_type": prevSection.get("content_type"),
+ "generation_hint": prevSection.get("generation_hint", "")[:100]
+ })
+
+ if sectionIndex < len(allSections) - 1:
+ for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)):
+ nextSection = allSections[i]
+ nextSections.append({
+ "id": nextSection.get("id"),
+ "content_type": nextSection.get("content_type"),
+ "generation_hint": nextSection.get("generation_hint", "")[:100]
+ })
+
+ if prevSections or nextSections:
+ contextText = "\n## DOCUMENT CONTEXT\n"
+ if prevSections:
+ contextText += "\nPrevious sections:\n"
+ for prev in prevSections:
+ contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n"
+ if nextSections:
+ contextText += "\nFollowing sections:\n"
+ for next in nextSections:
+ contextText += f"- {next['id']} ({next['content_type']}): {next['generation_hint']}\n"
+
+ if isAggregation:
+ prompt = f"""# TASK: Generate Section Content (Aggregation)
+
+## SECTION METADATA
+- Section ID: {sectionId}
+- Content Type: {contentType}
+- Generation Hint: {generationHint}
+{contextText}
+
+## USER REQUEST (for context)
+```
{userPrompt}
+```
-SECTION TO GENERATE:
-{generationHint}
+## AVAILABLE CONTENT FOR THIS SECTION
+{contentPartsText if contentPartsText else "(No content parts specified for this section)"}
-AVAILABLE CONTENT FOR THIS SECTION:
-{contentPartsText}
+## INSTRUCTIONS
+1. Generate content for section "{sectionId}" based on the generation hint above
+2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data)
+3. For table content_type: Create a single table with headers and rows from all ContentParts
+4. For bullet_list content_type: Create a single list with items from all ContentParts
+5. Format appropriately based on content_type ({contentType})
+6. Ensure the generated content fits logically between previous and following sections
+7. Return ONLY a JSON object with an "elements" array
+8. Each element should match the content_type: {contentType}
-CRITICAL: Return ONLY a JSON object with an "elements" array.
-Jedes Element sollte dem content_type der Section entsprechen.
+## OUTPUT FORMAT
+Return a JSON object with this structure:
+```json
+{{
+ "elements": [
+ {{
+ "type": "{contentType}",
+ "headers": [...], // if table
+ "rows": [...], // if table
+ "items": [...], // if bullet_list
+ "content": "..." // if paragraph
+ }}
+ ]
+}}
+```
+
+CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
+"""
+ else:
+ prompt = f"""# TASK: Generate Section Content
+
+## SECTION METADATA
+- Section ID: {sectionId}
+- Content Type: {contentType}
+- Generation Hint: {generationHint}
+{contextText}
+
+## USER REQUEST (for context)
+```
+{userPrompt}
+```
+
+## AVAILABLE CONTENT FOR THIS SECTION
+{contentPartsText if contentPartsText else "(No content parts specified for this section)"}
+
+## INSTRUCTIONS
+1. Generate content for section "{sectionId}" based on the generation hint above
+2. Use the available content parts to populate this section
+3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data
+4. For extracted text: Format appropriately based on content_type ({contentType})
+5. Ensure the generated content fits logically between previous and following sections
+6. Return ONLY a JSON object with an "elements" array
+7. Each element should match the content_type: {contentType}
+
+## OUTPUT FORMAT
+Return a JSON object with this structure:
+```json
+{{
+ "elements": [
+ {{
+ "type": "{contentType}",
+ "content": "..."
+ }}
+ ]
+}}
+```
+
+CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
"""
return prompt
@@ -2091,6 +2805,35 @@ Jedes Element sollte dem content_type der Section entsprechen.
return part
return None
+    def _needsAggregation(
+        self,
+        contentType: str,
+        contentPartCount: int
+    ) -> bool:
+        """
+        Determine whether multiple ContentParts must be aggregated.
+
+        Aggregation is needed when:
+        - the content_type requires aggregation (table, bullet_list)
+        - AND more than one ContentPart is present (> 1)
+
+        Args:
+            contentType: Section content_type
+            contentPartCount: Number of ContentParts in this section
+
+        Returns:
+            True if aggregation is needed, False otherwise
+        """
+        aggregationTypes = ["table", "bullet_list"]
+
+        if contentType in aggregationTypes and contentPartCount > 1:
+            return True
+
+        # Optional: could also aggregate for paragraph when several parts exist
+        # (e.g. comparing multiple documents)
+        # Default: no aggregation for paragraph
+        return False
+
async def _renderResult(
self,
filledStructure: Dict[str, Any],
diff --git a/modules/services/serviceAi/subContentExtraction.py b/modules/services/serviceAi/subContentExtraction.py
new file mode 100644
index 00000000..229587f8
--- /dev/null
+++ b/modules/services/serviceAi/subContentExtraction.py
@@ -0,0 +1,670 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Content Extraction Module
+
+Handles content extraction and preparation, including:
+- Extracting content from documents based on intents
+- Processing pre-extracted documents
+- Vision AI for image text extraction
+- AI processing of text content
+"""
+import json
+import logging
+import base64
+from typing import Dict, Any, List, Optional
+
+from modules.datamodels.datamodelChat import ChatDocument
+from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
+
+logger = logging.getLogger(__name__)
+
+
+class ContentExtractor:
+ """Handles content extraction and preparation."""
+
    def __init__(self, services, aiService, intentAnalyzer):
        """Initialize ContentExtractor with service center, AI service, and intent analyzer access."""
        self.services = services  # service center (chat, utils, db interface, extraction, ...)
        self.aiService = aiService  # AI service used for Vision AI / text-processing calls
        self.intentAnalyzer = intentAnalyzer  # DocumentIntentAnalyzer, used to resolve pre-extracted docs
+
    async def extractAndPrepareContent(
        self,
        documents: List[ChatDocument],
        documentIntents: List[DocumentIntent],
        parentOperationId: str,
        getIntentForDocument: callable
    ) -> List[ContentPart]:
        """
        Phase 5B: Extract content based on intents and prepare ContentParts with metadata.

        Returns the list of ContentParts in the format expected downstream.

        IMPORTANT: A single document can produce multiple ContentParts when several
        intents are present. Example: an image with intents=["extract", "render"]
        produces:
          - ContentPart(contentFormat="object", ...) for rendering
          - ContentPart(contentFormat="extracted", ...) for text analysis

        Args:
            documents: Documents to process
            documentIntents: List of DocumentIntent objects
            parentOperationId: Parent operation ID for the ChatLog hierarchy
            getIntentForDocument: Callable to get intent for document ID

        Returns:
            List of ContentParts with complete metadata

        Raises:
            Exception: Any error is re-raised after the progress log entry is closed
                with failure status.
        """
        # Dedicated operation ID so extraction appears as a child entry in the ChatLog.
        extractionOperationId = f"{parentOperationId}_content_extraction"

        # Start the ChatLog entry with a reference to the parent operation.
        self.services.chat.progressLogStart(
            extractionOperationId,
            "Content Extraction",
            "Extraction",
            f"Extracting from {len(documents)} documents",
            parentOperationId=parentOperationId
        )

        try:
            allContentParts = []

            for document in documents:
                # Check if document is already a ContentExtracted document (pre-extracted JSON)
                logger.debug(f"Checking document {document.id} ({document.fileName}, mimeType={document.mimeType}) for pre-extracted content")
                preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(document)

                if preExtracted:
                    logger.info(f"✅ Found pre-extracted document: {document.fileName} -> Original: {preExtracted['originalDocument']['fileName']}")
                    logger.info(f" Pre-extracted document ID: {document.id}, Original document ID: {preExtracted['originalDocument']['id']}")
                    logger.info(f" ContentParts count: {len(preExtracted['contentExtracted'].parts) if preExtracted['contentExtracted'].parts else 0}")

                    # Use the already extracted ContentParts directly.
                    contentExtracted = preExtracted["contentExtracted"]

                    # IMPORTANT: the intent must be looked up for the JSON document, not for
                    # the original (intent analysis already maps back to the JSON document ID).
                    intent = getIntentForDocument(document.id, documentIntents)
                    logger.info(f" Intent lookup for document {document.id}: found={intent is not None}")
                    if intent:
                        logger.info(f" Intent: {intent.intents}, extractionPrompt: {intent.extractionPrompt[:100] if intent.extractionPrompt else None}...")
                    else:
                        logger.warning(f" ⚠️ No intent found for pre-extracted document {document.id}! Available intent documentIds: {[i.documentId for i in documentIntents]}")

                    if contentExtracted.parts:
                        for part in contentExtracted.parts:
                            # Skip empty parts (containers without data); non-container
                            # empty parts fall through and keep being processed.
                            if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0):
                                if part.typeGroup == "container":
                                    continue  # skip empty containers

                            if not part.metadata:
                                part.metadata = {}

                            # Ensure metadata is complete
                            if "documentId" not in part.metadata:
                                part.metadata["documentId"] = document.id

                            # IMPORTANT: check the intent for this part (default "extract").
                            partIntent = intent.intents if intent else ["extract"]

                            # Debug logging for intent processing
                            logger.debug(f"Processing part {part.id}: typeGroup={part.typeGroup}, intents={partIntent}, hasData={bool(part.data)}, dataLength={len(str(part.data)) if part.data else 0}")

                            # IMPORTANT: a part can carry several intents - one ContentPart is
                            # created per intent. Generic intent handling for ALL content types.
                            hasReferenceIntent = "reference" in partIntent
                            hasRenderIntent = "render" in partIntent
                            hasExtractIntent = "extract" in partIntent
                            hasPartData = bool(part.data) and (not isinstance(part.data, str) or len(part.data.strip()) > 0)

                            logger.debug(f"Part {part.id}: reference={hasReferenceIntent}, render={hasRenderIntent}, extract={hasExtractIntent}, hasData={hasPartData}")

                            # Track whether the original part object has already been appended,
                            # to avoid duplicates across the intent branches below.
                            originalPartAdded = False

                            # 1. Reference intent: create a reference ContentPart (no payload).
                            if hasReferenceIntent:
                                referencePart = ContentPart(
                                    id=f"ref_{document.id}_{part.id}",
                                    label=f"Reference: {part.label or 'Content'}",
                                    typeGroup="reference",
                                    mimeType=part.mimeType or "application/octet-stream",
                                    data="",  # empty - reference only
                                    metadata={
                                        "contentFormat": "reference",
                                        "documentId": document.id,
                                        "documentReference": f"docItem:{document.id}:{preExtracted['originalDocument']['fileName']}",
                                        "intent": "reference",
                                        "usageHint": f"Reference: {preExtracted['originalDocument']['fileName']}",
                                        "originalFileName": preExtracted["originalDocument"]["fileName"]
                                    }
                                )
                                allContentParts.append(referencePart)
                                logger.debug(f"✅ Created reference ContentPart for {part.id}")

                            # 2. Render intent: create an object ContentPart (for binary/image rendering).
                            if hasRenderIntent and hasPartData:
                                # Check whether the part is a binary/image (i.e. renderable).
                                isRenderable = (
                                    part.typeGroup == "image" or
                                    part.typeGroup == "binary" or
                                    (part.mimeType and (
                                        part.mimeType.startswith("image/") or
                                        part.mimeType.startswith("video/") or
                                        part.mimeType.startswith("audio/") or
                                        self._isBinary(part.mimeType)
                                    ))
                                )

                                if isRenderable:
                                    objectPart = ContentPart(
                                        id=f"obj_{document.id}_{part.id}",
                                        label=f"Object: {part.label or 'Content'}",
                                        typeGroup=part.typeGroup,
                                        mimeType=part.mimeType or "application/octet-stream",
                                        data=part.data,  # base64/binary payload is already present
                                        metadata={
                                            "contentFormat": "object",
                                            "documentId": document.id,
                                            "intent": "render",
                                            "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}",
                                            "originalFileName": preExtracted["originalDocument"]["fileName"],
                                            # cross-link to the extracted twin part, if one will exist
                                            "relatedExtractedPartId": f"extracted_{document.id}_{part.id}" if hasExtractIntent else None
                                        }
                                    )
                                    allContentParts.append(objectPart)
                                    logger.debug(f"✅ Created object ContentPart for {part.id} (render intent)")
                                else:
                                    logger.warning(f"⚠️ Part {part.id} has render intent but is not renderable (typeGroup={part.typeGroup}, mimeType={part.mimeType})")
                            elif hasRenderIntent and not hasPartData:
                                logger.warning(f"⚠️ Part {part.id} has render intent but no data, skipping render part")

                            # 3. Extract intent: create an extracted ContentPart (possibly with
                            #    additional AI processing).
                            if hasExtractIntent:
                                # Special handling for images: Vision AI for text extraction.
                                if part.typeGroup == "image" and hasPartData:
                                    logger.info(f"🔄 Processing image {part.id} with Vision AI (extract intent)")
                                    try:
                                        extractionPrompt = intent.extractionPrompt if intent and intent.extractionPrompt else "Extract all text content from this image. Return only the extracted text, no additional formatting."
                                        extractedText = await self.extractTextFromImage(part, extractionPrompt)
                                        if extractedText:
                                            # The helper signals failure via an "[ERROR: ...]" prefix.
                                            isError = extractedText.startswith("[ERROR:")

                                            # Create a new text part with the extracted text or the error message.
                                            textPart = ContentPart(
                                                id=f"extracted_{document.id}_{part.id}",
                                                label=f"Extracted text from {part.label or 'Image'}" if not isError else f"Error extracting from {part.label or 'Image'}",
                                                typeGroup="text",
                                                mimeType="text/plain",
                                                data=extractedText,
                                                metadata={
                                                    "contentFormat": "extracted",
                                                    "documentId": document.id,
                                                    "intent": "extract",
                                                    "originalFileName": preExtracted["originalDocument"]["fileName"],
                                                    "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None,
                                                    "extractionPrompt": extractionPrompt,
                                                    "extractionMethod": "vision",
                                                    "isError": isError
                                                }
                                            )
                                            allContentParts.append(textPart)
                                            if isError:
                                                logger.error(f"❌ Vision AI extraction failed for image {part.id}: {extractedText}")
                                            else:
                                                logger.info(f"✅ Extracted text from image {part.id} using Vision AI: {len(extractedText)} chars")
                                        else:
                                            # Should not happen (the helper now always returns an error message).
                                            errorMsg = f"Vision AI extraction failed: Unexpected empty response for image {part.id}"
                                            logger.error(errorMsg)
                                            errorPart = ContentPart(
                                                id=f"extracted_{document.id}_{part.id}",
                                                label=f"Error extracting from {part.label or 'Image'}",
                                                typeGroup="text",
                                                mimeType="text/plain",
                                                data=f"[ERROR: {errorMsg}]",
                                                metadata={
                                                    "contentFormat": "extracted",
                                                    "documentId": document.id,
                                                    "intent": "extract",
                                                    "originalFileName": preExtracted["originalDocument"]["fileName"],
                                                    "extractionPrompt": extractionPrompt,
                                                    "extractionMethod": "vision",
                                                    "isError": True
                                                }
                                            )
                                            allContentParts.append(errorPart)
                                    except Exception as e:
                                        logger.error(f"❌ Failed to extract text from image {part.id}: {str(e)}")
                                        import traceback
                                        logger.debug(f"Traceback: {traceback.format_exc()}")
                                        # No fallback: with a render intent the object part already exists.
                                        # With extract-only intent the original part is not text, so it is
                                        # not added as "extracted".
                                        if not hasRenderIntent:
                                            logger.debug(f"Image {part.id} has only extract intent, Vision AI failed - no extracted text available")
                                else:
                                    # All other content types: check whether AI processing is needed.
                                    # IMPORTANT: pre-extracted ContentParts from context.extractContent
                                    # contain RAW extracted content (e.g. text from a PDF text layer,
                                    # tables, ...). With an "extract" intent this content must be
                                    # processed by AI according to extractionPrompt.

                                    # Check whether the part has text content (AI-processable).
                                    isTextContent = (
                                        part.typeGroup == "text" or
                                        part.typeGroup == "table" or
                                        (part.data and isinstance(part.data, str) and len(part.data.strip()) > 0)
                                    )

                                    if isTextContent and intent and intent.extractionPrompt:
                                        # Text content with an extractionPrompt: process with AI.
                                        logger.info(f"🔄 Processing text content {part.id} with AI (extract intent with prompt)")
                                        try:
                                            extractionPrompt = intent.extractionPrompt
                                            processedText = await self.processTextContentWithAi(part, extractionPrompt)
                                            if processedText:
                                                # The helper signals failure via an "[ERROR: ...]" prefix.
                                                isError = processedText.startswith("[ERROR:")

                                                # Create a new text part with the AI-processed text or the error message.
                                                processedPart = ContentPart(
                                                    id=f"extracted_{document.id}_{part.id}",
                                                    label=f"AI-processed: {part.label or 'Content'}" if not isError else f"Error processing {part.label or 'Content'}",
                                                    typeGroup="text",
                                                    mimeType="text/plain",
                                                    data=processedText,
                                                    metadata={
                                                        "contentFormat": "extracted",
                                                        "documentId": document.id,
                                                        "intent": "extract",
                                                        "originalFileName": preExtracted["originalDocument"]["fileName"],
                                                        "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None,
                                                        "extractionPrompt": extractionPrompt,
                                                        "extractionMethod": "ai",
                                                        "sourcePartId": part.id,
                                                        "fromExtractContent": True,
                                                        "isError": isError
                                                    }
                                                )
                                                allContentParts.append(processedPart)
                                                originalPartAdded = True
                                                if isError:
                                                    logger.error(f"❌ AI text processing failed for part {part.id}: {processedText}")
                                                else:
                                                    logger.info(f"✅ Processed text content {part.id} with AI: {len(processedText)} chars")
                                            else:
                                                # Should not happen (the helper now always returns an error message).
                                                errorMsg = f"AI text processing failed: Unexpected empty response for part {part.id}"
                                                logger.error(errorMsg)
                                                errorPart = ContentPart(
                                                    id=f"extracted_{document.id}_{part.id}",
                                                    label=f"Error processing {part.label or 'Content'}",
                                                    typeGroup="text",
                                                    mimeType="text/plain",
                                                    data=f"[ERROR: {errorMsg}]",
                                                    metadata={
                                                        "contentFormat": "extracted",
                                                        "documentId": document.id,
                                                        "intent": "extract",
                                                        "originalFileName": preExtracted["originalDocument"]["fileName"],
                                                        "extractionPrompt": extractionPrompt,
                                                        "extractionMethod": "ai",
                                                        "sourcePartId": part.id,
                                                        "isError": True
                                                    }
                                                )
                                                allContentParts.append(errorPart)
                                                originalPartAdded = True
                                        except Exception as e:
                                            logger.error(f"❌ Failed to process text content {part.id} with AI: {str(e)}")
                                            import traceback
                                            logger.debug(f"Traceback: {traceback.format_exc()}")
                                            # Fallback: use the original (raw) part as extracted content.
                                            if not originalPartAdded:
                                                part.metadata.update({
                                                    "contentFormat": "extracted",
                                                    "intent": "extract",
                                                    "fromExtractContent": True,
                                                    "skipExtraction": True,
                                                    "originalFileName": preExtracted["originalDocument"]["fileName"],
                                                    "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None
                                                })
                                                allContentParts.append(part)
                                                originalPartAdded = True
                                    else:
                                        # No extractionPrompt or no text content: use the part directly as
                                        # "extracted" (content was already extracted by context.extractContent,
                                        # no further AI processing needed).
                                        # IMPORTANT: only add if not already added (e.g. via render intent).
                                        if not originalPartAdded:
                                            part.metadata.update({
                                                "contentFormat": "extracted",
                                                "intent": "extract",
                                                "fromExtractContent": True,
                                                "skipExtraction": True,  # already extracted
                                                "originalFileName": preExtracted["originalDocument"]["fileName"],
                                                "relatedObjectPartId": f"obj_{document.id}_{part.id}" if hasRenderIntent else None
                                            })
                                            # Make sure contentFormat is set
                                            if "contentFormat" not in part.metadata:
                                                part.metadata["contentFormat"] = "extracted"
                                            allContentParts.append(part)
                                            originalPartAdded = True
                                            logger.debug(f"✅ Using pre-extracted ContentPart {part.id} as extracted (no AI processing needed)")

                            # 4. Fallback: no recognized intent and part not yet added
                            #    (should normally not happen, since the default is "extract").
                            if not hasReferenceIntent and not hasRenderIntent and not hasExtractIntent and not originalPartAdded:
                                logger.warning(f"⚠️ Part {part.id} has no recognized intents, adding as extracted by default")
                                part.metadata.update({
                                    "contentFormat": "extracted",
                                    "intent": "extract",
                                    "fromExtractContent": True,
                                    "skipExtraction": True,
                                    "originalFileName": preExtracted["originalDocument"]["fileName"]
                                })
                                allContentParts.append(part)
                                originalPartAdded = True

                    logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}")
                    logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}")
                    continue  # Skip normal extraction for this document

                # Check if it's standardized JSON format (has "documents" or "sections")
                if document.mimeType == "application/json":
                    try:
                        docBytes = self.services.interfaceDbComponent.getFileData(document.fileId)
                        if docBytes:
                            docData = docBytes.decode('utf-8')
                            jsonData = json.loads(docData)

                            if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData):
                                logger.info(f"Document is already in standardized JSON format, using as reference")
                                # Create reference ContentPart for structured JSON
                                contentPart = ContentPart(
                                    id=f"ref_{document.id}",
                                    label=f"Reference: {document.fileName}",
                                    typeGroup="structure",
                                    mimeType="application/json",
                                    data=docData,
                                    metadata={
                                        "contentFormat": "reference",
                                        "documentId": document.id,
                                        "documentReference": f"docItem:{document.id}:{document.fileName}",
                                        "skipExtraction": True,
                                        "intent": "reference"
                                    }
                                )
                                allContentParts.append(contentPart)
                                logger.info(f"✅ Using JSON document directly without extraction")
                                continue  # Skip normal extraction for this document
                    except Exception as e:
                        logger.warning(f"Could not parse JSON document {document.fileName}, will extract normally: {str(e)}")
                        # Continue with normal extraction

                # Normal extraction path
                intent = getIntentForDocument(document.id, documentIntents)

                if not intent:
                    # Default: "extract" for all documents without an intent.
                    logger.warning(f"No intent found for document {document.id}, using default 'extract'")
                    intent = DocumentIntent(
                        documentId=document.id,
                        intents=["extract"],
                        extractionPrompt="Extract all content from the document",
                        reasoning="Default intent: no specific intent found"
                    )

                # IMPORTANT: check all intents - a document may produce multiple ContentParts.

                if "reference" in intent.intents:
                    # Create a reference ContentPart (no payload).
                    contentPart = ContentPart(
                        id=f"ref_{document.id}",
                        label=f"Reference: {document.fileName}",
                        typeGroup="reference",
                        mimeType=document.mimeType,
                        data="",
                        metadata={
                            "contentFormat": "reference",
                            "documentId": document.id,
                            "documentReference": f"docItem:{document.id}:{document.fileName}",
                            "intent": "reference",
                            "usageHint": f"Reference document: {document.fileName}"
                        }
                    )
                    allContentParts.append(contentPart)

                # IMPORTANT: "render" and "extract" can both be present!
                # In that case BOTH ContentParts are created.

                if "render" in intent.intents:
                    # For images/binaries: extract as object.
                    if document.mimeType.startswith("image/") or self._isBinary(document.mimeType):
                        try:
                            # Load binary data (getFileData is not async - no await needed)
                            binaryData = self.services.interfaceDbComponent.getFileData(document.fileId)
                            if not binaryData:
                                logger.warning(f"No binary data found for document {document.id}")
                                # NOTE(review): this `continue` skips the whole document,
                                # including a possible "extract" intent below - confirm intended.
                                continue
                            base64Data = base64.b64encode(binaryData).decode('utf-8')

                            contentPart = ContentPart(
                                id=f"obj_{document.id}",
                                label=f"Object: {document.fileName}",
                                typeGroup="image" if document.mimeType.startswith("image/") else "binary",
                                mimeType=document.mimeType,
                                data=base64Data,
                                metadata={
                                    "contentFormat": "object",
                                    "documentId": document.id,
                                    "intent": "render",
                                    "usageHint": f"Render as visual element: {document.fileName}",
                                    "originalFileName": document.fileName,
                                    # cross-link to the extracted part (if any)
                                    "relatedExtractedPartId": f"ext_{document.id}" if "extract" in intent.intents else None
                                }
                            )
                            allContentParts.append(contentPart)
                        except Exception as e:
                            logger.error(f"Failed to load binary data for document {document.id}: {str(e)}")

                if "extract" in intent.intents:
                    # Extract content via the extraction service.
                    extractionPrompt = intent.extractionPrompt or "Extract all content from the document"

                    # Debug log (harmonized)
                    self.services.utils.writeDebugFile(
                        extractionPrompt,
                        f"content_extraction_prompt_{document.id}"
                    )

                    # Run the extraction.
                    from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy

                    extractionOptions = ExtractionOptions(
                        prompt=extractionPrompt,
                        mergeStrategy=MergeStrategy()
                    )

                    # extractContent is not async - no await needed
                    extractedResults = self.services.extraction.extractContent(
                        [document],
                        extractionOptions,
                        operationId=extractionOperationId,
                        parentOperationId=extractionOperationId
                    )

                    # Convert extracted results into ContentParts with metadata.
                    for extracted in extractedResults:
                        for part in extracted.parts:
                            # Mark as "extracted" format.
                            part.metadata.update({
                                "contentFormat": "extracted",
                                "documentId": document.id,
                                "extractionPrompt": extractionPrompt,
                                "intent": "extract",
                                "usageHint": f"Use extracted content from {document.fileName}",
                                # cross-link to the object part (if any)
                                "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None
                            })
                            # Ensure a unique ID (in case an object part exists).
                            if "render" in intent.intents:
                                part.id = f"ext_{document.id}_{part.id}"
                            allContentParts.append(part)

            # Debug log (harmonized)
            self.services.utils.writeDebugFile(
                json.dumps([part.dict() for part in allContentParts], indent=2, default=str),
                "content_extraction_result"
            )

            # Close the ChatLog entry with success status.
            self.services.chat.progressLogFinish(extractionOperationId, True)

            return allContentParts

        except Exception as e:
            self.services.chat.progressLogFinish(extractionOperationId, False)
            logger.error(f"Error in extractAndPrepareContent: {str(e)}")
            raise
+
    async def extractTextFromImage(self, imagePart: ContentPart, extractionPrompt: str) -> Optional[str]:
        """
        Extract text from an image part using Vision AI.

        Args:
            imagePart: ContentPart with typeGroup="image"
            extractionPrompt: Prompt for the text extraction

        Returns:
            The extracted text, or an "[ERROR: ...]" marker string on failure.
            (This implementation never returns None; callers check for the
            error prefix instead.)
        """
        try:
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

            # Fall back to a generic OCR-style prompt if none was supplied.
            finalPrompt = extractionPrompt or "Extract all text content from this image. Return only the extracted text, no additional formatting."

            # Debug log (harmonized)
            self.services.utils.writeDebugFile(
                finalPrompt,
                f"content_extraction_prompt_image_{imagePart.id}"
            )

            # Build the AI call request carrying the image part.
            request = AiCallRequest(
                prompt=finalPrompt,
                context="",
                options=AiCallOptions(operationType=OperationTypeEnum.IMAGE_ANALYSE),
                contentParts=[imagePart]
            )

            # Delegate to the AI service for the Vision AI call.
            response = await self.aiService.callAi(request)

            # Debug log for the response (harmonized)
            if response and response.content:
                self.services.utils.writeDebugFile(
                    response.content,
                    f"content_extraction_response_image_{imagePart.id}"
                )

            if response and response.content:
                return response.content.strip()

            # No content returned - return error message for debugging
            errorMsg = f"Vision AI extraction failed: No content returned for image {imagePart.id}"
            logger.warning(errorMsg)
            return f"[ERROR: {errorMsg}]"
        except Exception as e:
            errorMsg = f"Vision AI extraction failed for image {imagePart.id}: {str(e)}"
            logger.error(errorMsg)
            import traceback
            logger.debug(f"Traceback: {traceback.format_exc()}")
            # Return an error message instead of None so failures stay visible downstream.
            return f"[ERROR: {errorMsg}]"
+
    async def processTextContentWithAi(self, textPart: ContentPart, extractionPrompt: str) -> Optional[str]:
        """
        Process text content with AI based on extractionPrompt.

        IMPORTANT: Pre-extracted ContentParts from context.extractContent contain RAW
        extracted text (e.g. from a PDF text layer). With an "extract" intent this text
        must be processed by AI (transformation, structuring, etc.) according to
        extractionPrompt.

        Args:
            textPart: ContentPart with typeGroup="text" (or another text-based type)
            extractionPrompt: Prompt for the AI processing of the text

        Returns:
            The AI-processed text, or an "[ERROR: ...]" marker string on failure.
            (This implementation never returns None; callers check for the
            error prefix instead.)
        """
        try:
            from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum

            # Fall back to a generic processing prompt if none was supplied.
            finalPrompt = extractionPrompt or "Process and extract the key information from the following text content."

            # Debug log (harmonized) - log prompt with a text preview
            textPreview = textPart.data[:500] + "..." if textPart.data and len(textPart.data) > 500 else (textPart.data or "")
            promptWithContext = f"{finalPrompt}\n\n--- Text Content (preview) ---\n{textPreview}"
            self.services.utils.writeDebugFile(
                promptWithContext,
                f"content_extraction_prompt_text_{textPart.id}"
            )

            # Build a text ContentPart for the AI processing,
            # using the existing text as input.
            textContentPart = ContentPart(
                id=textPart.id,
                label=textPart.label,
                typeGroup="text",
                mimeType="text/plain",
                data=textPart.data if textPart.data else "",
                metadata=textPart.metadata.copy() if textPart.metadata else {}
            )

            # Build the AI call request carrying the text part.
            request = AiCallRequest(
                prompt=finalPrompt,
                context="",
                options=AiCallOptions(operationType=OperationTypeEnum.DATA_EXTRACT),
                contentParts=[textContentPart]
            )

            # Delegate to the AI service for the text-processing call.
            response = await self.aiService.callAi(request)

            # Debug log for the response (harmonized)
            if response and response.content:
                self.services.utils.writeDebugFile(
                    response.content,
                    f"content_extraction_response_text_{textPart.id}"
                )

            if response and response.content:
                return response.content.strip()

            # No content returned - return error message for debugging
            errorMsg = f"AI text processing failed: No content returned for text part {textPart.id}"
            logger.warning(errorMsg)
            return f"[ERROR: {errorMsg}]"
        except Exception as e:
            errorMsg = f"AI text processing failed for text part {textPart.id}: {str(e)}"
            logger.error(errorMsg)
            import traceback
            logger.debug(f"Traceback: {traceback.format_exc()}")
            # Return an error message instead of None so failures stay visible downstream.
            return f"[ERROR: {errorMsg}]"
+
+ def _isBinary(self, mimeType: str) -> bool:
+ """Prüfe ob MIME-Type binary ist."""
+ binaryTypes = [
+ "application/octet-stream",
+ "application/pdf",
+ "application/zip",
+ "application/x-zip-compressed"
+ ]
+ return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or mimeType.startswith("audio/")
+
diff --git a/modules/services/serviceAi/subDocumentIntents.py b/modules/services/serviceAi/subDocumentIntents.py
new file mode 100644
index 00000000..c1faba39
--- /dev/null
+++ b/modules/services/serviceAi/subDocumentIntents.py
@@ -0,0 +1,302 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Document Intent Analysis Module
+
+Handles analysis of document intents, including:
+- Clarifying which documents need extraction vs reference
+- Resolving pre-extracted documents
+- Building intent analysis prompts
+"""
+import json
+import logging
+from typing import Dict, Any, List, Optional
+
+from modules.datamodels.datamodelChat import ChatDocument
+from modules.datamodels.datamodelExtraction import DocumentIntent
+
+logger = logging.getLogger(__name__)
+
+
+class DocumentIntentAnalyzer:
+ """Handles document intent analysis and resolution."""
+
    def __init__(self, services, aiService):
        """Initialize DocumentIntentAnalyzer with service center and AI service access."""
        self.services = services  # service center (chat, utils, db interface, ...)
        self.aiService = aiService  # AI service used for the planning (intent analysis) call
+
    async def clarifyDocumentIntents(
        self,
        documents: List[ChatDocument],
        userPrompt: str,
        actionParameters: Dict[str, Any],
        parentOperationId: str
    ) -> List[DocumentIntent]:
        """
        Phase 5A: Analyze which documents need extraction vs. reference.
        Returns a DocumentIntent for every document.

        Args:
            documents: Documents to process
            userPrompt: User request
            actionParameters: Action-specific parameters (e.g. resultType, outputFormat)
            parentOperationId: Parent operation ID for the ChatLog hierarchy

        Returns:
            List of DocumentIntent objects

        Raises:
            Exception: Any error is re-raised after the progress log entry is closed
                with failure status.
        """
        # Dedicated operation ID for the intent analysis step.
        intentOperationId = f"{parentOperationId}_intent_analysis"

        # Start the ChatLog entry with a reference to the parent operation.
        self.services.chat.progressLogStart(
            intentOperationId,
            "Document Intent Analysis",
            "Intent Analysis",
            f"Analyzing {len(documents)} documents",
            parentOperationId=parentOperationId
        )

        try:
            # Map pre-extracted JSONs to their original document IDs, so the
            # intent analysis sees the original documents instead of JSON wrappers.
            documentMapping = {}  # Maps original doc ID -> JSON doc ID
            resolvedDocuments = []

            for doc in documents:
                preExtracted = self.resolvePreExtractedDocument(doc)
                if preExtracted:
                    originalDocId = preExtracted["originalDocument"]["id"]
                    documentMapping[originalDocId] = doc.id
                    # Build a temporary ChatDocument representing the original document.
                    originalDoc = ChatDocument(
                        id=originalDocId,
                        fileName=preExtracted["originalDocument"]["fileName"],
                        mimeType=preExtracted["originalDocument"]["mimeType"],
                        fileSize=preExtracted["originalDocument"].get("fileSize", doc.fileSize),
                        fileId=doc.fileId,  # keep the fileId of the JSON
                        messageId=doc.messageId if hasattr(doc, 'messageId') else None  # keep messageId if present
                    )
                    resolvedDocuments.append(originalDoc)
                else:
                    resolvedDocuments.append(doc)

            # Build the intent analysis prompt from the resolved (original) documents.
            intentPrompt = self._buildIntentAnalysisPrompt(userPrompt, resolvedDocuments, actionParameters)

            # AI call (callAiPlanning is used for simple JSON responses);
            # debug logs are already written by callAiPlanning.
            aiResponse = await self.aiService.callAiPlanning(
                prompt=intentPrompt,
                debugType="document_intent_analysis"
            )

            # Parse the result and map intents back to JSON document IDs where needed.
            intentsData = json.loads(self.services.utils.jsonExtractString(aiResponse))
            documentIntents = []
            for intent in intentsData.get("intents", []):
                docId = intent.get("documentId")
                # If the intent targets an original document, map back to the JSON document ID.
                if docId in documentMapping:
                    intent["documentId"] = documentMapping[docId]
                documentIntents.append(DocumentIntent(**intent))

            # Debug log (harmonized)
            self.services.utils.writeDebugFile(
                json.dumps([intent.dict() for intent in documentIntents], indent=2),
                "document_intent_analysis_result"
            )

            # Close the ChatLog entry with success status.
            self.services.chat.progressLogFinish(intentOperationId, True)

            return documentIntents

        except Exception as e:
            self.services.chat.progressLogFinish(intentOperationId, False)
            logger.error(f"Error in clarifyDocumentIntents: {str(e)}")
            raise
+
    def resolvePreExtractedDocument(self, document: ChatDocument) -> Optional[Dict[str, Any]]:
        """
        Check whether a JSON document already contains extracted ContentParts.

        Returns a dict with:
          - originalDocument: ChatDocument info of the original document
          - contentExtracted: ContentExtracted object with parts
          - parts: list of ContentParts

        Returns None if no pre-extracted format is detected.
        """
        # Only JSON documents can carry the pre-extracted format.
        if document.mimeType != "application/json":
            logger.debug(f"Document {document.id} is not JSON (mimeType={document.mimeType}), skipping pre-extracted check")
            return None

        try:
            docBytes = self.services.interfaceDbComponent.getFileData(document.fileId)
            if not docBytes:
                return None

            docData = docBytes.decode('utf-8')
            jsonData = json.loads(docData)

            if not isinstance(jsonData, dict):
                return None

            # Check for the ContentExtracted format.
            # Only format 1 (ActionDocument format with validationMetadata) is supported.
            documentData = None

            validationMetadata = jsonData.get("validationMetadata", {})
            actionType = validationMetadata.get("actionType")
            logger.debug(f"JSON document {document.id}: validationMetadata.actionType={actionType}, keys={list(jsonData.keys())}")

            if actionType == "context.extractContent":
                # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}}
                documentData = jsonData.get("documentData")
                logger.debug(f"Found ContentExtracted via validationMetadata for {document.fileName}, documentData keys: {list(documentData.keys()) if documentData else None}")
            else:
                logger.debug(f"JSON document {document.id} does not have actionType='context.extractContent' (got: {actionType})")

            if documentData:
                from modules.datamodels.datamodelExtraction import ContentExtracted

                try:
                    # Make sure an "id" is present before model construction.
                    if "id" not in documentData:
                        documentData["id"] = document.id

                    contentExtracted = ContentExtracted(**documentData)

                    if contentExtracted.parts:
                        # Extract the original document info from the parts' metadata.
                        originalDocId = None
                        originalFileName = None
                        originalMimeType = None

                        for part in contentExtracted.parts:
                            if part.metadata:
                                # Use the first occurrence of each metadata field.
                                if not originalDocId and part.metadata.get("documentId"):
                                    originalDocId = part.metadata.get("documentId")
                                if not originalFileName and part.metadata.get("originalFileName"):
                                    originalFileName = part.metadata.get("originalFileName")
                                if not originalMimeType and part.metadata.get("documentMimeType"):
                                    originalMimeType = part.metadata.get("documentMimeType")

                        # If not found, try to derive the original file name from the
                        # JSON document's name
                        # (e.g. "B2025-02c_28_extracted_...json" -> "B2025-02c_28.pdf").
                        # NOTE(review): assumes the original was a PDF - confirm.
                        if not originalFileName:
                            if document.fileName and "_extracted_" in document.fileName:
                                originalFileName = document.fileName.split("_extracted_")[0] + ".pdf"

                        return {
                            "originalDocument": {
                                "id": originalDocId or document.id,
                                "fileName": originalFileName or document.fileName,
                                "mimeType": originalMimeType or "application/pdf",
                                "fileSize": document.fileSize
                            },
                            "contentExtracted": contentExtracted,
                            "parts": contentExtracted.parts
                        }
                except Exception as parseError:
                    logger.warning(f"Could not parse ContentExtracted format from {document.fileName}: {str(parseError)}")
                    logger.debug(f"JSON keys: {list(jsonData.keys())}, has parts: {'parts' in jsonData}")
                    import traceback
                    logger.debug(f"Parse error traceback: {traceback.format_exc()}")
                    return None
            else:
                logger.debug(f"JSON document {document.id} has no documentData (actionType={actionType})")

            return None
        except Exception as e:
            # Best-effort resolution: any failure means "not pre-extracted".
            logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}")
            return None
+
+ def _buildIntentAnalysisPrompt(
+ self,
+ userPrompt: str,
+ documents: List[ChatDocument],
+ actionParameters: Dict[str, Any]
+ ) -> str:
+ """Baue Prompt für Intent-Analyse."""
+ # Baue Dokument-Liste - zeige ursprüngliche Dokumente für pre-extracted JSONs
+ docListText = ""
+ for i, doc in enumerate(documents, 1):
+ # Prüfe ob es ein pre-extracted JSON ist
+ preExtracted = self.resolvePreExtractedDocument(doc)
+
+ if preExtracted:
+ # Zeige ursprüngliches Dokument statt JSON
+ originalDoc = preExtracted["originalDocument"]
+ partsInfo = f" (contains {len(preExtracted['parts'])} pre-extracted parts: {', '.join([p.typeGroup for p in preExtracted['parts'] if p.data and len(str(p.data)) > 0])})"
+ docListText += f"\n{i}. Document ID: {originalDoc['id']}\n"
+ docListText += f" File Name: {originalDoc['fileName']}{partsInfo}\n"
+ docListText += f" MIME Type: {originalDoc['mimeType']}\n"
+ docListText += f" File Size: {originalDoc.get('fileSize', doc.fileSize)} bytes\n"
+ else:
+ # Normales Dokument
+ docListText += f"\n{i}. Document ID: {doc.id}\n"
+ docListText += f" File Name: {doc.fileName}\n"
+ docListText += f" MIME Type: {doc.mimeType}\n"
+ docListText += f" File Size: {doc.fileSize} bytes\n"
+
+ outputFormat = actionParameters.get("outputFormat", "txt")
+
+ prompt = f"""USER REQUEST:
+{userPrompt}
+
+DOCUMENTS TO ANALYZE:
+{docListText}
+
+TASK: For each document, determine its intents (can be multiple):
+- "extract": Content extraction needed (text, structure, OCR, etc.)
+- "render": Image/binary should be rendered as-is (visual element)
+- "reference": Document reference/attachment (no extraction, just reference)
+
+OUTPUT FORMAT: {outputFormat}
+
+RETURN JSON:
+{{
+ "intents": [
+ {{
+ "documentId": "doc_1",
+ "intents": ["extract"], # Array - can contain multiple!
+ "extractionPrompt": "Extract all text content, preserving structure",
+ "reasoning": "User needs text content for document generation"
+ }},
+ {{
+ "documentId": "doc_2",
+ "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering
+ "extractionPrompt": "Extract text content from image using vision AI",
+ "reasoning": "Image contains text that needs extraction, but also should be rendered visually"
+ }},
+ {{
+ "documentId": "doc_3",
+ "intents": ["reference"],
+ "extractionPrompt": null,
+ "reasoning": "Document is only used as reference, no extraction needed"
+ }}
+ ]
+}}
+
+CRITICAL RULES:
+1. For images (mimeType starts with "image/"):
+ - If user wants to "include" or "show" images → add "render"
+ - If user wants to "analyze", "read text", or "extract text" from images → add "extract"
+ - Can have BOTH "extract" and "render" if image needs both text extraction and visual rendering
+
+2. For text documents:
+ - If user mentions "template" or "structure" → "reference" or "extract" based on context
+ - If user mentions "reference" or "context" → "reference"
+ - Default → "extract"
+
+3. Consider output format:
+ - For formats like PDF, DOCX, PPTX: images usually need "render"
+ - For formats like CSV, JSON: usually "extract" only
+ - For HTML: can have both "extract" and "render"
+
+Return ONLY valid JSON following the structure above.
+"""
+ return prompt
+
diff --git a/modules/services/serviceAi/subResponseParsing.py b/modules/services/serviceAi/subResponseParsing.py
new file mode 100644
index 00000000..a2d568d9
--- /dev/null
+++ b/modules/services/serviceAi/subResponseParsing.py
@@ -0,0 +1,275 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Response Parsing Module
+
+Handles parsing of AI responses, including:
+- Section extraction from responses
+- JSON completeness detection
+- Loop detection
+- Document metadata extraction
+- Final result building
+"""
+import json
+import logging
+from typing import Dict, Any, List, Optional, Tuple
+
+from modules.shared.jsonUtils import extractJsonString, repairBrokenJson, extractSectionsFromDocument
+from modules.services.serviceAi.subJsonResponseHandling import JsonResponseHandler
+from modules.datamodels.datamodelAi import JsonAccumulationState
+
+logger = logging.getLogger(__name__)
+
+
class ResponseParser:
    """Parses AI responses and decides when the generation loop is finished.

    Responsibilities:
    - extract sections from (possibly broken or partial) JSON responses
    - accumulate JSON fragments across iterations until they parse
    - detect repetition loops and assemble the final result document
    """

    def __init__(self, services):
        """Keep a reference to the service center (used for debug file output)."""
        self.services = services

    def extractSectionsFromResponse(
        self,
        result: str,
        iteration: int,
        debugPrefix: str,
        allSections: List[Dict[str, Any]] = None,
        accumulationState: Optional[JsonAccumulationState] = None
    ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]:
        """
        Extract sections from an AI response, tolerating broken JSON.

        Iteration 1 checks whether the response is already complete; if not,
        accumulation mode is started. Later iterations append the new fragment
        and re-parse until the accumulated JSON is complete.

        Returns:
            (sections, wasJsonComplete, parsedResult, accumulationState);
            the state is None when no accumulation is needed.
        """
        allSections = [] if allSections is None else allSections

        if iteration != 1:
            # Follow-up iteration: we should already be accumulating fragments.
            if not (accumulationState and accumulationState.isAccumulationMode):
                logger.warning(f"Iteration {iteration}: No accumulation state but iteration > 1")
                return [], False, None, None

            accumulated, sections, isComplete, parsedResult = \
                JsonResponseHandler.accumulateAndParseJsonFragments(
                    accumulationState.accumulatedJsonString,
                    result,
                    allSections,
                    iteration
                )

            accumulationState.accumulatedJsonString = accumulated
            accumulationState.lastParsedResult = parsedResult
            accumulationState.allSections = allSections + sections if sections else allSections
            accumulationState.isAccumulationMode = not isComplete

            # Persist the accumulated JSON for debugging.
            if parsedResult:
                accumulated_json_str = json.dumps(parsedResult, indent=2, ensure_ascii=False)
                self.services.utils.writeDebugFile(accumulated_json_str, f"{debugPrefix}_accumulated_json_iteration_{iteration}.json")

            return sections, isComplete, parsedResult, accumulationState

        # First iteration: maybe the response is already complete.
        parsed = None
        try:
            parsed = json.loads(extractJsonString(result))
            if JsonResponseHandler.isJsonComplete(parsed):
                sections = extractSectionsFromDocument(parsed)
                logger.info(f"Iteration 1: Complete JSON detected, no accumulation needed")
                return sections, True, parsed, None
        except Exception:
            pass  # fall through to the partial-content handling below

        logger.info(f"Iteration 1: Incomplete JSON detected, attempting to extract partial sections")

        partialSections = []
        if parsed:
            # Parsed but incomplete: salvage whatever sections are present.
            partialSections = extractSectionsFromDocument(parsed)
        else:
            # Unparseable: attempt a repair pass before giving up on sections.
            try:
                repaired = repairBrokenJson(result)
                if repaired:
                    partialSections = extractSectionsFromDocument(repaired)
                    parsed = repaired  # use the repaired object for the state
            except Exception:
                pass  # repair failed: start accumulation with no sections

        # KPI definition happens in the (async) caller; start with an empty list.
        accumulationState = JsonAccumulationState(
            accumulatedJsonString=result,
            isAccumulationMode=True,
            lastParsedResult=parsed,
            allSections=partialSections,
            kpis=[]
        )
        return partialSections, False, parsed, accumulationState

    def shouldContinueGeneration(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
        wasJsonComplete: bool,
        rawResponse: str = None
    ) -> bool:
        """
        Decide whether the AI generation loop needs another iteration.

        This only concerns AI loop completion (all JSON delivered), NOT the
        action's definition-of-done, which is checked afterwards in
        _refineDecide. Completeness is determined by parsing, not by any
        last-character heuristic. `rawResponse` is accepted for interface
        compatibility but not inspected.

        Returns True to continue, False when the AI loop is done.
        """
        # Nothing produced yet: definitely keep going.
        if not allSections:
            return True

        # Broken or partial JSON: another round is needed to complete it.
        if not wasJsonComplete:
            logger.info(f"Iteration {iteration}: JSON incomplete/broken - continuing to complete")
            return True

        # Complete JSON - but guard against endlessly repeating content.
        if self._isStuckInLoop(allSections, iteration):
            logger.warning(f"Iteration {iteration}: Detected potential infinite loop - stopping AI loop")
            return False

        logger.info(f"Iteration {iteration}: JSON complete - AI loop done")
        return False

    def _isStuckInLoop(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int
    ) -> bool:
        """
        Heuristic loop detection: flag when many iterations have passed but the
        most recent section carries almost no content.
        """
        # A loop can only be diagnosed after a few iterations with output.
        if iteration < 3 or not allSections:
            return False

        elements = allSections[-1].get("elements", [])

        # Normalize to the last element dict (or {} when nothing usable).
        if isinstance(elements, list) and elements:
            tail = elements[-1]
        elif isinstance(elements, dict):
            tail = elements
        else:
            tail = {}

        # Rough content size of the last element: string lengths plus the
        # stringified length of list values.
        tailSize = 0
        if isinstance(tail, dict):
            for value in tail.values():
                if isinstance(value, str):
                    tailSize += len(value)
                elif isinstance(value, list):
                    tailSize += len(str(value))

        # Tiny tail content after many iterations suggests we are stuck.
        if tailSize < 100 and iteration > 10:
            logger.warning(f"Potential loop detected: iteration {iteration}, last section size {tailSize}")
            return True

        return False

    def extractDocumentMetadata(
        self,
        parsedResult: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """
        Pull document metadata (title, filename) out of a parsed AI response.

        Returns a dict with 'title' and 'filename' keys when at least one of
        them is present on the first document, None otherwise.
        """
        if not isinstance(parsedResult, dict):
            return None

        documentsList = parsedResult.get("documents")
        if isinstance(documentsList, list) and documentsList:
            firstDoc = documentsList[0]
            if isinstance(firstDoc, dict):
                title = firstDoc.get("title")
                filename = firstDoc.get("filename")
                if title or filename:
                    return {
                        "title": title,
                        "filename": filename
                    }

        return None

    def buildFinalResultFromSections(
        self,
        allSections: List[Dict[str, Any]],
        documentMetadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """
        Assemble the final JSON document string from the accumulated sections,
        preferring AI-provided title/filename over the generic defaults.
        Returns "" when there are no sections.
        """
        if not allSections:
            return ""

        meta = documentMetadata or {}
        title = meta.get("title") or "Generated Document"
        filename = meta.get("filename") or "document.json"

        # Single-document output for now.
        payload = {
            "metadata": {
                "split_strategy": "single_document",
                "source_documents": [],
                "extraction_method": "ai_generation"
            },
            "documents": [{
                "id": "doc_1",
                "title": title,
                "filename": filename,
                "sections": allSections
            }]
        }

        return json.dumps(payload, indent=2)
+
diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py
new file mode 100644
index 00000000..cc45b099
--- /dev/null
+++ b/modules/services/serviceAi/subStructureFilling.py
@@ -0,0 +1,546 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Structure Filling Module
+
+Handles filling document structure with content, including:
+- Filling sections with content parts
+- Building section generation prompts
+- Aggregation logic
+"""
+import json
+import logging
+import copy
+from typing import Dict, Any, List, Optional
+
+from modules.datamodels.datamodelExtraction import ContentPart
+from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
+
+logger = logging.getLogger(__name__)
+
+
class StructureFiller:
    """Fills a generated document structure with actual content.

    Content comes either directly from prepared ContentParts (reference,
    object, extracted text) or from per-section AI generation calls.
    """

    def __init__(self, services, aiService):
        """Initialize StructureFiller with service center and AI service access."""
        self.services = services      # service center: chat progress log, utils, debug files
        self.aiService = aiService    # AI facade used for section-content generation calls

    async def fillStructure(
        self,
        structure: Dict[str, Any],
        contentParts: List[ContentPart],
        userPrompt: str,
        parentOperationId: str
    ) -> Dict[str, Any]:
        """
        Phase 5D: Fill the structure with actual content.

        For each section:
        - If contentPartIds are specified: use the ContentParts in the specified format
        - If generation_hint is specified: generate AI content

        Implementation notes:
        - Sections are processed sequentially; parallel generation is a
          possible later optimization.
        - Failing sections are rendered with an error element instead of
          aborting the whole process.

        Args:
            structure: Structure dict with documents and sections
            contentParts: All prepared ContentParts
            userPrompt: The user request
            parentOperationId: Parent operation id for the ChatLog hierarchy

        Returns:
            The filled structure, with an "elements" list in every section.
        """
        # Operation id for the structure-filling step
        fillOperationId = f"{parentOperationId}_structure_filling"

        # Start a ChatLog entry linked to the parent operation.
        # NOTE(review): the section count below only looks at the FIRST document.
        self.services.chat.progressLogStart(
            fillOperationId,
            "Structure Filling",
            "Filling",
            f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections",
            parentOperationId=parentOperationId
        )

        try:
            filledStructure = copy.deepcopy(structure)

            # Collect all sections for sequential processing (parallel execution
            # can be added later)
            sections_to_process = []
            all_sections_list = []  # used as neighbor context in the prompts
            for doc in filledStructure.get("documents", []):
                doc_sections = doc.get("sections", [])
                all_sections_list.extend(doc_sections)
                for section in doc_sections:
                    sections_to_process.append((doc, section))

            # Sequential section generation (parallelization can be added later)
            for sectionIndex, (doc, section) in enumerate(sections_to_process):
                sectionId = section.get("id")
                contentPartIds = section.get("contentPartIds", [])
                contentFormats = section.get("contentFormats", {})
                generationHint = section.get("generation_hint")
                contentType = section.get("content_type", "paragraph")

                elements = []

                # Does this section need all parts merged into one element?
                needsAggregation = self._needsAggregation(
                    contentType=contentType,
                    contentPartCount=len(contentPartIds)
                )

                if needsAggregation and generationHint:
                    # Aggregation: process all parts together
                    sectionParts = [
                        self._findContentPartById(pid, contentParts)
                        for pid in contentPartIds
                    ]
                    sectionParts = [p for p in sectionParts if p is not None]

                    if sectionParts:
                        # Only "extracted" parts are aggregated; reference/object
                        # parts are handled separately below.
                        extractedParts = [
                            p for p in sectionParts
                            if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted"
                        ]
                        nonExtractedParts = [
                            p for p in sectionParts
                            if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted"
                        ]

                        # Handle non-extracted parts (reference, object) directly
                        for part in nonExtractedParts:
                            contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat"))

                            if contentFormat == "reference":
                                elements.append({
                                    "type": "reference",
                                    "documentReference": part.metadata.get("documentReference"),
                                    "label": part.metadata.get("usageHint", part.label)
                                })
                            elif contentFormat == "object":
                                elements.append({
                                    "type": part.typeGroup,
                                    "base64Data": part.data,
                                    "mimeType": part.mimeType,
                                    "altText": part.metadata.get("usageHint", part.label)
                                })

                        # Aggregate the extracted parts with a single AI call
                        if extractedParts:
                            generationPrompt = self._buildSectionGenerationPrompt(
                                section=section,
                                contentParts=extractedParts,  # ALL parts for aggregation!
                                userPrompt=userPrompt,
                                generationHint=generationHint,
                                allSections=all_sections_list,
                                sectionIndex=sectionIndex,
                                isAggregation=True
                            )

                            # Operation id for this section's generation
                            sectionOperationId = f"{fillOperationId}_section_{sectionId}"

                            # Start a ChatLog entry linked to the filling operation
                            self.services.chat.progressLogStart(
                                sectionOperationId,
                                "Section Generation (Aggregation)",
                                "Section",
                                f"Generating section {sectionId} with {len(extractedParts)} parts",
                                parentOperationId=fillOperationId
                            )

                            try:
                                # Debug: log the prompt
                                self.services.utils.writeDebugFile(
                                    generationPrompt,
                                    f"section_content_{sectionId}_prompt"
                                )

                                # Use callAi for ContentParts support (not callAiPlanning!)
                                request = AiCallRequest(
                                    prompt=generationPrompt,
                                    contentParts=extractedParts,  # ALL parts!
                                    options=AiCallOptions(
                                        operationType=OperationTypeEnum.DATA_ANALYSE,
                                        priority=PriorityEnum.BALANCED,
                                        processingMode=ProcessingModeEnum.DETAILED
                                    )
                                )
                                aiResponse = await self.aiService.callAi(request)

                                # Debug: log the response
                                self.services.utils.writeDebugFile(
                                    aiResponse.content,
                                    f"section_content_{sectionId}_response"
                                )

                                # Parse the response and append its elements
                                generatedElements = json.loads(
                                    self.services.utils.jsonExtractString(aiResponse.content)
                                )
                                if isinstance(generatedElements, list):
                                    elements.extend(generatedElements)
                                elif isinstance(generatedElements, dict) and "elements" in generatedElements:
                                    elements.extend(generatedElements["elements"])

                                # Close the ChatLog entry (success)
                                self.services.chat.progressLogFinish(sectionOperationId, True)

                            except Exception as e:
                                # Render the failing section with an error element (no abort!)
                                self.services.chat.progressLogFinish(sectionOperationId, False)
                                elements.append({
                                    "type": "error",
                                    "message": f"Error generating section {sectionId}: {str(e)}",
                                    "sectionId": sectionId
                                })
                                logger.error(f"Error generating section {sectionId}: {str(e)}")
                                # Deliberately NOT re-raised - the section is rendered with its error

                else:
                    # Individual processing: handle each part on its own
                    for partId in contentPartIds:
                        part = self._findContentPartById(partId, contentParts)
                        if not part:
                            continue

                        contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat"))

                        if contentFormat == "reference":
                            # Add a document reference
                            elements.append({
                                "type": "reference",
                                "documentReference": part.metadata.get("documentReference"),
                                "label": part.metadata.get("usageHint", part.label)
                            })

                        elif contentFormat == "object":
                            # Add the base64-encoded object
                            elements.append({
                                "type": part.typeGroup,  # "image", "binary", etc.
                                "base64Data": part.data,
                                "mimeType": part.mimeType,
                                "altText": part.metadata.get("usageHint", part.label)
                            })

                        elif contentFormat == "extracted":
                            if generationHint:
                                # AI call with the single ContentPart
                                generationPrompt = self._buildSectionGenerationPrompt(
                                    section=section,
                                    contentParts=[part],  # ONE part
                                    userPrompt=userPrompt,
                                    generationHint=generationHint,
                                    allSections=all_sections_list,
                                    sectionIndex=sectionIndex,
                                    isAggregation=False
                                )

                                # Operation id for this section's generation.
                                # NOTE(review): same id for every part of the section,
                                # so multiple extracted parts reuse one ChatLog id - confirm.
                                sectionOperationId = f"{fillOperationId}_section_{sectionId}"

                                # Start a ChatLog entry linked to the filling operation
                                self.services.chat.progressLogStart(
                                    sectionOperationId,
                                    "Section Generation",
                                    "Section",
                                    f"Generating section {sectionId}",
                                    parentOperationId=fillOperationId
                                )

                                try:
                                    # Debug: log the prompt
                                    self.services.utils.writeDebugFile(
                                        generationPrompt,
                                        f"section_content_{sectionId}_prompt"
                                    )

                                    # Use callAi for ContentParts support
                                    request = AiCallRequest(
                                        prompt=generationPrompt,
                                        contentParts=[part],
                                        options=AiCallOptions(
                                            operationType=OperationTypeEnum.DATA_ANALYSE,
                                            priority=PriorityEnum.BALANCED,
                                            processingMode=ProcessingModeEnum.DETAILED
                                        )
                                    )
                                    aiResponse = await self.aiService.callAi(request)

                                    # Debug: log the response
                                    self.services.utils.writeDebugFile(
                                        aiResponse.content,
                                        f"section_content_{sectionId}_response"
                                    )

                                    # Parse the response and append its elements
                                    generatedElements = json.loads(
                                        self.services.utils.jsonExtractString(aiResponse.content)
                                    )
                                    if isinstance(generatedElements, list):
                                        elements.extend(generatedElements)
                                    elif isinstance(generatedElements, dict) and "elements" in generatedElements:
                                        elements.extend(generatedElements["elements"])

                                    # Close the ChatLog entry (success)
                                    self.services.chat.progressLogFinish(sectionOperationId, True)

                                except Exception as e:
                                    # Render the failing section with an error element (no abort!)
                                    self.services.chat.progressLogFinish(sectionOperationId, False)
                                    elements.append({
                                        "type": "error",
                                        "message": f"Error generating section {sectionId}: {str(e)}",
                                        "sectionId": sectionId
                                    })
                                    logger.error(f"Error generating section {sectionId}: {str(e)}")
                                    # Deliberately NOT re-raised - the section is rendered with its error
                            else:
                                # Append the extracted text directly (no AI call)
                                elements.append({
                                    "type": "extracted_text",
                                    "content": part.data,
                                    "source": part.metadata.get("documentId"),
                                    "extractionPrompt": part.metadata.get("extractionPrompt")
                                })

                section["elements"] = elements

            # Close the ChatLog entry (success)
            self.services.chat.progressLogFinish(fillOperationId, True)

            return filledStructure

        except Exception as e:
            self.services.chat.progressLogFinish(fillOperationId, False)
            logger.error(f"Error in fillStructure: {str(e)}")
            raise

    def _buildSectionGenerationPrompt(
        self,
        section: Dict[str, Any],
        contentParts: List[Optional[ContentPart]],
        userPrompt: str,
        generationHint: str,
        allSections: Optional[List[Dict[str, Any]]] = None,
        sectionIndex: Optional[int] = None,
        isAggregation: bool = False
    ) -> str:
        """Build the section-generation prompt with full document context.

        Args:
            section: The section dict (id, content_type, ...)
            contentParts: Parts for this section (may contain None entries)
            userPrompt: Original user request, included for context
            generationHint: What the AI should generate for this section
            allSections: All sections of the document (for neighbor context)
            sectionIndex: Index of this section within allSections
            isAggregation: True when all parts must be merged into one element

        Returns:
            The complete prompt string.
        """
        # Drop None entries
        validParts = [p for p in contentParts if p is not None]

        # Section metadata
        sectionId = section.get("id", "unknown")
        contentType = section.get("content_type", "paragraph")

        # Build the content-parts description
        contentPartsText = ""
        if isAggregation:
            # Aggregation: list only metadata, no content previews.
            # (The German bullet lines below are deliberate model-facing prompt text.)
            contentPartsText += f"\n## CONTENT PARTS (Aggregation)\n"
            contentPartsText += f"- Anzahl: {len(validParts)} ContentParts\n"
            contentPartsText += f"- Alle ContentParts werden als Parameter übergeben (nicht im Prompt!)\n"
            contentPartsText += f"- Jeder Part kann sehr groß sein → Chunking automatisch\n"
            contentPartsText += f"- WICHTIG: Aggregiere ALLE Parts zu einem Element (z.B. eine Tabelle)\n\n"
            contentPartsText += f"ContentPart IDs:\n"
            for part in validParts:
                contentFormat = part.metadata.get("contentFormat", "unknown")
                contentPartsText += f"  - {part.id} (Format: {contentFormat}, Type: {part.typeGroup}"
                if part.metadata.get("originalFileName"):
                    contentPartsText += f", Source: {part.metadata.get('originalFileName')}"
                contentPartsText += ")\n"
        else:
            # Individual processing: include content previews
            for part in validParts:
                contentFormat = part.metadata.get("contentFormat", "unknown")
                contentPartsText += f"\n- ContentPart {part.id}:\n"
                contentPartsText += f"  Format: {contentFormat}\n"
                contentPartsText += f"  Type: {part.typeGroup}\n"
                if part.metadata.get("originalFileName"):
                    contentPartsText += f"  Source file: {part.metadata.get('originalFileName')}\n"

                if contentFormat == "extracted":
                    # Preview of the extracted text (longer for better context)
                    previewLength = 1000
                    if part.data:
                        preview = part.data[:previewLength] + "..." if len(part.data) > previewLength else part.data
                        contentPartsText += f"  Content preview:\n```\n{preview}\n```\n"
                    else:
                        contentPartsText += f"  Content: (empty)\n"
                elif contentFormat == "reference":
                    contentPartsText += f"  Reference: {part.metadata.get('documentReference')}\n"
                    if part.metadata.get("usageHint"):
                        contentPartsText += f"  Usage hint: {part.metadata.get('usageHint')}\n"
                elif contentFormat == "object":
                    dataLength = len(part.data) if part.data else 0
                    contentPartsText += f"  Object type: {part.typeGroup}\n"
                    contentPartsText += f"  MIME type: {part.mimeType}\n"
                    contentPartsText += f"  Data size: {dataLength} chars (base64 encoded)\n"
                    if part.metadata.get("usageHint"):
                        contentPartsText += f"  Usage hint: {part.metadata.get('usageHint')}\n"

        # Build the section context (previous and following sections)
        contextText = ""
        if allSections and sectionIndex is not None:
            prevSections = []
            nextSections = []

            if sectionIndex > 0:
                # Up to two preceding sections
                for i in range(max(0, sectionIndex - 2), sectionIndex):
                    prevSection = allSections[i]
                    prevSections.append({
                        "id": prevSection.get("id"),
                        "content_type": prevSection.get("content_type"),
                        # NOTE(review): .get(..., "") does not guard an explicit
                        # None value; [:100] would raise then - confirm upstream.
                        "generation_hint": prevSection.get("generation_hint", "")[:100]
                    })

            if sectionIndex < len(allSections) - 1:
                # Up to two following sections
                for i in range(sectionIndex + 1, min(len(allSections), sectionIndex + 3)):
                    nextSection = allSections[i]
                    nextSections.append({
                        "id": nextSection.get("id"),
                        "content_type": nextSection.get("content_type"),
                        "generation_hint": nextSection.get("generation_hint", "")[:100]
                    })

            if prevSections or nextSections:
                contextText = "\n## DOCUMENT CONTEXT\n"
                if prevSections:
                    contextText += "\nPrevious sections:\n"
                    for prev in prevSections:
                        contextText += f"- {prev['id']} ({prev['content_type']}): {prev['generation_hint']}\n"
                if nextSections:
                    contextText += "\nFollowing sections:\n"
                    for next in nextSections:  # NOTE: 'next' shadows the builtin here
                        contextText += f"- {next['id']} ({next['content_type']}): {next['generation_hint']}\n"

        if isAggregation:
            prompt = f"""# TASK: Generate Section Content (Aggregation)

## SECTION METADATA
- Section ID: {sectionId}
- Content Type: {contentType}
- Generation Hint: {generationHint}
{contextText}

## USER REQUEST (for context)
```
{userPrompt}
```

## AVAILABLE CONTENT FOR THIS SECTION
{contentPartsText if contentPartsText else "(No content parts specified for this section)"}

## INSTRUCTIONS
1. Generate content for section "{sectionId}" based on the generation hint above
2. **AGGREGATION**: Combine ALL provided ContentParts into ONE element (e.g., one table with all data)
3. For table content_type: Create a single table with headers and rows from all ContentParts
4. For bullet_list content_type: Create a single list with items from all ContentParts
5. Format appropriately based on content_type ({contentType})
6. Ensure the generated content fits logically between previous and following sections
7. Return ONLY a JSON object with an "elements" array
8. Each element should match the content_type: {contentType}

## OUTPUT FORMAT
Return a JSON object with this structure:
```json
{{
  "elements": [
    {{
      "type": "{contentType}",
      "headers": [...],  // if table
      "rows": [...],     // if table
      "items": [...],    // if bullet_list
      "content": "..."   // if paragraph
    }}
  ]
}}
```

CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
"""
        else:
            prompt = f"""# TASK: Generate Section Content

## SECTION METADATA
- Section ID: {sectionId}
- Content Type: {contentType}
- Generation Hint: {generationHint}
{contextText}

## USER REQUEST (for context)
```
{userPrompt}
```

## AVAILABLE CONTENT FOR THIS SECTION
{contentPartsText if contentPartsText else "(No content parts specified for this section)"}

## INSTRUCTIONS
1. Generate content for section "{sectionId}" based on the generation hint above
2. Use the available content parts to populate this section
3. For images: Use data URI format (data:image/[type];base64,[data]) when embedding base64 image data
4. For extracted text: Format appropriately based on content_type ({contentType})
5. Ensure the generated content fits logically between previous and following sections
6. Return ONLY a JSON object with an "elements" array
7. Each element should match the content_type: {contentType}

## OUTPUT FORMAT
Return a JSON object with this structure:
```json
{{
  "elements": [
    {{
      "type": "{contentType}",
      "content": "..."
    }}
  ]
}}
```

CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
"""
        return prompt

    def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]:
        """Return the ContentPart with the given id, or None when absent."""
        for part in contentParts:
            if part.id == partId:
                return part
        return None

    def _needsAggregation(
        self,
        contentType: str,
        contentPartCount: int
    ) -> bool:
        """
        Decide whether multiple ContentParts must be aggregated.

        Aggregation is needed when:
        - the content_type requires aggregation (table, bullet_list)
        - AND more than one ContentPart is present (> 1)

        Args:
            contentType: Section content_type
            contentPartCount: Number of ContentParts in this section

        Returns:
            True when aggregation is needed, False otherwise.
        """
        aggregationTypes = ["table", "bullet_list"]

        if contentType in aggregationTypes and contentPartCount > 1:
            return True

        # Optionally paragraphs with multiple parts could be aggregated too
        # (e.g. comparing several documents). Default: no aggregation.
        return False
+
diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py
new file mode 100644
index 00000000..eb39fdd6
--- /dev/null
+++ b/modules/services/serviceAi/subStructureGeneration.py
@@ -0,0 +1,229 @@
+# Copyright (c) 2025 Patrick Motsch
+# All rights reserved.
+"""
+Structure Generation Module
+
+Handles document structure generation, including:
+- Generating document structure with sections
+- Building structure prompts
+"""
+import json
+import logging
+from typing import Dict, Any, List
+
+from modules.datamodels.datamodelExtraction import ContentPart
+
+logger = logging.getLogger(__name__)
+
+
class StructureGenerator:
    """Handles document structure generation.

    Asks the AI for a section-level document structure (which content goes
    where, which ContentParts to use, and in which format) and returns the
    parsed JSON structure.
    """

    def __init__(self, services, aiService):
        """Initialize StructureGenerator with service center and AI service access."""
        self.services = services
        self.aiService = aiService

    async def generateStructure(
        self,
        userPrompt: str,
        contentParts: List[ContentPart],
        outputFormat: str,
        parentOperationId: str
    ) -> Dict[str, Any]:
        """
        Phase 5C: Generate the document structure with sections.

        Each section specifies:
        - which content belongs in the section,
        - which ContentParts to use,
        - the format in which each ContentPart is to be used.

        Args:
            userPrompt: The user request.
            contentParts: All prepared ContentParts with metadata.
            outputFormat: Target format (html, docx, pdf, etc.).
            parentOperationId: Parent operation id for the ChatLog hierarchy.

        Returns:
            Structure dict with documents and sections.

        Raises:
            Exception: Re-raises any error from the AI call or JSON parsing
                after finishing the progress log with success=False.
        """
        # Dedicated operation id so the structure step shows up as a child
        # entry in the ChatLog hierarchy.
        structureOperationId = f"{parentOperationId}_structure_generation"

        self.services.chat.progressLogStart(
            structureOperationId,
            "Structure Generation",
            "Structure",
            f"Generating structure for {outputFormat}",
            parentOperationId=parentOperationId
        )

        try:
            # Build the structure prompt including the content index.
            structurePrompt = self._buildStructurePrompt(
                userPrompt=userPrompt,
                contentParts=contentParts,
                outputFormat=outputFormat
            )

            # AI call for structure generation (callAiPlanning is used for
            # plain JSON responses; it already writes its own debug logs).
            aiResponse = await self.aiService.callAiPlanning(
                prompt=structurePrompt,
                debugType="document_generation_structure"
            )

            # Extract the JSON payload from the raw AI response and parse it.
            structure = json.loads(self.services.utils.jsonExtractString(aiResponse))

            self.services.chat.progressLogFinish(structureOperationId, True)

            return structure

        except Exception as e:
            # Mark the progress log entry as failed before propagating.
            self.services.chat.progressLogFinish(structureOperationId, False)
            logger.error(f"Error in generateStructure: {str(e)}")
            raise

    def _buildStructurePrompt(
        self,
        userPrompt: str,
        contentParts: List[ContentPart],
        outputFormat: str
    ) -> str:
        """Build the prompt for structure generation.

        Filters out empty ContentParts (no data, or whitespace-only text),
        builds a numbered index of the remaining parts, and embeds the index
        in the prompt together with the expected JSON output schema.
        """
        validParts = []
        filteredParts = []

        for part in contentParts:
            contentFormat = part.metadata.get("contentFormat", "unknown")

            # IMPORTANT: reference parts intentionally carry empty data --
            # they must always be included.
            if contentFormat == "reference":
                validParts.append(part)
                logger.debug(f"Including reference ContentPart {part.id} (intentionally empty data)")
                continue

            # Skip empty parts. BUGFIX: the previous version computed the
            # whitespace-only check but the inner guards only tested
            # `not part.data`, so whitespace-only strings slipped through
            # the filter and were included anyway.
            isEmpty = not part.data or (isinstance(part.data, str) and not part.data.strip())
            if isEmpty:
                if part.typeGroup == "container":
                    filteredParts.append((part.id, "container without data"))
                else:
                    filteredParts.append((part.id, f"no data (format: {contentFormat})"))
                continue

            validParts.append(part)
            logger.debug(f"Including ContentPart {part.id}: format={contentFormat}, type={part.typeGroup}, dataLength={len(str(part.data)) if part.data else 0}")

        if filteredParts:
            logger.debug(f"Filtered out {len(filteredParts)} empty ContentParts: {filteredParts}")

        logger.info(f"Building structure prompt with {len(validParts)} valid ContentParts (from {len(contentParts)} total)")

        # Build the index only for valid parts.
        indexSections = []
        for i, part in enumerate(validParts, 1):
            contentFormat = part.metadata.get("contentFormat", "unknown")
            dataPreview = ""

            if contentFormat == "extracted":
                if part.typeGroup == "image":
                    # Image parts: just describe the payload, never inline it.
                    dataLength = len(part.data) if part.data else 0
                    mimeType = part.mimeType or "image"
                    dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content"
                elif part.typeGroup == "container":
                    # Containers without data were already filtered above.
                    dataPreview = "Container structure (no text content)"
                else:
                    # Show a short preview of the extracted text.
                    if part.data:
                        preview = part.data[:200] + "..." if len(part.data) > 200 else part.data
                        dataPreview = preview
                    else:
                        dataPreview = "(empty)"
            elif contentFormat == "object":
                dataLength = len(part.data) if part.data else 0
                mimeType = part.mimeType or "binary"
                if part.typeGroup == "image":
                    dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)"
                else:
                    dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)"
            elif contentFormat == "reference":
                dataPreview = part.metadata.get("documentReference", "reference")

            originalFileName = part.metadata.get('originalFileName', 'N/A')

            indexSections.append(
                f"\n{i}. ContentPart ID: {part.id}\n"
                f"   Format: {contentFormat}\n"
                f"   Type: {part.typeGroup}\n"
                f"   MIME Type: {part.mimeType or 'N/A'}\n"
                f"   Source: {part.metadata.get('documentId', 'unknown')}\n"
                f"   Original file name: {originalFileName}\n"
                f"   Usage hint: {part.metadata.get('usageHint', 'N/A')}\n"
                f"   Data preview: {dataPreview}\n"
            )

        contentPartsIndex = "".join(indexSections)
        if not contentPartsIndex:
            contentPartsIndex = "\n(No content parts available)"

        prompt = f"""USER REQUEST:
{userPrompt}

AVAILABLE CONTENT PARTS:
{contentPartsIndex}

TASK: Generiere Dokument-Struktur mit Sections.
Für jede Section, spezifiziere:
- section id
- content_type (heading, paragraph, image, table, etc.)
- contentPartIds: [Liste von ContentPart-IDs zu verwenden]
- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist
- generation_hint: Was AI für diese Section generieren soll
- elements: [] (leer, wird in nächster Phase gefüllt)

OUTPUT FORMAT: {outputFormat}

RETURN JSON:
{{
  "metadata": {{
    "title": "Document Title",
    "language": "de"
  }},
  "documents": [{{
    "id": "doc_1",
    "title": "Document Title",
    "filename": "document.{outputFormat}",
    "sections": [
      {{
        "id": "section_1",
        "content_type": "heading",
        "generation_hint": "Main title",
        "contentPartIds": [],
        "contentFormats": {{}},
        "elements": []
      }},
      {{
        "id": "section_2",
        "content_type": "paragraph",
        "generation_hint": "Introduction paragraph",
        "contentPartIds": ["part_ext_1"],
        "contentFormats": {{
          "part_ext_1": "extracted"
        }},
        "elements": []
      }}
    ]
  }}]
}}

Return ONLY valid JSON following the structure above.
"""
        return prompt
+
diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py
index a2972453..ba4bfb69 100644
--- a/modules/services/serviceExtraction/mainServiceExtraction.py
+++ b/modules/services/serviceExtraction/mainServiceExtraction.py
@@ -856,7 +856,10 @@ class ExtractionService:
merged_parts = applyMerging(content_parts, merge_strategy)
# Phase 6: Enhanced format with metadata preservation
- # CRITICAL: For generation responses (JSON), don't add SOURCE markers - they interfere with JSON parsing
+ # CRITICAL: Don't add SOURCE markers for internal use - metadata is already preserved in ContentPart objects
+ # SOURCE markers should ONLY be added when content is returned directly to user for display/debugging
+ # For extraction content used in generation pipelines, metadata is in ContentPart.metadata, not in text markers
+
# Check if this is a generation response by looking at operationType or content structure
isGenerationResponse = False
if options and hasattr(options, 'operationType'):
@@ -880,23 +883,14 @@ class ExtractionService:
except:
pass
+ # ROOT CAUSE FIX: Never add SOURCE markers - metadata is preserved in ContentPart.metadata
+ # SOURCE markers pollute content and cause issues when content is used in generation pipelines
+ # If traceability is needed, use ContentPart.metadata fields (documentId, documentMimeType, label, etc.)
content_sections = []
for part in merged_parts:
- if isGenerationResponse:
- # For generation responses, return JSON directly without SOURCE markers
- content_sections.append(part.data)
- else:
- # For extraction responses, include metadata in section header for traceability
- doc_id = part.metadata.get("documentId", "unknown")
- doc_mime = part.metadata.get("documentMimeType", "unknown")
- label = part.label or "content"
-
- section = f"""
-[SOURCE: documentId={doc_id}, mimeType={doc_mime}, label={label}]
-{part.data}
-[END SOURCE]
-"""
- content_sections.append(section)
+ # Always return clean content without SOURCE markers
+ # Metadata is available in ContentPart.metadata for traceability
+ content_sections.append(part.data if part.data else "")
final_content = "\n\n".join(content_sections)
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index 54c7e64b..213bf641 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -299,36 +299,14 @@ class RendererHtml(BaseRenderer):
def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str:
"""Render a single JSON section to HTML using AI-generated styles.
Supports three content formats: reference, object (base64), extracted_text.
+ WICHTIG: Respektiert sectionType (content_type) für korrekte Rendering-Logik.
"""
try:
sectionType = self._getSectionType(section)
sectionData = self._getSectionData(section)
- # Check for three content formats from Phase 5D in elements
- if isinstance(sectionData, list):
- htmlParts = []
- for element in sectionData:
- element_type = element.get("type", "") if isinstance(element, dict) else ""
-
- # Support three content formats from Phase 5D
- if element_type == "reference":
- # Document reference format
- doc_ref = element.get("documentReference", "")
- label = element.get("label", "Reference")
- htmlParts.append(f'[Reference: {label}]
')
- continue
- elif element_type == "extracted_text":
- # Extracted text format
- content = element.get("content", "")
- source = element.get("source", "")
- if content:
- source_text = f' (Source: {source})' if source else ''
- htmlParts.append(f'')
- continue
-
- # If we processed reference/extracted_text elements, return them
- if htmlParts:
- return '\n'.join(htmlParts)
+ # WICHTIG: Respektiere sectionType (content_type) ZUERST, dann process elements entsprechend
+ # Process elements according to section's content_type, not just element types
if sectionType == "table":
# Process the section data to extract table structure
@@ -339,8 +317,58 @@ class RendererHtml(BaseRenderer):
processedData = self._processSectionByType(section)
return self._renderJsonBulletList(processedData, styles)
elif sectionType == "heading":
+ # Extract text from elements for heading rendering
+ if isinstance(sectionData, list):
+ # Extract text from heading elements
+ headingText = ""
+ for element in sectionData:
+ if isinstance(element, dict):
+ element_type = element.get("type", "")
+ if element_type == "heading":
+ headingText = element.get("content", element.get("text", ""))
+ break
+ elif element_type == "extracted_text":
+ # Use extracted text as heading if no heading element found
+ content = element.get("content", "")
+ if content and not headingText:
+ # Extract first line or title from extracted text
+ headingText = content.split('\n')[0].strip()
+ # Remove markdown formatting
+ headingText = headingText.replace('#', '').replace('**', '').strip()
+ break
+ elif "text" in element:
+ headingText = element.get("text", "")
+ break
+ if headingText:
+ return self._renderJsonHeading({"text": headingText, "level": 2}, styles)
return self._renderJsonHeading(sectionData, styles)
elif sectionType == "paragraph":
+ # Process paragraph elements, including extracted_text
+ if isinstance(sectionData, list):
+ htmlParts = []
+ for element in sectionData:
+ element_type = element.get("type", "") if isinstance(element, dict) else ""
+
+ if element_type == "reference":
+ doc_ref = element.get("documentReference", "")
+ label = element.get("label", "Reference")
+ htmlParts.append(f'[Reference: {label}]
')
+ elif element_type == "extracted_text":
+ content = element.get("content", "")
+ source = element.get("source", "")
+ if content:
+ source_text = f' (Source: {source})' if source else ''
+ htmlParts.append(f'')
+ elif isinstance(element, dict):
+ # Regular paragraph element
+ text = element.get("text", element.get("content", ""))
+ if text:
+ htmlParts.append(f'{text}
')
+ elif isinstance(element, str):
+ htmlParts.append(f'{element}
')
+
+ if htmlParts:
+ return '\n'.join(htmlParts)
return self._renderJsonParagraph(sectionData, styles)
elif sectionType == "code_block":
# Process the section data to extract code block structure
@@ -351,6 +379,25 @@ class RendererHtml(BaseRenderer):
processedData = self._processSectionByType(section)
return self._renderJsonImage(processedData, styles)
else:
+ # Fallback: Check for special element types first
+ if isinstance(sectionData, list):
+ htmlParts = []
+ for element in sectionData:
+ element_type = element.get("type", "") if isinstance(element, dict) else ""
+
+ if element_type == "reference":
+ doc_ref = element.get("documentReference", "")
+ label = element.get("label", "Reference")
+ htmlParts.append(f'[Reference: {label}]
')
+ elif element_type == "extracted_text":
+ content = element.get("content", "")
+ source = element.get("source", "")
+ if content:
+ source_text = f' (Source: {source})' if source else ''
+ htmlParts.append(f'')
+
+ if htmlParts:
+ return '\n'.join(htmlParts)
# Fallback to paragraph for unknown types
return self._renderJsonParagraph(sectionData, styles)
diff --git a/tests/functional/test09_document_generation_formats.py b/tests/functional/test09_document_generation_formats.py
index 49860665..3e33c996 100644
--- a/tests/functional/test09_document_generation_formats.py
+++ b/tests/functional/test09_document_generation_formats.py
@@ -214,14 +214,14 @@ class DocumentGenerationFormatsTester:
self.workflow = workflow
print(f"Workflow started: {workflow.id}")
- # Wait for workflow completion
+ # Wait for workflow completion (no timeout - wait indefinitely)
print(f"Waiting for workflow completion...")
- completed = await self.waitForWorkflowCompletion(timeout=300) # 5 minute timeout
+ completed = await self.waitForWorkflowCompletion(timeout=None)
if not completed:
return {
"success": False,
- "error": "Workflow did not complete within timeout",
+ "error": "Workflow did not complete",
"workflowId": workflow.id,
"status": workflow.status if workflow else "unknown"
}
@@ -243,7 +243,7 @@ class DocumentGenerationFormatsTester:
"results": results
}
- async def waitForWorkflowCompletion(self, timeout: int = 300, checkInterval: int = 2) -> bool:
+ async def waitForWorkflowCompletion(self, timeout: Optional[int] = None, checkInterval: int = 2) -> bool:
"""Wait for workflow to complete."""
if not self.workflow:
return False
@@ -253,9 +253,12 @@ class DocumentGenerationFormatsTester:
interfaceDbChat = interfaceDbChatObjects.getInterface(self.testUser)
+ if timeout is None:
+ print("Waiting indefinitely (no timeout)")
+
while True:
- # Check timeout
- if time.time() - startTime > timeout:
+ # Check timeout only if specified
+ if timeout is not None and time.time() - startTime > timeout:
print(f"\n⏱️ Timeout after {timeout} seconds")
return False
@@ -455,13 +458,13 @@ class DocumentGenerationFormatsTester:
self.workflow = workflow
print(f"Workflow started: {workflow.id}")
- # Wait for workflow completion
- completed = await self.waitForWorkflowCompletion(timeout=300)
+ # Wait for workflow completion (no timeout - wait indefinitely)
+ completed = await self.waitForWorkflowCompletion(timeout=None)
if not completed:
results[testType] = {
"success": False,
- "error": "Workflow did not complete within timeout",
+ "error": "Workflow did not complete",
"workflowId": workflow.id
}
continue