From 723f98ea7a1d3413a48f9df8e9bbb1f888ae511f Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Sun, 28 Dec 2025 11:43:42 +0100
Subject: [PATCH] enhance generation engine: chapters as structure,
 renderers render the pipeline and deliver 1..n documents
---
modules/datamodels/datamodelDocument.py | 12 +
modules/services/serviceAi/mainServiceAi.py | 98 ++-
.../services/serviceAi/subStructureFilling.py | 605 ++++++++++++++++--
.../serviceAi/subStructureGeneration.py | 88 +--
.../mainServiceGeneration.py | 96 ++-
.../renderers/rendererBaseTemplate.py | 39 +-
.../renderers/rendererCsv.py | 22 +-
.../renderers/rendererDocx.py | 43 +-
.../renderers/rendererHtml.py | 134 +++-
.../renderers/rendererImage.py | 32 +-
.../renderers/rendererJson.py | 31 +-
.../renderers/rendererMarkdown.py | 31 +-
.../renderers/rendererPdf.py | 44 +-
.../renderers/rendererPptx.py | 41 +-
.../renderers/rendererText.py | 31 +-
.../renderers/rendererXlsx.py | 34 +-
.../processing/adaptive/contentValidator.py | 24 +
17 files changed, 1141 insertions(+), 264 deletions(-)
diff --git a/modules/datamodels/datamodelDocument.py b/modules/datamodels/datamodelDocument.py
index 3f2f8f8e..2f5af99a 100644
--- a/modules/datamodels/datamodelDocument.py
+++ b/modules/datamodels/datamodelDocument.py
@@ -107,5 +107,17 @@ class StructuredDocument(BaseModel):
+class RenderedDocument(BaseModel):
+ """A single rendered document from a renderer."""
+ documentData: bytes = Field(description="Document content as bytes")
+ mimeType: str = Field(description="MIME type of the document (e.g., 'text/html', 'application/pdf')")
+ filename: str = Field(description="Filename for the document (e.g., 'report.html', 'image.png')")
+
+ class Config:
+ json_encoders = {
+ bytes: lambda v: v.decode('utf-8', errors='replace') if isinstance(v, bytes) else v
+ }
+
+
# Update forward references
ListItem.model_rebuild()
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 777e6230..9839093d 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -11,6 +11,7 @@ from modules.services.serviceExtraction.mainServiceExtraction import ExtractionS
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
+from modules.datamodels.datamodelDocument import RenderedDocument
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.jsonUtils import (
extractJsonString,
@@ -50,7 +51,7 @@ class AiService:
if self.extractionService is None:
logger.info("Initializing ExtractionService...")
self.extractionService = ExtractionService(self.services)
-
+
# Initialize new submodules
from modules.services.serviceAi.subResponseParsing import ResponseParser
from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer
@@ -277,7 +278,7 @@ Respond with ONLY a JSON object in this exact format:
) -> str:
"""Delegate to ResponseParser."""
return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)
-
+
# Public API Methods
# Planning AI Call
@@ -494,20 +495,21 @@ Respond with ONLY a JSON object in this exact format:
title: str,
userPrompt: str,
parentOperationId: str
- ) -> Tuple[bytes, str]:
+ ) -> List[RenderedDocument]:
"""
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
- Unterstützt Multi-Dokument-Rendering: Alle Dokumente werden gerendert.
+ Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben.
Args:
filledStructure: Gefüllte Struktur mit elements
- outputFormat: Ziel-Format (pdf, docx, html, etc.)
+ outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet
title: Dokument-Titel
userPrompt: User-Anfrage
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
-
+
Returns:
- Tuple von (renderedContent, mimeType)
+ List of RenderedDocument objects.
+ Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei)
"""
# Erstelle Operation-ID für Rendering
renderOperationId = f"{parentOperationId}_rendering"
@@ -526,51 +528,21 @@ Respond with ONLY a JSON object in this exact format:
generationService = GenerationService(self.services)
- # Multi-Dokument-Rendering
- documents = filledStructure.get("documents", [])
-
- if len(documents) == 1:
- # Einzelnes Dokument - wie bisher
- renderedContent, mimeType, images = await generationService.renderReport(
- filledStructure,
- outputFormat,
- title,
- userPrompt,
- self,
- parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
- )
- else:
- # Mehrere Dokumente - rendere alle
- # Option: Alle Sections zusammenführen und als ein Dokument rendern
- all_sections = []
- for doc in documents:
- if "sections" in doc:
- all_sections.extend(doc.get("sections", []))
-
- # Erstelle temporäres Dokument mit allen Sections
- merged_document = {
- "metadata": filledStructure["metadata"],
- "documents": [{
- "id": "merged",
- "title": title,
- "filename": f"{title}.{outputFormat}",
- "sections": all_sections
- }]
- }
-
- renderedContent, mimeType, images = await generationService.renderReport(
- merged_document,
- outputFormat,
- title,
- userPrompt,
- self,
- parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
- )
+            # renderReport verarbeitet jetzt jedes Dokument einzeln
+            # und gibt eine Liste von RenderedDocument-Objekten zurück
+ renderedDocuments = await generationService.renderReport(
+ filledStructure,
+ outputFormat,
+ title,
+ userPrompt,
+ self,
+ parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
+ )
# ChatLog abschließen
self.services.chat.progressLogFinish(renderOperationId, True)
- return renderedContent, mimeType
+ return renderedDocuments
except Exception as e:
self.services.chat.progressLogFinish(renderOperationId, False)
@@ -712,7 +684,8 @@ Respond with ONLY a JSON object in this exact format:
)
# Schritt 5E: Rendere Resultat
- renderedContent, mimeType = await self._renderResult(
+ # Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
+ renderedDocuments = await self._renderResult(
filledStructure,
outputFormat,
title or "Generated Document",
@@ -720,15 +693,24 @@ Respond with ONLY a JSON object in this exact format:
aiOperationId
)
- # Baue Response
- documentName = self._determineDocumentName(filledStructure, outputFormat, title)
+ # Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
+ documentDataList = []
+ for renderedDoc in renderedDocuments:
+ try:
+ # Erstelle DocumentData für jedes gerenderte Dokument
+ docDataObj = DocumentData(
+ documentName=renderedDoc.filename,
+ documentData=renderedDoc.documentData,
+ mimeType=renderedDoc.mimeType,
+ sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
+ )
+ documentDataList.append(docDataObj)
+ logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
+ except Exception as e:
+ logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
- docData = DocumentData(
- documentName=documentName,
- documentData=renderedContent,
- mimeType=mimeType,
- sourceJson=filledStructure
- )
+ if not documentDataList:
+ raise ValueError("No documents were rendered")
metadata = AiResponseMetadata(
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
@@ -746,7 +728,7 @@ Respond with ONLY a JSON object in this exact format:
return AiResponse(
content=json.dumps(filledStructure),
metadata=metadata,
- documents=[docData]
+ documents=documentDataList
)
except Exception as e:
diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py
index cc45b099..d93264af 100644
--- a/modules/services/serviceAi/subStructureFilling.py
+++ b/modules/services/serviceAi/subStructureFilling.py
@@ -35,65 +35,184 @@ class StructureFiller:
parentOperationId: str
) -> Dict[str, Any]:
"""
- Phase 5D: Füllt Struktur mit tatsächlichem Content.
- Für jede Section:
- - Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format
- - Wenn generation_hint spezifiziert: Generiere AI-Content
+ Phase 5D: Chapter-Content-Generierung (Zwei-Phasen-Ansatz).
- **Implementierungsdetails:**
- - Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung)
- - Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses)
+ Phase 5D.1: Generiert Sections-Struktur für jedes Chapter
+ Phase 5D.2: Füllt Sections mit ContentParts
Args:
- structure: Struktur-Dict mit documents und sections
+ structure: Struktur-Dict mit documents und chapters (nicht sections!)
contentParts: Alle vorbereiteten ContentParts
userPrompt: User-Anfrage
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns:
- Gefüllte Struktur mit elements in jeder Section
+ Gefüllte Struktur mit elements in jeder Section (nach Flattening)
"""
# Erstelle Operation-ID für Struktur-Abfüllen
fillOperationId = f"{parentOperationId}_structure_filling"
+ # Prüfe ob Struktur Chapters oder Sections hat
+ hasChapters = False
+ for doc in structure.get("documents", []):
+ if "chapters" in doc:
+ hasChapters = True
+ break
+
+ if not hasChapters:
+ # Fallback: Alte Struktur mit Sections direkt - verwende alte Logik
+ logger.warning("Structure has no chapters, using legacy section-based filling")
+ return await self._fillStructureLegacy(structure, contentParts, userPrompt, fillOperationId)
+
# Starte ChatLog mit Parent-Referenz
+ chapterCount = sum(len(doc.get("chapters", [])) for doc in structure.get("documents", []))
self.services.chat.progressLogStart(
fillOperationId,
- "Structure Filling",
+ "Chapter Content Generation",
"Filling",
- f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections",
+ f"Processing {chapterCount} chapters",
parentOperationId=parentOperationId
)
try:
filledStructure = copy.deepcopy(structure)
- # Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden)
- sections_to_process = []
- all_sections_list = [] # Für Kontext-Informationen
- for doc in filledStructure.get("documents", []):
- doc_sections = doc.get("sections", [])
- all_sections_list.extend(doc_sections)
- for section in doc_sections:
- sections_to_process.append((doc, section))
+ # Phase 5D.1: Sections-Struktur für jedes Chapter generieren
+ filledStructure = await self._generateChapterSectionsStructure(
+ filledStructure, contentParts, userPrompt, fillOperationId
+ )
- # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden)
- for sectionIndex, (doc, section) in enumerate(sections_to_process):
- sectionId = section.get("id")
- contentPartIds = section.get("contentPartIds", [])
- contentFormats = section.get("contentFormats", {})
- generationHint = section.get("generation_hint")
- contentType = section.get("content_type", "paragraph")
+ # Phase 5D.2: Sections mit ContentParts füllen
+ filledStructure = await self._fillChapterSections(
+ filledStructure, contentParts, userPrompt, fillOperationId
+ )
+
+ # Flattening: Chapters zu Sections konvertieren
+ flattenedStructure = self._flattenChaptersToSections(filledStructure)
+
+ # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
+ flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts)
+
+ # ChatLog abschließen
+ self.services.chat.progressLogFinish(fillOperationId, True)
+
+ return flattenedStructure
+
+ except Exception as e:
+ self.services.chat.progressLogFinish(fillOperationId, False)
+ logger.error(f"Error in fillStructure: {str(e)}")
+ raise
+
+ async def _generateChapterSectionsStructure(
+ self,
+ chapterStructure: Dict[str, Any],
+ contentParts: List[ContentPart],
+ userPrompt: str,
+ parentOperationId: str
+ ) -> Dict[str, Any]:
+ """
+ Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content).
+ Sections enthalten: content_type, contentPartIds, generationHint, useAiCall
+ """
+ for doc in chapterStructure.get("documents", []):
+ for chapter in doc.get("chapters", []):
+ chapterId = chapter.get("id", "unknown")
+ chapterLevel = chapter.get("level", 1)
+ chapterTitle = chapter.get("title", "")
+ generationHint = chapter.get("generationHint", "")
+ contentPartIds = chapter.get("contentPartIds", [])
+ contentPartInstructions = chapter.get("contentPartInstructions", {})
- elements = []
-
- # Prüfe ob Aggregation nötig ist
- needsAggregation = self._needsAggregation(
- contentType=contentType,
- contentPartCount=len(contentPartIds)
+ chapterPrompt = self._buildChapterSectionsStructurePrompt(
+ chapterId=chapterId,
+ chapterLevel=chapterLevel,
+ chapterTitle=chapterTitle,
+ generationHint=generationHint,
+ contentPartIds=contentPartIds,
+ contentPartInstructions=contentPartInstructions,
+ contentParts=contentParts,
+ userPrompt=userPrompt
)
- if needsAggregation and generationHint:
+ # Debug: Log Prompt
+ self.services.utils.writeDebugFile(
+ chapterPrompt,
+ f"chapter_structure_{chapterId}_prompt"
+ )
+
+ aiResponse = await self.aiService.callAiPlanning(
+ prompt=chapterPrompt,
+ debugType=f"chapter_structure_{chapterId}"
+ )
+
+ # Debug: Log Response
+ self.services.utils.writeDebugFile(
+ aiResponse,
+ f"chapter_structure_{chapterId}_response"
+ )
+
+ sectionsStructure = json.loads(
+ self.services.utils.jsonExtractString(aiResponse)
+ )
+
+ chapter["sections"] = sectionsStructure.get("sections", [])
+
+ # Setze useAiCall Flag (falls nicht von AI gesetzt)
+ for section in chapter["sections"]:
+ if "useAiCall" not in section:
+ contentType = section.get("content_type", "paragraph")
+ useAiCall = contentType != "paragraph"
+
+ # Prüfe contentPartInstructions
+ if not useAiCall:
+ for partId in section.get("contentPartIds", []):
+ instruction = contentPartInstructions.get(partId, {}).get("instruction", "")
+ if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]:
+ useAiCall = True
+ break
+
+ section["useAiCall"] = useAiCall
+
+ return chapterStructure
+
+ async def _fillChapterSections(
+ self,
+ chapterStructure: Dict[str, Any],
+ contentParts: List[ContentPart],
+ userPrompt: str,
+ parentOperationId: str
+ ) -> Dict[str, Any]:
+ """
+ Phase 5D.2: Füllt Sections mit ContentParts.
+ """
+ # Sammle alle Sections für sequenzielle Verarbeitung
+ sections_to_process = []
+ all_sections_list = [] # Für Kontext-Informationen
+ for doc in chapterStructure.get("documents", []):
+ for chapter in doc.get("chapters", []):
+ for section in chapter.get("sections", []):
+ all_sections_list.append(section)
+ sections_to_process.append((doc, chapter, section))
+
+ # Sequenzielle Section-Generierung
+ fillOperationId = parentOperationId
+ for sectionIndex, (doc, chapter, section) in enumerate(sections_to_process):
+ sectionId = section.get("id")
+ contentPartIds = section.get("contentPartIds", [])
+ contentFormats = section.get("contentFormats", {})
+ generationHint = section.get("generation_hint")
+ contentType = section.get("content_type", "paragraph")
+ useAiCall = section.get("useAiCall", False)
+
+ elements = []
+
+ # Prüfe ob Aggregation nötig ist
+ needsAggregation = self._needsAggregation(
+ contentType=contentType,
+ contentPartCount=len(contentPartIds)
+ )
+
+ if needsAggregation and useAiCall:
# Aggregation: Alle Parts zusammen verarbeiten
sectionParts = [
self._findContentPartById(pid, contentParts)
@@ -201,8 +320,8 @@ class StructureFiller:
})
logger.error(f"Error generating section {sectionId}: {str(e)}")
# NICHT raise - Section wird mit Fehlermeldung gerendert
-
- else:
+
+ else:
# Einzelverarbeitung: Jeder Part einzeln
for partId in contentPartIds:
part = self._findContentPartById(partId, contentParts)
@@ -308,19 +427,429 @@ class StructureFiller:
"source": part.metadata.get("documentId"),
"extractionPrompt": part.metadata.get("extractionPrompt")
})
+
+ section["elements"] = elements
+
+ return chapterStructure
+
+ def _addContentPartsMetadata(
+ self,
+ structure: Dict[str, Any],
+ contentParts: List[ContentPart]
+ ) -> Dict[str, Any]:
+ """
+ Fügt ContentParts-Metadaten zur Struktur hinzu, wenn contentPartIds vorhanden sind.
+ Dies hilft der Validierung, den Kontext der ContentParts zu verstehen.
+ """
+ # Erstelle Mapping von ContentPart-ID zu Metadaten
+ contentPartsMap = {}
+ for part in contentParts:
+ contentPartsMap[part.id] = {
+ "id": part.id,
+ "format": part.metadata.get("contentFormat", "unknown"),
+ "type": part.typeGroup,
+ "mimeType": part.mimeType,
+ "originalFileName": part.metadata.get("originalFileName"),
+ "usageHint": part.metadata.get("usageHint"),
+ "documentId": part.metadata.get("documentId"),
+ "dataSize": len(str(part.data)) if part.data else 0
+ }
+
+ # Füge Metadaten zu Sections hinzu, die contentPartIds haben
+ for doc in structure.get("documents", []):
+ # Prüfe ob Chapters vorhanden sind (neue Struktur)
+ if "chapters" in doc:
+ for chapter in doc.get("chapters", []):
+ # Füge Metadaten zu Chapter-Level contentPartIds hinzu
+ chapterContentPartIds = chapter.get("contentPartIds", [])
+ if chapterContentPartIds:
+ chapter["contentPartsMetadata"] = []
+ for partId in chapterContentPartIds:
+ if partId in contentPartsMap:
+ chapter["contentPartsMetadata"].append(contentPartsMap[partId])
+
+ # Füge Metadaten zu Sections hinzu
+ for section in chapter.get("sections", []):
+ contentPartIds = section.get("contentPartIds", [])
+ if contentPartIds:
+ section["contentPartsMetadata"] = []
+ for partId in contentPartIds:
+ if partId in contentPartsMap:
+ section["contentPartsMetadata"].append(contentPartsMap[partId])
+
+ # Prüfe ob Sections direkt vorhanden sind (Legacy-Struktur)
+ elif "sections" in doc:
+ for section in doc.get("sections", []):
+ contentPartIds = section.get("contentPartIds", [])
+ if contentPartIds:
+ section["contentPartsMetadata"] = []
+ for partId in contentPartIds:
+ if partId in contentPartsMap:
+ section["contentPartsMetadata"].append(contentPartsMap[partId])
+
+ return structure
+
+ def _flattenChaptersToSections(
+ self,
+ chapterStructure: Dict[str, Any]
+ ) -> Dict[str, Any]:
+ """
+ Flattening: Konvertiert Chapters zu finaler Section-Struktur.
+ Jedes Chapter wird zu einer Heading-Section + dessen Sections.
+ """
+ result = {
+ "metadata": chapterStructure.get("metadata", {}),
+ "documents": []
+ }
+
+ for doc in chapterStructure.get("documents", []):
+ flattened_doc = {
+ "id": doc.get("id"),
+ "title": doc.get("title"),
+ "filename": doc.get("filename"),
+ "sections": []
+ }
+
+ for chapter in doc.get("chapters", []):
+ # 1. Vordefinierte Heading-Section für Chapter-Title
+ heading_section = {
+ "id": f"{chapter['id']}_heading",
+ "content_type": "heading",
+ "elements": [{
+ "type": "heading",
+ "content": chapter.get("title"),
+ "level": chapter.get("level", 1)
+ }]
+ }
+ flattened_doc["sections"].append(heading_section)
+
+ # 2. Generierte Sections
+ flattened_doc["sections"].extend(chapter.get("sections", []))
+
+ result["documents"].append(flattened_doc)
+
+ return result
+
+ async def _fillStructureLegacy(
+ self,
+ structure: Dict[str, Any],
+ contentParts: List[ContentPart],
+ userPrompt: str,
+ fillOperationId: str
+ ) -> Dict[str, Any]:
+ """
+ Legacy: Füllt Struktur mit Sections direkt (für Rückwärtskompatibilität).
+ """
+ # Starte ChatLog
+ self.services.chat.progressLogStart(
+ fillOperationId,
+ "Structure Filling (Legacy)",
+ "Filling",
+ f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections",
+            parentOperationId=fillOperationId.removesuffix("_structure_filling")  # echter Parent statt Selbstreferenz
+ )
+
+ try:
+ filledStructure = copy.deepcopy(structure)
+
+ # Sammle alle Sections
+ sections_to_process = []
+ all_sections_list = []
+ for doc in filledStructure.get("documents", []):
+ doc_sections = doc.get("sections", [])
+ all_sections_list.extend(doc_sections)
+ for section in doc_sections:
+ sections_to_process.append((doc, section))
+
+ # Verarbeite Sections (bestehende Logik)
+ for sectionIndex, (doc, section) in enumerate(sections_to_process):
+ sectionId = section.get("id")
+ contentPartIds = section.get("contentPartIds", [])
+ contentFormats = section.get("contentFormats", {})
+ generationHint = section.get("generation_hint")
+ contentType = section.get("content_type", "paragraph")
+
+ elements = []
+
+ # Prüfe ob Aggregation nötig ist
+ needsAggregation = self._needsAggregation(
+ contentType=contentType,
+ contentPartCount=len(contentPartIds)
+ )
+
+ if needsAggregation and generationHint:
+ # Aggregation: Alle Parts zusammen verarbeiten
+ sectionParts = [
+ self._findContentPartById(pid, contentParts)
+ for pid in contentPartIds
+ ]
+ sectionParts = [p for p in sectionParts if p is not None]
+
+ if sectionParts:
+ # Filtere nur extracted Parts für Aggregation
+ extractedParts = [
+ p for p in sectionParts
+ if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted"
+ ]
+ nonExtractedParts = [
+ p for p in sectionParts
+ if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted"
+ ]
+
+ # Verarbeite non-extracted Parts separat
+ for part in nonExtractedParts:
+ contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat"))
+
+ if contentFormat == "reference":
+ elements.append({
+ "type": "reference",
+ "documentReference": part.metadata.get("documentReference"),
+ "label": part.metadata.get("usageHint", part.label)
+ })
+ elif contentFormat == "object":
+ elements.append({
+ "type": part.typeGroup,
+ "base64Data": part.data,
+ "mimeType": part.mimeType,
+ "altText": part.metadata.get("usageHint", part.label)
+ })
+
+ # Aggregiere extracted Parts mit AI
+ if extractedParts:
+ generationPrompt = self._buildSectionGenerationPrompt(
+ section=section,
+ contentParts=extractedParts,
+ userPrompt=userPrompt,
+ generationHint=generationHint,
+ allSections=all_sections_list,
+ sectionIndex=sectionIndex,
+ isAggregation=True
+ )
+
+ sectionOperationId = f"{fillOperationId}_section_{sectionId}"
+
+ self.services.chat.progressLogStart(
+ sectionOperationId,
+ "Section Generation (Aggregation)",
+ "Section",
+ f"Generating section {sectionId} with {len(extractedParts)} parts",
+ parentOperationId=fillOperationId
+ )
+
+ try:
+ self.services.utils.writeDebugFile(
+ generationPrompt,
+ f"section_content_{sectionId}_prompt"
+ )
+
+ request = AiCallRequest(
+ prompt=generationPrompt,
+ contentParts=extractedParts,
+ options=AiCallOptions(
+ operationType=OperationTypeEnum.DATA_ANALYSE,
+ priority=PriorityEnum.BALANCED,
+ processingMode=ProcessingModeEnum.DETAILED
+ )
+ )
+ aiResponse = await self.aiService.callAi(request)
+
+ self.services.utils.writeDebugFile(
+ aiResponse.content,
+ f"section_content_{sectionId}_response"
+ )
+
+ generatedElements = json.loads(
+ self.services.utils.jsonExtractString(aiResponse.content)
+ )
+ if isinstance(generatedElements, list):
+ elements.extend(generatedElements)
+ elif isinstance(generatedElements, dict) and "elements" in generatedElements:
+ elements.extend(generatedElements["elements"])
+
+ self.services.chat.progressLogFinish(sectionOperationId, True)
+
+ except Exception as e:
+ self.services.chat.progressLogFinish(sectionOperationId, False)
+ elements.append({
+ "type": "error",
+ "message": f"Error generating section {sectionId}: {str(e)}",
+ "sectionId": sectionId
+ })
+ logger.error(f"Error generating section {sectionId}: {str(e)}")
+
+ else:
+ # Einzelverarbeitung: Jeder Part einzeln
+ for partId in contentPartIds:
+ part = self._findContentPartById(partId, contentParts)
+ if not part:
+ continue
+
+ contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat"))
+
+ if contentFormat == "reference":
+ elements.append({
+ "type": "reference",
+ "documentReference": part.metadata.get("documentReference"),
+ "label": part.metadata.get("usageHint", part.label)
+ })
+
+ elif contentFormat == "object":
+ elements.append({
+ "type": part.typeGroup,
+ "base64Data": part.data,
+ "mimeType": part.mimeType,
+ "altText": part.metadata.get("usageHint", part.label)
+ })
+
+ elif contentFormat == "extracted":
+ if generationHint:
+ # AI-Call mit einzelnen ContentPart
+ generationPrompt = self._buildSectionGenerationPrompt(
+ section=section,
+ contentParts=[part],
+ userPrompt=userPrompt,
+ generationHint=generationHint,
+ allSections=all_sections_list,
+ sectionIndex=sectionIndex,
+ isAggregation=False
+ )
+
+ sectionOperationId = f"{fillOperationId}_section_{sectionId}"
+
+ self.services.chat.progressLogStart(
+ sectionOperationId,
+ "Section Generation",
+ "Section",
+ f"Generating section {sectionId}",
+ parentOperationId=fillOperationId
+ )
+
+ try:
+ self.services.utils.writeDebugFile(
+ generationPrompt,
+ f"section_content_{sectionId}_prompt"
+ )
+
+ request = AiCallRequest(
+ prompt=generationPrompt,
+ contentParts=[part],
+ options=AiCallOptions(
+ operationType=OperationTypeEnum.DATA_ANALYSE,
+ priority=PriorityEnum.BALANCED,
+ processingMode=ProcessingModeEnum.DETAILED
+ )
+ )
+ aiResponse = await self.aiService.callAi(request)
+
+ self.services.utils.writeDebugFile(
+ aiResponse.content,
+ f"section_content_{sectionId}_response"
+ )
+
+ generatedElements = json.loads(
+ self.services.utils.jsonExtractString(aiResponse.content)
+ )
+ if isinstance(generatedElements, list):
+ elements.extend(generatedElements)
+ elif isinstance(generatedElements, dict) and "elements" in generatedElements:
+ elements.extend(generatedElements["elements"])
+
+ self.services.chat.progressLogFinish(sectionOperationId, True)
+
+ except Exception as e:
+ self.services.chat.progressLogFinish(sectionOperationId, False)
+ elements.append({
+ "type": "error",
+ "message": f"Error generating section {sectionId}: {str(e)}",
+ "sectionId": sectionId
+ })
+ logger.error(f"Error generating section {sectionId}: {str(e)}")
+ else:
+ elements.append({
+ "type": "extracted_text",
+ "content": part.data,
+ "source": part.metadata.get("documentId"),
+ "extractionPrompt": part.metadata.get("extractionPrompt")
+ })
section["elements"] = elements
- # ChatLog abschließen
- self.services.chat.progressLogFinish(fillOperationId, True)
+ # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
+ filledStructure = self._addContentPartsMetadata(filledStructure, contentParts)
+ self.services.chat.progressLogFinish(fillOperationId, True)
return filledStructure
except Exception as e:
self.services.chat.progressLogFinish(fillOperationId, False)
- logger.error(f"Error in fillStructure: {str(e)}")
+ logger.error(f"Error in _fillStructureLegacy: {str(e)}")
raise
+ def _buildChapterSectionsStructurePrompt(
+ self,
+ chapterId: str,
+ chapterLevel: int,
+ chapterTitle: str,
+ generationHint: str,
+ contentPartIds: List[str],
+ contentPartInstructions: Dict[str, Any],
+ contentParts: List[ContentPart],
+ userPrompt: str
+ ) -> str:
+ """Baue Prompt für Chapter-Sections-Struktur-Generierung."""
+ # Baue ContentParts-Index (nur IDs, keine Previews!)
+ contentPartsIndex = ""
+ for partId in contentPartIds:
+ part = self._findContentPartById(partId, contentParts)
+ if not part:
+ continue
+
+ contentFormat = part.metadata.get("contentFormat", "unknown")
+ instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed")
+
+ contentPartsIndex += f"\n- ContentPart ID: {partId}\n"
+ contentPartsIndex += f" Format: {contentFormat}\n"
+ contentPartsIndex += f" Type: {part.typeGroup}\n"
+ contentPartsIndex += f" Instruction: {instruction}\n"
+
+ if not contentPartsIndex:
+ contentPartsIndex = "\n(No content parts specified for this chapter)"
+
+ prompt = f"""TASK: Generate Chapter Sections Structure
+
+CHAPTER METADATA:
+- Chapter ID: {chapterId}
+- Chapter Level: {chapterLevel}
+- Chapter Title: {chapterTitle}
+- Generation Hint: {generationHint}
+
+WICHTIG: Chapter hat bereits vordefinierte Heading-Section.
+Generiere NICHT eine Heading-Section für Chapter-Title!
+
+AVAILABLE CONTENT PARTS:
+{contentPartsIndex}
+
+STANDARD JSON SCHEMA FOR SECTIONS:
+Supported content_types: table, bullet_list, heading, paragraph, code_block, image
+
+Return JSON:
+{{
+ "sections": [
+ {{
+ "id": "section_1",
+ "content_type": "paragraph",
+ "contentPartIds": ["part_ext_1"],
+ "generationHint": "...",
+ "useAiCall": false,
+ "elements": []
+ }}
+ ]
+}}
+
+CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
+"""
+ return prompt
+
def _buildSectionGenerationPrompt(
self,
section: Dict[str, Any],
diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py
index eb39fdd6..a4d7a19e 100644
--- a/modules/services/serviceAi/subStructureGeneration.py
+++ b/modules/services/serviceAi/subStructureGeneration.py
@@ -32,11 +32,12 @@ class StructureGenerator:
parentOperationId: str
) -> Dict[str, Any]:
"""
- Phase 5C: Generiert Dokument-Struktur mit Sections.
- Jede Section spezifiziert:
- - Welcher Content sollte in dieser Section sein
- - Welche ContentParts zu verwenden sind
- - Format für jeden ContentPart
+ Phase 5C: Generiert Chapter-Struktur (Table of Contents).
+ Definiert für jedes Chapter:
+ - Level, Title
+ - contentPartIds
+ - contentPartInstructions
+ - generationHint
Args:
userPrompt: User-Anfrage
@@ -45,7 +46,7 @@ class StructureGenerator:
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns:
- Struktur-Dict mit documents und sections
+ Struktur-Dict mit documents und chapters (nicht sections!)
"""
# Erstelle Operation-ID für Struktur-Generierung
structureOperationId = f"{parentOperationId}_structure_generation"
@@ -53,25 +54,36 @@ class StructureGenerator:
# Starte ChatLog mit Parent-Referenz
self.services.chat.progressLogStart(
structureOperationId,
- "Structure Generation",
+ "Chapter Structure Generation",
"Structure",
- f"Generating structure for {outputFormat}",
+ f"Generating chapter structure for {outputFormat}",
parentOperationId=parentOperationId
)
try:
- # Baue Struktur-Prompt mit Content-Index
- structurePrompt = self._buildStructurePrompt(
+ # Baue Chapter-Struktur-Prompt mit Content-Index
+ structurePrompt = self._buildChapterStructurePrompt(
userPrompt=userPrompt,
contentParts=contentParts,
outputFormat=outputFormat
)
- # AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses)
- # Debug-Logs werden bereits von callAiPlanning geschrieben
+ # Debug: Log Prompt
+ self.services.utils.writeDebugFile(
+ structurePrompt,
+ "chapter_structure_generation_prompt"
+ )
+
+ # AI-Call für Chapter-Struktur-Generierung
aiResponse = await self.aiService.callAiPlanning(
prompt=structurePrompt,
- debugType="document_generation_structure"
+ debugType="chapter_structure_generation"
+ )
+
+ # Debug: Log Response
+ self.services.utils.writeDebugFile(
+ aiResponse,
+ "chapter_structure_generation_response"
)
# Parse Struktur
@@ -87,13 +99,13 @@ class StructureGenerator:
logger.error(f"Error in generateStructure: {str(e)}")
raise
- def _buildStructurePrompt(
+ def _buildChapterStructurePrompt(
self,
userPrompt: str,
contentParts: List[ContentPart],
outputFormat: str
) -> str:
- """Baue Prompt für Struktur-Generierung."""
+ """Baue Prompt für Chapter-Struktur-Generierung."""
# Baue ContentParts-Index - filtere leere Parts heraus
contentPartsIndex = ""
validParts = []
@@ -179,14 +191,19 @@ class StructureGenerator:
AVAILABLE CONTENT PARTS:
{contentPartsIndex}
-TASK: Generiere Dokument-Struktur mit Sections.
-Für jede Section, spezifiziere:
-- section id
-- content_type (heading, paragraph, image, table, etc.)
-- contentPartIds: [Liste von ContentPart-IDs zu verwenden]
-- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist
-- generation_hint: Was AI für diese Section generieren soll
-- elements: [] (leer, wird in nächster Phase gefüllt)
+TASK: Generiere Chapter-Struktur für die zu generierenden Dokumente.
+
+Für jedes Chapter:
+- chapter id
+- level (1, 2, 3, etc.)
+- title
+- contentPartIds: [Liste von ContentPart-IDs]
+- contentPartInstructions: {{
+ "partId": {{
+ "instruction": "Wie Content strukturiert werden soll"
+ }}
+}}
+- generationHint: Beschreibung des Inhalts
OUTPUT FORMAT: {outputFormat}
@@ -200,24 +217,19 @@ RETURN JSON:
"id": "doc_1",
"title": "Document Title",
"filename": "document.{outputFormat}",
- "sections": [
+ "chapters": [
{{
- "id": "section_1",
- "content_type": "heading",
- "generation_hint": "Main title",
- "contentPartIds": [],
- "contentFormats": {{}},
- "elements": []
- }},
- {{
- "id": "section_2",
- "content_type": "paragraph",
- "generation_hint": "Introduction paragraph",
+ "id": "chapter_1",
+ "level": 1,
+ "title": "Introduction",
"contentPartIds": ["part_ext_1"],
- "contentFormats": {{
- "part_ext_1": "extracted"
+ "contentPartInstructions": {{
+ "part_ext_1": {{
+ "instruction": "Use full extracted text"
+ }}
}},
- "elements": []
+ "generationHint": "Create introduction section",
+ "sections": []
}}
]
}}]
diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py
index e08eaa81..828f1033 100644
--- a/modules/services/serviceGeneration/mainServiceGeneration.py
+++ b/modules/services/serviceGeneration/mainServiceGeneration.py
@@ -5,6 +5,7 @@ import uuid
import base64
import traceback
from typing import Any, Dict, List, Optional, Callable
+from modules.datamodels.datamodelDocument import RenderedDocument
from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceGeneration.subDocumentUtility import (
getFileExtension,
@@ -345,31 +346,31 @@ class GenerationService:
'workflowId': 'unknown'
}
- async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]:
+ async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
"""
Render extracted JSON content to the specified output format.
- Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering).
- Always uses unified "documents" array format.
- Supports three content formats: reference, object (base64), extracted_text.
+ Processes EACH document separately and calls renderer for each.
+ Each renderer can return 1..n documents (e.g., HTML + images).
Args:
- extractedContent: Structured JSON document from AI extraction
+ extractedContent: Structured JSON document with documents array
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
+ In future, each document can have its own format
title: Report title
userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation
parentOperationId: Optional parent operation ID for hierarchical logging
Returns:
- tuple: (rendered_content, mime_type, images_list)
- images_list: List of image dicts with base64Data, altText, caption, etc.
+ List of RenderedDocument objects.
+ Each RenderedDocument represents one rendered file (main document or supporting file)
"""
try:
# Validate JSON input
if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary")
- # Unified approach: Always expect "documents" array (single doc = n=1)
+ # Unified approach: Always expect "documents" array
if "documents" not in extractedContent:
raise ValueError("extractedContent must contain 'documents' array")
@@ -377,56 +378,45 @@ class GenerationService:
if len(documents) == 0:
raise ValueError("No documents found in 'documents' array")
- # Phase 5: Multi-Dokument-Rendering
- if len(documents) == 1:
- # Single document - use existing logic
- single_doc = documents[0]
- if "sections" not in single_doc:
- raise ValueError("Document must contain 'sections' field")
+ metadata = extractedContent.get("metadata", {})
+ allRenderedDocuments = []
+
+ # Process EACH document separately
+ for docIndex, doc in enumerate(documents):
+ if not isinstance(doc, dict):
+ logger.warning(f"Skipping invalid document at index {docIndex}")
+ continue
- # Pass standardized schema to renderer (maintains architecture)
- contentToRender = extractedContent # Pass full standardized schema
- else:
- # Multiple documents - merge all sections into one document for rendering
- # Option: Merge all sections from all documents into a single document
- all_sections = []
- for doc in documents:
- if isinstance(doc, dict) and "sections" in doc:
- sections = doc.get("sections", [])
- if isinstance(sections, list):
- all_sections.extend(sections)
+ if "sections" not in doc:
+ logger.warning(f"Document {doc.get('id', docIndex)} has no sections, skipping")
+ continue
- if not all_sections:
- raise ValueError("No sections found in any document")
+ # Determine format for this document
+ # TODO: In future, each document can have its own format field
+ # For now, use the global outputFormat
+ docFormat = doc.get("format", outputFormat)
- # Create merged document with all sections
- merged_document = {
- "metadata": extractedContent.get("metadata", {}),
- "documents": [{
- "id": "merged",
- "title": title,
- "filename": f"{title}.{outputFormat}",
- "sections": all_sections
- }]
+ # Get renderer for this document's format
+ renderer = self._getFormatRenderer(docFormat)
+ if not renderer:
+ logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
+ continue
+
+ # Create JSON structure with single document (preserving metadata)
+ singleDocContent = {
+ "metadata": metadata,
+ "documents": [doc] # Only this document
}
- contentToRender = merged_document
- logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections")
-
- # Get the appropriate renderer for the format
- renderer = self._getFormatRenderer(outputFormat)
- if not renderer:
- raise ValueError(f"Unsupported output format: {outputFormat}")
+
+ # Use document title or fallback to provided title
+ docTitle = doc.get("title", title)
+
+ # Render this document (can return multiple files, e.g., HTML + images)
+ renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService)
+ allRenderedDocuments.extend(renderedDocs)
- # Render the JSON content directly (AI generation handled by main service)
- # Renderer receives standardized schema and extracts what it needs
- renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
-
- # Get images from renderer if available
- images = []
- if hasattr(renderer, 'getRenderedImages'):
- images = renderer.getRenderedImages()
-
- return renderedContent, mimeType, images
+ logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)")
+ return allRenderedDocuments
except Exception as e:
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
index e9693680..e15e0711 100644
--- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
+++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py
@@ -5,8 +5,9 @@ Base renderer class for all format renderers.
"""
from abc import ABC, abstractmethod
-from typing import Dict, Any, Tuple, List
+from typing import Dict, Any, List
from modules.datamodels.datamodelJson import supportedSectionTypes
+from modules.datamodels.datamodelDocument import RenderedDocument
import json
import logging
import re
@@ -50,21 +51,49 @@ class BaseRenderer(ABC):
return 0
@abstractmethod
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
- Render extracted JSON content to the target format.
+ Render extracted JSON content to multiple documents.
+ Each renderer must implement this method.
+ Can return 1..n documents (e.g., HTML + images).
Args:
- extractedContent: Structured JSON content with sections and metadata
+ extractedContent: Structured JSON content with sections and metadata (contains single document)
title: Report title
userPrompt: Original user prompt for context
aiService: AI service instance for additional processing
Returns:
- tuple: (renderedContent, mimeType)
+ List of RenderedDocument objects.
+ First document is the main document, additional documents are supporting files (e.g., images).
+ Even if only one document is returned, it must be wrapped in a list.
"""
pass
+ def _determineFilename(self, title: str, mimeType: str) -> str:
+ """Determine filename from title and mimeType."""
+ import re
+ # Get extension from mimeType
+ extensionMap = {
+ "text/html": "html",
+ "application/pdf": "pdf",
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
+ "text/plain": "txt",
+ "text/markdown": "md",
+ "application/json": "json",
+ "text/csv": "csv"
+ }
+ extension = extensionMap.get(mimeType, "txt")
+
+ # Sanitize title for filename
+ sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title)
+ sanitized = re.sub(r"_+", "_", sanitized).strip("_")
+ if not sanitized:
+ sanitized = "document"
+
+ return f"{sanitized}.{extension}"
+
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
diff --git a/modules/services/serviceGeneration/renderers/rendererCsv.py b/modules/services/serviceGeneration/renderers/rendererCsv.py
index c18d7481..52e2933d 100644
--- a/modules/services/serviceGeneration/renderers/rendererCsv.py
+++ b/modules/services/serviceGeneration/renderers/rendererCsv.py
@@ -5,7 +5,8 @@ CSV renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
class RendererCsv(BaseRenderer):
"""Renders content to CSV format with format-specific extraction."""
@@ -25,13 +26,28 @@ class RendererCsv(BaseRenderer):
"""Return priority for CSV renderer."""
return 70
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to CSV format."""
try:
# Generate CSV directly from JSON (no styling needed for CSV)
csvContent = await self._generateCsvFromJson(extractedContent, title)
- return csvContent, "text/csv"
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "text/csv")
+ else:
+ filename = self._determineFilename(title, "text/csv")
+
+ return [
+ RenderedDocument(
+ documentData=csvContent.encode('utf-8'),
+ mimeType="text/csv",
+ filename=filename
+ )
+ ]
except Exception as e:
self.logger.error(f"Error rendering CSV: {str(e)}")
diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py
index f62935d8..ee88369f 100644
--- a/modules/services/serviceGeneration/renderers/rendererDocx.py
+++ b/modules/services/serviceGeneration/renderers/rendererDocx.py
@@ -5,7 +5,8 @@ DOCX renderer for report generation using python-docx.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
import io
import base64
import re
@@ -38,7 +39,7 @@ class RendererDocx(BaseRenderer):
"""Return priority for DOCX renderer."""
return 115
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
try:
@@ -46,18 +47,48 @@ class RendererDocx(BaseRenderer):
# Fallback to HTML if python-docx not available
from .rendererHtml import RendererHtml
htmlRenderer = RendererHtml()
- htmlContent, _ = await htmlRenderer.render(extractedContent, title)
- return htmlContent, "text/html"
+ return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)
# Generate DOCX using AI-analyzed styling
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
- return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
+ else:
+ filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
+
+ # Convert DOCX content to bytes if it's a string (base64)
+ if isinstance(docx_content, str):
+ try:
+ docx_bytes = base64.b64decode(docx_content)
+ except Exception:
+ docx_bytes = docx_content.encode('utf-8')
+ else:
+ docx_bytes = docx_content
+
+ return [
+ RenderedDocument(
+ documentData=docx_bytes,
+ mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+ filename=filename
+ )
+ ]
except Exception as e:
self.logger.error(f"Error rendering DOCX: {str(e)}")
# Return minimal fallback
- return f"DOCX Generation Error: {str(e)}", "text/plain"
+ fallbackContent = f"DOCX Generation Error: {str(e)}"
+ return [
+ RenderedDocument(
+ documentData=fallbackContent.encode('utf-8'),
+ mimeType="text/plain",
+ filename=self._determineFilename(title, "text/plain")
+ )
+ ]
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate DOCX content from structured JSON document."""
diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py
index 213bf641..dba6a03f 100644
--- a/modules/services/serviceGeneration/renderers/rendererHtml.py
+++ b/modules/services/serviceGeneration/renderers/rendererHtml.py
@@ -5,7 +5,8 @@ HTML renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction."""
@@ -25,29 +26,66 @@ class RendererHtml(BaseRenderer):
"""Return priority for HTML renderer."""
return 100
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
- """Render extracted JSON content to HTML format using AI-analyzed styling."""
- try:
- # Extract images first
- images = self._extractImages(extractedContent)
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
+ """
+ Render HTML document with images as separate files.
+ Returns list of documents: [HTML document, image1, image2, ...]
+ """
+ import base64
+
+ # Extract images first
+ images = self._extractImages(extractedContent)
+
+ # Store images in instance for later retrieval
+ self._renderedImages = images
+
+ # Generate HTML using AI-analyzed styling
+ htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
+
+ # Replace base64 data URIs with relative file paths if images exist
+ if images:
+ htmlContent = self._replaceImageDataUris(htmlContent, images)
+
+ # Determine HTML filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ htmlFilename = documents[0].get("filename")
+ if not htmlFilename:
+ htmlFilename = self._determineFilename(title, "text/html")
+ else:
+ htmlFilename = self._determineFilename(title, "text/html")
+
+ # Start with HTML document
+ resultDocuments = [
+ RenderedDocument(
+ documentData=htmlContent.encode('utf-8'),
+ mimeType="text/html",
+ filename=htmlFilename
+ )
+ ]
+
+ # Add images as separate documents
+ for img in images:
+ base64Data = img.get("base64Data", "")
+ filename = img.get("filename", f"image_{len(resultDocuments)}.png")
+ mimeType = img.get("mimeType", "image/png")
- # Store images in instance for later retrieval
- self._renderedImages = images
-
- # Generate HTML using AI-analyzed styling
- htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
-
- # Replace base64 data URIs with relative file paths if images exist
- if images:
- htmlContent = self._replaceImageDataUris(htmlContent, images)
-
- return htmlContent, "text/html"
-
- except Exception as e:
- self.logger.error(f"Error rendering HTML: {str(e)}")
- # Return minimal HTML fallback
- self._renderedImages = [] # Initialize empty list on error
-            return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
+ if base64Data:
+ try:
+ # Decode base64 to bytes
+ imageBytes = base64.b64decode(base64Data)
+ resultDocuments.append(
+ RenderedDocument(
+ documentData=imageBytes,
+ mimeType=mimeType,
+ filename=filename
+ )
+ )
+                    self.logger.debug(f"Added image file: {filename} ({len(imageBytes)} bytes)")
+ except Exception as e:
+                    self.logger.warning(f"Error creating image file {filename}: {str(e)}")
+
+ return resultDocuments
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling."""
@@ -597,8 +635,31 @@ class RendererHtml(BaseRenderer):
if base64Data:
sectionId = section.get("id", "unknown")
+
+ # Bestimme MIME-Type und Extension
+ mimeType = element.get("mimeType", "image/png")
+ if not mimeType or mimeType == "unknown":
+ # Versuche MIME-Type aus base64 zu erkennen
+ if base64Data.startswith("/9j/"):
+ mimeType = "image/jpeg"
+ elif base64Data.startswith("iVBORw0KGgo"):
+ mimeType = "image/png"
+ else:
+ mimeType = "image/png" # Default
+
+ # Bestimme Extension basierend auf MIME-Type
+ extension = "png"
+ if mimeType == "image/jpeg" or mimeType == "image/jpg":
+ extension = "jpg"
+ elif mimeType == "image/png":
+ extension = "png"
+ elif mimeType == "image/gif":
+ extension = "gif"
+ elif mimeType == "image/webp":
+ extension = "webp"
+
# Generate filename from section ID
- filename = f"{sectionId}.png"
+ filename = f"{sectionId}.{extension}"
# Clean filename (remove invalid characters)
filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename)
@@ -607,7 +668,8 @@ class RendererHtml(BaseRenderer):
"altText": element.get("altText", "Image"),
"caption": element.get("caption"),
"sectionId": sectionId,
- "filename": filename
+ "filename": filename,
+ "mimeType": mimeType
})
                        self.logger.debug(f"Extracted image from section {sectionId}: {filename}")
@@ -633,8 +695,9 @@ class RendererHtml(BaseRenderer):
import base64
import re
- # Find all image data URIs in HTML
- dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)'
+ # Find all image data URIs in HTML (verschiedene MIME-Types unterstützen)
+ # Pattern: data:image/[type];base64,
+ dataUriPattern = r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)'
def replaceDataUri(match):
base64Data = match.group(1)
@@ -642,7 +705,9 @@ class RendererHtml(BaseRenderer):
# Find matching image in images list
matchingImage = None
for img in images:
- if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]):
+ imgBase64 = img.get("base64Data", "")
+ # Vergleiche base64-Daten (kann unterschiedliche Längen haben durch Padding)
+ if imgBase64 == base64Data or imgBase64.startswith(base64Data[:100]) or base64Data.startswith(imgBase64[:100]):
matchingImage = img
break
@@ -650,20 +715,25 @@ class RendererHtml(BaseRenderer):
# Use filename from image data (generated from section ID)
filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
- # Replace with relative path
+ # Replace with relative path (ohne Pfad, nur Dateiname)
altText = matchingImage.get("altText", "Image")
caption = matchingImage.get("caption", "")
+ # Entferne IMAGE_MARKER Kommentar falls vorhanden
+                imgTag = f'<img src="{filename}" alt="{altText}">'
+
if caption:
-                    return f'<figure><img src="{filename}" alt="{altText}"><figcaption>{caption}</figcaption></figure>'
+                    return f'<figure>{imgTag}<figcaption>{caption}</figcaption></figure>'
                else:
-                    return f'<img src="{filename}" alt="{altText}">'
+                    return imgTag
else:
# Keep original if no match found
return match.group(0)
- # Replace all data URIs
+ # Replace all data URIs (auch IMAGE_MARKER Kommentare entfernen)
updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent)
+ # Entferne IMAGE_MARKER Kommentare die übrig geblieben sind
+        updatedHtml = re.sub(r'<!--\s*IMAGE_MARKER[^>]*-->', '', updatedHtml)
return updatedHtml
diff --git a/modules/services/serviceGeneration/renderers/rendererImage.py b/modules/services/serviceGeneration/renderers/rendererImage.py
index ad83673b..7d317131 100644
--- a/modules/services/serviceGeneration/renderers/rendererImage.py
+++ b/modules/services/serviceGeneration/renderers/rendererImage.py
@@ -5,8 +5,10 @@ Image renderer for report generation using AI image generation.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
import logging
+import base64
logger = logging.getLogger(__name__)
@@ -28,13 +30,37 @@ class RendererImage(BaseRenderer):
"""Return priority for image renderer."""
return 90
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to image format using AI image generation."""
try:
# Generate AI image from content
imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
- return imageContent, "image/png"
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "image/png")
+ else:
+ filename = self._determineFilename(title, "image/png")
+
+ # Convert image content to bytes (base64 string or bytes)
+ if isinstance(imageContent, str):
+ try:
+ imageBytes = base64.b64decode(imageContent)
+ except Exception:
+ imageBytes = imageContent.encode('utf-8')
+ else:
+ imageBytes = imageContent
+
+ return [
+ RenderedDocument(
+ documentData=imageBytes,
+ mimeType="image/png",
+ filename=filename
+ )
+ ]
except Exception as e:
self.logger.error(f"Error rendering image: {str(e)}")
diff --git a/modules/services/serviceGeneration/renderers/rendererJson.py b/modules/services/serviceGeneration/renderers/rendererJson.py
index a7f3d644..04196cf4 100644
--- a/modules/services/serviceGeneration/renderers/rendererJson.py
+++ b/modules/services/serviceGeneration/renderers/rendererJson.py
@@ -5,7 +5,8 @@ JSON renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
import json
class RendererJson(BaseRenderer):
@@ -26,14 +27,29 @@ class RendererJson(BaseRenderer):
"""Return priority for JSON renderer."""
return 80
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to JSON format."""
try:
# The extracted content should already be JSON from the AI
# Just validate and format it
jsonContent = self._cleanJsonContent(extractedContent, title)
- return jsonContent, "application/json"
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "application/json")
+ else:
+ filename = self._determineFilename(title, "application/json")
+
+ return [
+ RenderedDocument(
+ documentData=jsonContent.encode('utf-8'),
+ mimeType="application/json",
+ filename=filename
+ )
+ ]
except Exception as e:
self.logger.error(f"Error rendering JSON: {str(e)}")
@@ -43,7 +59,14 @@ class RendererJson(BaseRenderer):
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
"metadata": {"error": str(e)}
}
- return json.dumps(fallbackData, indent=2), "application/json"
+ fallbackContent = json.dumps(fallbackData, indent=2)
+ return [
+ RenderedDocument(
+ documentData=fallbackContent.encode('utf-8'),
+ mimeType="application/json",
+ filename=self._determineFilename(title, "application/json")
+ )
+ ]
def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
"""Clean and validate JSON content from AI."""
diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py
index dfe2bda2..7b23eb25 100644
--- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py
+++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py
@@ -5,7 +5,8 @@ Markdown renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
class RendererMarkdown(BaseRenderer):
"""Renders content to Markdown format with format-specific extraction."""
@@ -25,18 +26,40 @@ class RendererMarkdown(BaseRenderer):
"""Return priority for markdown renderer."""
return 95
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Markdown format."""
try:
# Generate markdown from JSON structure
markdownContent = self._generateMarkdownFromJson(extractedContent, title)
- return markdownContent, "text/markdown"
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "text/markdown")
+ else:
+ filename = self._determineFilename(title, "text/markdown")
+
+ return [
+ RenderedDocument(
+ documentData=markdownContent.encode('utf-8'),
+ mimeType="text/markdown",
+ filename=filename
+ )
+ ]
except Exception as e:
self.logger.error(f"Error rendering markdown: {str(e)}")
# Return minimal markdown fallback
- return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
+ fallbackContent = f"# {title}\n\nError rendering report: {str(e)}"
+ return [
+ RenderedDocument(
+ documentData=fallbackContent.encode('utf-8'),
+ mimeType="text/markdown",
+ filename=self._determineFilename(title, "text/markdown")
+ )
+ ]
def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate markdown content from structured JSON document."""
diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py
index 128e84d3..9767449e 100644
--- a/modules/services/serviceGeneration/renderers/rendererPdf.py
+++ b/modules/services/serviceGeneration/renderers/rendererPdf.py
@@ -5,7 +5,8 @@ PDF renderer for report generation using reportlab.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
import io
import base64
@@ -38,25 +39,56 @@ class RendererPdf(BaseRenderer):
"""Return priority for PDF renderer."""
return 120
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
try:
if not REPORTLAB_AVAILABLE:
# Fallback to HTML if reportlab not available
from .rendererHtml import RendererHtml
html_renderer = RendererHtml()
- html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
- return html_content, "text/html"
+ return await html_renderer.render(extractedContent, title, userPrompt, aiService)
# Generate PDF using AI-analyzed styling
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
- return pdf_content, "application/pdf"
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "application/pdf")
+ else:
+ filename = self._determineFilename(title, "application/pdf")
+
+ # Convert PDF content to bytes if it's a string (base64)
+ if isinstance(pdf_content, str):
+ # Try to decode as base64, otherwise encode as UTF-8
+ try:
+ pdf_bytes = base64.b64decode(pdf_content)
+ except Exception:
+ pdf_bytes = pdf_content.encode('utf-8')
+ else:
+ pdf_bytes = pdf_content
+
+ return [
+ RenderedDocument(
+ documentData=pdf_bytes,
+ mimeType="application/pdf",
+ filename=filename
+ )
+ ]
except Exception as e:
self.logger.error(f"Error rendering PDF: {str(e)}")
# Return minimal fallback
- return f"PDF Generation Error: {str(e)}", "text/plain"
+ fallbackContent = f"PDF Generation Error: {str(e)}"
+ return [
+ RenderedDocument(
+ documentData=fallbackContent.encode('utf-8'),
+ mimeType="text/plain",
+ filename=self._determineFilename(title, "text/plain")
+ )
+ ]
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling."""
diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py
index e9ad334c..d12048c7 100644
--- a/modules/services/serviceGeneration/renderers/rendererPptx.py
+++ b/modules/services/serviceGeneration/renderers/rendererPptx.py
@@ -6,8 +6,9 @@ import io
import json
import re
from datetime import datetime, UTC
-from typing import Dict, Any, Optional, Tuple, List
+from typing import Dict, Any, Optional, List
from .rendererBaseTemplate import BaseRenderer
+from modules.datamodels.datamodelDocument import RenderedDocument
logger = logging.getLogger(__name__)
@@ -25,7 +26,7 @@ class RendererPptx(BaseRenderer):
"""Get list of supported output formats."""
return ["pptx", "ppt"]
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""
Render content as PowerPoint presentation from JSON data.
@@ -204,14 +205,44 @@ class RendererPptx(BaseRenderer):
pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')
logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
- return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"
+
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
+ else:
+ filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
+
+ return [
+ RenderedDocument(
+ documentData=pptx_bytes,
+ mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
+ filename=filename
+ )
+ ]
except ImportError:
logger.error("python-pptx library not installed. Install with: pip install python-pptx")
- return "python-pptx library not installed", "text/plain"
+ fallbackContent = "python-pptx library not installed"
+ return [
+ RenderedDocument(
+ documentData=fallbackContent.encode('utf-8'),
+ mimeType="text/plain",
+ filename=self._determineFilename(title, "text/plain")
+ )
+ ]
except Exception as e:
logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
- return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
+ fallbackContent = f"Error rendering PowerPoint presentation: {str(e)}"
+ return [
+ RenderedDocument(
+ documentData=fallbackContent.encode('utf-8'),
+ mimeType="text/plain",
+ filename=self._determineFilename(title, "text/plain")
+ )
+ ]
def _parseContentToSlides(self, content: str, title: str) -> list:
"""
diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py
index acbeaaf9..1948b29f 100644
--- a/modules/services/serviceGeneration/renderers/rendererText.py
+++ b/modules/services/serviceGeneration/renderers/rendererText.py
@@ -5,7 +5,8 @@ Text renderer for report generation.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
class RendererText(BaseRenderer):
"""Renders content to plain text format with format-specific extraction."""
@@ -47,18 +48,40 @@ class RendererText(BaseRenderer):
"""Return priority for text renderer."""
return 90
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to plain text format."""
try:
# Generate text from JSON structure
textContent = self._generateTextFromJson(extractedContent, title)
- return textContent, "text/plain"
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "text/plain")
+ else:
+ filename = self._determineFilename(title, "text/plain")
+
+ return [
+ RenderedDocument(
+ documentData=textContent.encode('utf-8'),
+ mimeType="text/plain",
+ filename=filename
+ )
+ ]
except Exception as e:
self.logger.error(f"Error rendering text: {str(e)}")
# Return minimal text fallback
- return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
+ fallbackContent = f"{title}\n\nError rendering report: {str(e)}"
+ return [
+ RenderedDocument(
+ documentData=fallbackContent.encode('utf-8'),
+ mimeType="text/plain",
+ filename=self._determineFilename(title, "text/plain")
+ )
+ ]
def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate text content from structured JSON document."""
diff --git a/modules/services/serviceGeneration/renderers/rendererXlsx.py b/modules/services/serviceGeneration/renderers/rendererXlsx.py
index a8cffd56..d8d23065 100644
--- a/modules/services/serviceGeneration/renderers/rendererXlsx.py
+++ b/modules/services/serviceGeneration/renderers/rendererXlsx.py
@@ -5,7 +5,8 @@ Excel renderer for report generation using openpyxl.
"""
from .rendererBaseTemplate import BaseRenderer
-from typing import Dict, Any, Tuple, List
+from modules.datamodels.datamodelDocument import RenderedDocument
+from typing import Dict, Any, List
import io
import base64
from datetime import datetime, UTC
@@ -37,20 +38,43 @@ class RendererXlsx(BaseRenderer):
"""Return priority for Excel renderer."""
return 110
- async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
+ async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
try:
if not OPENPYXL_AVAILABLE:
# Fallback to CSV if openpyxl not available
from .rendererCsv import RendererCsv
csvRenderer = RendererCsv()
- csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService)
- return csvContent, "text/csv"
+ return await csvRenderer.render(extractedContent, title, userPrompt, aiService)
# Generate Excel using AI-analyzed styling
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
- return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+ # Determine filename from document or title
+ documents = extractedContent.get("documents", [])
+ if documents and isinstance(documents[0], dict):
+ filename = documents[0].get("filename")
+ if not filename:
+ filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+ else:
+ filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
+
+ # Convert Excel content to bytes if it's a string (base64)
+ if isinstance(excelContent, str):
+ try:
+ excel_bytes = base64.b64decode(excelContent)
+ except Exception:
+ excel_bytes = excelContent.encode('utf-8')
+ else:
+ excel_bytes = excelContent
+
+ return [
+ RenderedDocument(
+ documentData=excel_bytes,
+ mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
+ filename=filename
+ )
+ ]
except Exception as e:
self.logger.error(f"Error rendering Excel: {str(e)}")
diff --git a/modules/workflows/processing/adaptive/contentValidator.py b/modules/workflows/processing/adaptive/contentValidator.py
index b1de9f98..4e405630 100644
--- a/modules/workflows/processing/adaptive/contentValidator.py
+++ b/modules/workflows/processing/adaptive/contentValidator.py
@@ -106,6 +106,18 @@ class ContentValidator:
if section.get("textPreview"):
sectionSummary["textPreview"] = section.get("textPreview")
+ # If contentPartIds are present but no elements: add ContentParts metadata
+ contentPartIds = section.get("contentPartIds", [])
+ if contentPartIds and not elements:
+ # Check whether contentPartsMetadata is available
+ contentPartsMetadata = section.get("contentPartsMetadata", [])
+ if contentPartsMetadata:
+ sectionSummary["contentPartsMetadata"] = contentPartsMetadata
+ else:
+ # Fallback: show only the IDs when metadata is not available
+ sectionSummary["contentPartIds"] = contentPartIds
+ sectionSummary["note"] = "ContentParts referenced but metadata not available"
+
# Include any additional fields from section (generic approach)
# This ensures all action-specific fields are preserved
for key, value in section.items():
@@ -141,6 +153,18 @@ class ContentValidator:
sectionSummary["rowCount"] = len(rows)
sectionSummary["headers"] = headers
+ # If contentPartIds are present but no elements: add ContentParts metadata
+ contentPartIds = section.get("contentPartIds", [])
+ if contentPartIds and not elements:
+ # Check whether contentPartsMetadata is available
+ contentPartsMetadata = section.get("contentPartsMetadata", [])
+ if contentPartsMetadata:
+ sectionSummary["contentPartsMetadata"] = contentPartsMetadata
+ else:
+ # Fallback: show only the IDs when metadata is not available
+ sectionSummary["contentPartIds"] = contentPartIds
+ sectionSummary["note"] = "ContentParts referenced but metadata not available"
+
# Include any additional fields from section (generic approach)
for key, value in section.items():
if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately