enhanced generation engine with chapters as structure, renderers to render a pipeline and deliver 1..n documents

This commit is contained in:
ValueOn AG 2025-12-28 11:43:42 +01:00
parent 9d4bd8ceef
commit 723f98ea7a
17 changed files with 1141 additions and 264 deletions

View file

@ -107,5 +107,17 @@ class StructuredDocument(BaseModel):
class RenderedDocument(BaseModel):
    """A single rendered document from a renderer.

    Bundles the content of one output file with the MIME type and the
    filename it is delivered under.
    """
    documentData: bytes = Field(description="Document content as bytes")
    mimeType: str = Field(description="MIME type of the document (e.g., 'text/html', 'application/pdf')")
    filename: str = Field(description="Filename for the document (e.g., 'report.html', 'image.png')")
    class Config:
        # JSON encoder hook for bytes values: decode as UTF-8 and substitute
        # undecodable bytes instead of raising.
        # NOTE(review): errors='replace' is lossy for non-text payloads such as
        # PDFs or images - confirm no consumer needs to round-trip the JSON form.
        json_encoders = {
            bytes: lambda v: v.decode('utf-8', errors='replace') if isinstance(v, bytes) else v
        }
# Update forward references # Update forward references
ListItem.model_rebuild() ListItem.model_rebuild()

View file

@ -11,6 +11,7 @@ from modules.services.serviceExtraction.mainServiceExtraction import ExtractionS
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelDocument import RenderedDocument
from modules.interfaces.interfaceAiObjects import AiObjects from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.jsonUtils import ( from modules.shared.jsonUtils import (
extractJsonString, extractJsonString,
@ -50,7 +51,7 @@ class AiService:
if self.extractionService is None: if self.extractionService is None:
logger.info("Initializing ExtractionService...") logger.info("Initializing ExtractionService...")
self.extractionService = ExtractionService(self.services) self.extractionService = ExtractionService(self.services)
# Initialize new submodules # Initialize new submodules
from modules.services.serviceAi.subResponseParsing import ResponseParser from modules.services.serviceAi.subResponseParsing import ResponseParser
from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer
@ -277,7 +278,7 @@ Respond with ONLY a JSON object in this exact format:
) -> str: ) -> str:
"""Delegate to ResponseParser.""" """Delegate to ResponseParser."""
return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata) return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)
# Public API Methods # Public API Methods
# Planning AI Call # Planning AI Call
@ -494,20 +495,21 @@ Respond with ONLY a JSON object in this exact format:
title: str, title: str,
userPrompt: str, userPrompt: str,
parentOperationId: str parentOperationId: str
) -> Tuple[bytes, str]: ) -> List[RenderedDocument]:
""" """
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
Unterstützt Multi-Dokument-Rendering: Alle Dokumente werden gerendert. Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben.
Args: Args:
filledStructure: Gefüllte Struktur mit elements filledStructure: Gefüllte Struktur mit elements
outputFormat: Ziel-Format (pdf, docx, html, etc.) outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet
title: Dokument-Titel title: Dokument-Titel
userPrompt: User-Anfrage userPrompt: User-Anfrage
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns: Returns:
Tuple von (renderedContent, mimeType) List of RenderedDocument objects.
Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei)
""" """
# Erstelle Operation-ID für Rendering # Erstelle Operation-ID für Rendering
renderOperationId = f"{parentOperationId}_rendering" renderOperationId = f"{parentOperationId}_rendering"
@ -526,51 +528,21 @@ Respond with ONLY a JSON object in this exact format:
generationService = GenerationService(self.services) generationService = GenerationService(self.services)
# Multi-Dokument-Rendering # renderReport verarbeitet jetzt jedes Dokument einzeln
documents = filledStructure.get("documents", []) # und gibt Liste von (documentData, mimeType, filename) zurück
renderedDocuments = await generationService.renderReport(
if len(documents) == 1: filledStructure,
# Einzelnes Dokument - wie bisher outputFormat,
renderedContent, mimeType, images = await generationService.renderReport( title,
filledStructure, userPrompt,
outputFormat, self,
title, parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
userPrompt, )
self,
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
)
else:
# Mehrere Dokumente - rendere alle
# Option: Alle Sections zusammenführen und als ein Dokument rendern
all_sections = []
for doc in documents:
if "sections" in doc:
all_sections.extend(doc.get("sections", []))
# Erstelle temporäres Dokument mit allen Sections
merged_document = {
"metadata": filledStructure["metadata"],
"documents": [{
"id": "merged",
"title": title,
"filename": f"{title}.{outputFormat}",
"sections": all_sections
}]
}
renderedContent, mimeType, images = await generationService.renderReport(
merged_document,
outputFormat,
title,
userPrompt,
self,
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
)
# ChatLog abschließen # ChatLog abschließen
self.services.chat.progressLogFinish(renderOperationId, True) self.services.chat.progressLogFinish(renderOperationId, True)
return renderedContent, mimeType return renderedDocuments
except Exception as e: except Exception as e:
self.services.chat.progressLogFinish(renderOperationId, False) self.services.chat.progressLogFinish(renderOperationId, False)
@ -712,7 +684,8 @@ Respond with ONLY a JSON object in this exact format:
) )
# Schritt 5E: Rendere Resultat # Schritt 5E: Rendere Resultat
renderedContent, mimeType = await self._renderResult( # Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
renderedDocuments = await self._renderResult(
filledStructure, filledStructure,
outputFormat, outputFormat,
title or "Generated Document", title or "Generated Document",
@ -720,15 +693,24 @@ Respond with ONLY a JSON object in this exact format:
aiOperationId aiOperationId
) )
# Baue Response # Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
documentName = self._determineDocumentName(filledStructure, outputFormat, title) documentDataList = []
for renderedDoc in renderedDocuments:
try:
# Erstelle DocumentData für jedes gerenderte Dokument
docDataObj = DocumentData(
documentName=renderedDoc.filename,
documentData=renderedDoc.documentData,
mimeType=renderedDoc.mimeType,
sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
)
documentDataList.append(docDataObj)
logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
except Exception as e:
logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
docData = DocumentData( if not documentDataList:
documentName=documentName, raise ValueError("No documents were rendered")
documentData=renderedContent,
mimeType=mimeType,
sourceJson=filledStructure
)
metadata = AiResponseMetadata( metadata = AiResponseMetadata(
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"), title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
@ -746,7 +728,7 @@ Respond with ONLY a JSON object in this exact format:
return AiResponse( return AiResponse(
content=json.dumps(filledStructure), content=json.dumps(filledStructure),
metadata=metadata, metadata=metadata,
documents=[docData] documents=documentDataList
) )
except Exception as e: except Exception as e:

View file

@ -35,65 +35,184 @@ class StructureFiller:
parentOperationId: str parentOperationId: str
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Phase 5D: Füllt Struktur mit tatsächlichem Content. Phase 5D: Chapter-Content-Generierung (Zwei-Phasen-Ansatz).
Für jede Section:
- Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format
- Wenn generation_hint spezifiziert: Generiere AI-Content
**Implementierungsdetails:** Phase 5D.1: Generiert Sections-Struktur für jedes Chapter
- Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung) Phase 5D.2: Füllt Sections mit ContentParts
- Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses)
Args: Args:
structure: Struktur-Dict mit documents und sections structure: Struktur-Dict mit documents und chapters (nicht sections!)
contentParts: Alle vorbereiteten ContentParts contentParts: Alle vorbereiteten ContentParts
userPrompt: User-Anfrage userPrompt: User-Anfrage
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns: Returns:
Gefüllte Struktur mit elements in jeder Section Gefüllte Struktur mit elements in jeder Section (nach Flattening)
""" """
# Erstelle Operation-ID für Struktur-Abfüllen # Erstelle Operation-ID für Struktur-Abfüllen
fillOperationId = f"{parentOperationId}_structure_filling" fillOperationId = f"{parentOperationId}_structure_filling"
# Prüfe ob Struktur Chapters oder Sections hat
hasChapters = False
for doc in structure.get("documents", []):
if "chapters" in doc:
hasChapters = True
break
if not hasChapters:
# Fallback: Alte Struktur mit Sections direkt - verwende alte Logik
logger.warning("Structure has no chapters, using legacy section-based filling")
return await self._fillStructureLegacy(structure, contentParts, userPrompt, fillOperationId)
# Starte ChatLog mit Parent-Referenz # Starte ChatLog mit Parent-Referenz
chapterCount = sum(len(doc.get("chapters", [])) for doc in structure.get("documents", []))
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
fillOperationId, fillOperationId,
"Structure Filling", "Chapter Content Generation",
"Filling", "Filling",
f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", f"Processing {chapterCount} chapters",
parentOperationId=parentOperationId parentOperationId=parentOperationId
) )
try: try:
filledStructure = copy.deepcopy(structure) filledStructure = copy.deepcopy(structure)
# Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) # Phase 5D.1: Sections-Struktur für jedes Chapter generieren
sections_to_process = [] filledStructure = await self._generateChapterSectionsStructure(
all_sections_list = [] # Für Kontext-Informationen filledStructure, contentParts, userPrompt, fillOperationId
for doc in filledStructure.get("documents", []): )
doc_sections = doc.get("sections", [])
all_sections_list.extend(doc_sections)
for section in doc_sections:
sections_to_process.append((doc, section))
# Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) # Phase 5D.2: Sections mit ContentParts füllen
for sectionIndex, (doc, section) in enumerate(sections_to_process): filledStructure = await self._fillChapterSections(
sectionId = section.get("id") filledStructure, contentParts, userPrompt, fillOperationId
contentPartIds = section.get("contentPartIds", []) )
contentFormats = section.get("contentFormats", {})
generationHint = section.get("generation_hint") # Flattening: Chapters zu Sections konvertieren
contentType = section.get("content_type", "paragraph") flattenedStructure = self._flattenChaptersToSections(filledStructure)
# Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts)
# ChatLog abschließen
self.services.chat.progressLogFinish(fillOperationId, True)
return flattenedStructure
except Exception as e:
self.services.chat.progressLogFinish(fillOperationId, False)
logger.error(f"Error in fillStructure: {str(e)}")
raise
async def _generateChapterSectionsStructure(
self,
chapterStructure: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
parentOperationId: str
) -> Dict[str, Any]:
"""
Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content).
Sections enthalten: content_type, contentPartIds, generationHint, useAiCall
"""
for doc in chapterStructure.get("documents", []):
for chapter in doc.get("chapters", []):
chapterId = chapter.get("id", "unknown")
chapterLevel = chapter.get("level", 1)
chapterTitle = chapter.get("title", "")
generationHint = chapter.get("generationHint", "")
contentPartIds = chapter.get("contentPartIds", [])
contentPartInstructions = chapter.get("contentPartInstructions", {})
elements = [] chapterPrompt = self._buildChapterSectionsStructurePrompt(
chapterId=chapterId,
# Prüfe ob Aggregation nötig ist chapterLevel=chapterLevel,
needsAggregation = self._needsAggregation( chapterTitle=chapterTitle,
contentType=contentType, generationHint=generationHint,
contentPartCount=len(contentPartIds) contentPartIds=contentPartIds,
contentPartInstructions=contentPartInstructions,
contentParts=contentParts,
userPrompt=userPrompt
) )
if needsAggregation and generationHint: # Debug: Log Prompt
self.services.utils.writeDebugFile(
chapterPrompt,
f"chapter_structure_{chapterId}_prompt"
)
aiResponse = await self.aiService.callAiPlanning(
prompt=chapterPrompt,
debugType=f"chapter_structure_{chapterId}"
)
# Debug: Log Response
self.services.utils.writeDebugFile(
aiResponse,
f"chapter_structure_{chapterId}_response"
)
sectionsStructure = json.loads(
self.services.utils.jsonExtractString(aiResponse)
)
chapter["sections"] = sectionsStructure.get("sections", [])
# Setze useAiCall Flag (falls nicht von AI gesetzt)
for section in chapter["sections"]:
if "useAiCall" not in section:
contentType = section.get("content_type", "paragraph")
useAiCall = contentType != "paragraph"
# Prüfe contentPartInstructions
if not useAiCall:
for partId in section.get("contentPartIds", []):
instruction = contentPartInstructions.get(partId, {}).get("instruction", "")
if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]:
useAiCall = True
break
section["useAiCall"] = useAiCall
return chapterStructure
async def _fillChapterSections(
self,
chapterStructure: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
parentOperationId: str
) -> Dict[str, Any]:
"""
Phase 5D.2: Füllt Sections mit ContentParts.
"""
# Sammle alle Sections für sequenzielle Verarbeitung
sections_to_process = []
all_sections_list = [] # Für Kontext-Informationen
for doc in chapterStructure.get("documents", []):
for chapter in doc.get("chapters", []):
for section in chapter.get("sections", []):
all_sections_list.append(section)
sections_to_process.append((doc, chapter, section))
# Sequenzielle Section-Generierung
fillOperationId = parentOperationId
for sectionIndex, (doc, chapter, section) in enumerate(sections_to_process):
sectionId = section.get("id")
contentPartIds = section.get("contentPartIds", [])
contentFormats = section.get("contentFormats", {})
generationHint = section.get("generation_hint")
contentType = section.get("content_type", "paragraph")
useAiCall = section.get("useAiCall", False)
elements = []
# Prüfe ob Aggregation nötig ist
needsAggregation = self._needsAggregation(
contentType=contentType,
contentPartCount=len(contentPartIds)
)
if needsAggregation and useAiCall:
# Aggregation: Alle Parts zusammen verarbeiten # Aggregation: Alle Parts zusammen verarbeiten
sectionParts = [ sectionParts = [
self._findContentPartById(pid, contentParts) self._findContentPartById(pid, contentParts)
@ -201,8 +320,8 @@ class StructureFiller:
}) })
logger.error(f"Error generating section {sectionId}: {str(e)}") logger.error(f"Error generating section {sectionId}: {str(e)}")
# NICHT raise - Section wird mit Fehlermeldung gerendert # NICHT raise - Section wird mit Fehlermeldung gerendert
else: else:
# Einzelverarbeitung: Jeder Part einzeln # Einzelverarbeitung: Jeder Part einzeln
for partId in contentPartIds: for partId in contentPartIds:
part = self._findContentPartById(partId, contentParts) part = self._findContentPartById(partId, contentParts)
@ -308,19 +427,429 @@ class StructureFiller:
"source": part.metadata.get("documentId"), "source": part.metadata.get("documentId"),
"extractionPrompt": part.metadata.get("extractionPrompt") "extractionPrompt": part.metadata.get("extractionPrompt")
}) })
section["elements"] = elements
return chapterStructure
def _addContentPartsMetadata(
self,
structure: Dict[str, Any],
contentParts: List[ContentPart]
) -> Dict[str, Any]:
"""
Fügt ContentParts-Metadaten zur Struktur hinzu, wenn contentPartIds vorhanden sind.
Dies hilft der Validierung, den Kontext der ContentParts zu verstehen.
"""
# Erstelle Mapping von ContentPart-ID zu Metadaten
contentPartsMap = {}
for part in contentParts:
contentPartsMap[part.id] = {
"id": part.id,
"format": part.metadata.get("contentFormat", "unknown"),
"type": part.typeGroup,
"mimeType": part.mimeType,
"originalFileName": part.metadata.get("originalFileName"),
"usageHint": part.metadata.get("usageHint"),
"documentId": part.metadata.get("documentId"),
"dataSize": len(str(part.data)) if part.data else 0
}
# Füge Metadaten zu Sections hinzu, die contentPartIds haben
for doc in structure.get("documents", []):
# Prüfe ob Chapters vorhanden sind (neue Struktur)
if "chapters" in doc:
for chapter in doc.get("chapters", []):
# Füge Metadaten zu Chapter-Level contentPartIds hinzu
chapterContentPartIds = chapter.get("contentPartIds", [])
if chapterContentPartIds:
chapter["contentPartsMetadata"] = []
for partId in chapterContentPartIds:
if partId in contentPartsMap:
chapter["contentPartsMetadata"].append(contentPartsMap[partId])
# Füge Metadaten zu Sections hinzu
for section in chapter.get("sections", []):
contentPartIds = section.get("contentPartIds", [])
if contentPartIds:
section["contentPartsMetadata"] = []
for partId in contentPartIds:
if partId in contentPartsMap:
section["contentPartsMetadata"].append(contentPartsMap[partId])
# Prüfe ob Sections direkt vorhanden sind (Legacy-Struktur)
elif "sections" in doc:
for section in doc.get("sections", []):
contentPartIds = section.get("contentPartIds", [])
if contentPartIds:
section["contentPartsMetadata"] = []
for partId in contentPartIds:
if partId in contentPartsMap:
section["contentPartsMetadata"].append(contentPartsMap[partId])
return structure
def _flattenChaptersToSections(
self,
chapterStructure: Dict[str, Any]
) -> Dict[str, Any]:
"""
Flattening: Konvertiert Chapters zu finaler Section-Struktur.
Jedes Chapter wird zu einer Heading-Section + dessen Sections.
"""
result = {
"metadata": chapterStructure.get("metadata", {}),
"documents": []
}
for doc in chapterStructure.get("documents", []):
flattened_doc = {
"id": doc.get("id"),
"title": doc.get("title"),
"filename": doc.get("filename"),
"sections": []
}
for chapter in doc.get("chapters", []):
# 1. Vordefinierte Heading-Section für Chapter-Title
heading_section = {
"id": f"{chapter['id']}_heading",
"content_type": "heading",
"elements": [{
"type": "heading",
"content": chapter.get("title"),
"level": chapter.get("level", 1)
}]
}
flattened_doc["sections"].append(heading_section)
# 2. Generierte Sections
flattened_doc["sections"].extend(chapter.get("sections", []))
result["documents"].append(flattened_doc)
return result
async def _fillStructureLegacy(
self,
structure: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str,
fillOperationId: str
) -> Dict[str, Any]:
"""
Legacy: Füllt Struktur mit Sections direkt (für Rückwärtskompatibilität).
"""
# Starte ChatLog
self.services.chat.progressLogStart(
fillOperationId,
"Structure Filling (Legacy)",
"Filling",
f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections",
parentOperationId=fillOperationId
)
try:
filledStructure = copy.deepcopy(structure)
# Sammle alle Sections
sections_to_process = []
all_sections_list = []
for doc in filledStructure.get("documents", []):
doc_sections = doc.get("sections", [])
all_sections_list.extend(doc_sections)
for section in doc_sections:
sections_to_process.append((doc, section))
# Verarbeite Sections (bestehende Logik)
for sectionIndex, (doc, section) in enumerate(sections_to_process):
sectionId = section.get("id")
contentPartIds = section.get("contentPartIds", [])
contentFormats = section.get("contentFormats", {})
generationHint = section.get("generation_hint")
contentType = section.get("content_type", "paragraph")
elements = []
# Prüfe ob Aggregation nötig ist
needsAggregation = self._needsAggregation(
contentType=contentType,
contentPartCount=len(contentPartIds)
)
if needsAggregation and generationHint:
# Aggregation: Alle Parts zusammen verarbeiten
sectionParts = [
self._findContentPartById(pid, contentParts)
for pid in contentPartIds
]
sectionParts = [p for p in sectionParts if p is not None]
if sectionParts:
# Filtere nur extracted Parts für Aggregation
extractedParts = [
p for p in sectionParts
if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted"
]
nonExtractedParts = [
p for p in sectionParts
if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted"
]
# Verarbeite non-extracted Parts separat
for part in nonExtractedParts:
contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat"))
if contentFormat == "reference":
elements.append({
"type": "reference",
"documentReference": part.metadata.get("documentReference"),
"label": part.metadata.get("usageHint", part.label)
})
elif contentFormat == "object":
elements.append({
"type": part.typeGroup,
"base64Data": part.data,
"mimeType": part.mimeType,
"altText": part.metadata.get("usageHint", part.label)
})
# Aggregiere extracted Parts mit AI
if extractedParts:
generationPrompt = self._buildSectionGenerationPrompt(
section=section,
contentParts=extractedParts,
userPrompt=userPrompt,
generationHint=generationHint,
allSections=all_sections_list,
sectionIndex=sectionIndex,
isAggregation=True
)
sectionOperationId = f"{fillOperationId}_section_{sectionId}"
self.services.chat.progressLogStart(
sectionOperationId,
"Section Generation (Aggregation)",
"Section",
f"Generating section {sectionId} with {len(extractedParts)} parts",
parentOperationId=fillOperationId
)
try:
self.services.utils.writeDebugFile(
generationPrompt,
f"section_content_{sectionId}_prompt"
)
request = AiCallRequest(
prompt=generationPrompt,
contentParts=extractedParts,
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.DETAILED
)
)
aiResponse = await self.aiService.callAi(request)
self.services.utils.writeDebugFile(
aiResponse.content,
f"section_content_{sectionId}_response"
)
generatedElements = json.loads(
self.services.utils.jsonExtractString(aiResponse.content)
)
if isinstance(generatedElements, list):
elements.extend(generatedElements)
elif isinstance(generatedElements, dict) and "elements" in generatedElements:
elements.extend(generatedElements["elements"])
self.services.chat.progressLogFinish(sectionOperationId, True)
except Exception as e:
self.services.chat.progressLogFinish(sectionOperationId, False)
elements.append({
"type": "error",
"message": f"Error generating section {sectionId}: {str(e)}",
"sectionId": sectionId
})
logger.error(f"Error generating section {sectionId}: {str(e)}")
else:
# Einzelverarbeitung: Jeder Part einzeln
for partId in contentPartIds:
part = self._findContentPartById(partId, contentParts)
if not part:
continue
contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat"))
if contentFormat == "reference":
elements.append({
"type": "reference",
"documentReference": part.metadata.get("documentReference"),
"label": part.metadata.get("usageHint", part.label)
})
elif contentFormat == "object":
elements.append({
"type": part.typeGroup,
"base64Data": part.data,
"mimeType": part.mimeType,
"altText": part.metadata.get("usageHint", part.label)
})
elif contentFormat == "extracted":
if generationHint:
# AI-Call mit einzelnen ContentPart
generationPrompt = self._buildSectionGenerationPrompt(
section=section,
contentParts=[part],
userPrompt=userPrompt,
generationHint=generationHint,
allSections=all_sections_list,
sectionIndex=sectionIndex,
isAggregation=False
)
sectionOperationId = f"{fillOperationId}_section_{sectionId}"
self.services.chat.progressLogStart(
sectionOperationId,
"Section Generation",
"Section",
f"Generating section {sectionId}",
parentOperationId=fillOperationId
)
try:
self.services.utils.writeDebugFile(
generationPrompt,
f"section_content_{sectionId}_prompt"
)
request = AiCallRequest(
prompt=generationPrompt,
contentParts=[part],
options=AiCallOptions(
operationType=OperationTypeEnum.DATA_ANALYSE,
priority=PriorityEnum.BALANCED,
processingMode=ProcessingModeEnum.DETAILED
)
)
aiResponse = await self.aiService.callAi(request)
self.services.utils.writeDebugFile(
aiResponse.content,
f"section_content_{sectionId}_response"
)
generatedElements = json.loads(
self.services.utils.jsonExtractString(aiResponse.content)
)
if isinstance(generatedElements, list):
elements.extend(generatedElements)
elif isinstance(generatedElements, dict) and "elements" in generatedElements:
elements.extend(generatedElements["elements"])
self.services.chat.progressLogFinish(sectionOperationId, True)
except Exception as e:
self.services.chat.progressLogFinish(sectionOperationId, False)
elements.append({
"type": "error",
"message": f"Error generating section {sectionId}: {str(e)}",
"sectionId": sectionId
})
logger.error(f"Error generating section {sectionId}: {str(e)}")
else:
elements.append({
"type": "extracted_text",
"content": part.data,
"source": part.metadata.get("documentId"),
"extractionPrompt": part.metadata.get("extractionPrompt")
})
section["elements"] = elements section["elements"] = elements
# ChatLog abschließen # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
self.services.chat.progressLogFinish(fillOperationId, True) filledStructure = self._addContentPartsMetadata(filledStructure, contentParts)
self.services.chat.progressLogFinish(fillOperationId, True)
return filledStructure return filledStructure
except Exception as e: except Exception as e:
self.services.chat.progressLogFinish(fillOperationId, False) self.services.chat.progressLogFinish(fillOperationId, False)
logger.error(f"Error in fillStructure: {str(e)}") logger.error(f"Error in _fillStructureLegacy: {str(e)}")
raise raise
def _buildChapterSectionsStructurePrompt(
self,
chapterId: str,
chapterLevel: int,
chapterTitle: str,
generationHint: str,
contentPartIds: List[str],
contentPartInstructions: Dict[str, Any],
contentParts: List[ContentPart],
userPrompt: str
) -> str:
"""Baue Prompt für Chapter-Sections-Struktur-Generierung."""
# Baue ContentParts-Index (nur IDs, keine Previews!)
contentPartsIndex = ""
for partId in contentPartIds:
part = self._findContentPartById(partId, contentParts)
if not part:
continue
contentFormat = part.metadata.get("contentFormat", "unknown")
instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed")
contentPartsIndex += f"\n- ContentPart ID: {partId}\n"
contentPartsIndex += f" Format: {contentFormat}\n"
contentPartsIndex += f" Type: {part.typeGroup}\n"
contentPartsIndex += f" Instruction: {instruction}\n"
if not contentPartsIndex:
contentPartsIndex = "\n(No content parts specified for this chapter)"
prompt = f"""TASK: Generate Chapter Sections Structure
CHAPTER METADATA:
- Chapter ID: {chapterId}
- Chapter Level: {chapterLevel}
- Chapter Title: {chapterTitle}
- Generation Hint: {generationHint}
WICHTIG: Chapter hat bereits vordefinierte Heading-Section.
Generiere NICHT eine Heading-Section für Chapter-Title!
AVAILABLE CONTENT PARTS:
{contentPartsIndex}
STANDARD JSON SCHEMA FOR SECTIONS:
Supported content_types: table, bullet_list, heading, paragraph, code_block, image
Return JSON:
{{
"sections": [
{{
"id": "section_1",
"content_type": "paragraph",
"contentPartIds": ["part_ext_1"],
"generationHint": "...",
"useAiCall": false,
"elements": []
}}
]
}}
CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
"""
return prompt
def _buildSectionGenerationPrompt( def _buildSectionGenerationPrompt(
self, self,
section: Dict[str, Any], section: Dict[str, Any],

View file

@ -32,11 +32,12 @@ class StructureGenerator:
parentOperationId: str parentOperationId: str
) -> Dict[str, Any]: ) -> Dict[str, Any]:
""" """
Phase 5C: Generiert Dokument-Struktur mit Sections. Phase 5C: Generiert Chapter-Struktur (Table of Contents).
Jede Section spezifiziert: Definiert für jedes Chapter:
- Welcher Content sollte in dieser Section sein - Level, Title
- Welche ContentParts zu verwenden sind - contentPartIds
- Format für jeden ContentPart - contentPartInstructions
- generationHint
Args: Args:
userPrompt: User-Anfrage userPrompt: User-Anfrage
@ -45,7 +46,7 @@ class StructureGenerator:
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
Returns: Returns:
Struktur-Dict mit documents und sections Struktur-Dict mit documents und chapters (nicht sections!)
""" """
# Erstelle Operation-ID für Struktur-Generierung # Erstelle Operation-ID für Struktur-Generierung
structureOperationId = f"{parentOperationId}_structure_generation" structureOperationId = f"{parentOperationId}_structure_generation"
@ -53,25 +54,36 @@ class StructureGenerator:
# Starte ChatLog mit Parent-Referenz # Starte ChatLog mit Parent-Referenz
self.services.chat.progressLogStart( self.services.chat.progressLogStart(
structureOperationId, structureOperationId,
"Structure Generation", "Chapter Structure Generation",
"Structure", "Structure",
f"Generating structure for {outputFormat}", f"Generating chapter structure for {outputFormat}",
parentOperationId=parentOperationId parentOperationId=parentOperationId
) )
try: try:
# Baue Struktur-Prompt mit Content-Index # Baue Chapter-Struktur-Prompt mit Content-Index
structurePrompt = self._buildStructurePrompt( structurePrompt = self._buildChapterStructurePrompt(
userPrompt=userPrompt, userPrompt=userPrompt,
contentParts=contentParts, contentParts=contentParts,
outputFormat=outputFormat outputFormat=outputFormat
) )
# AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses) # Debug: Log Prompt
# Debug-Logs werden bereits von callAiPlanning geschrieben self.services.utils.writeDebugFile(
structurePrompt,
"chapter_structure_generation_prompt"
)
# AI-Call für Chapter-Struktur-Generierung
aiResponse = await self.aiService.callAiPlanning( aiResponse = await self.aiService.callAiPlanning(
prompt=structurePrompt, prompt=structurePrompt,
debugType="document_generation_structure" debugType="chapter_structure_generation"
)
# Debug: Log Response
self.services.utils.writeDebugFile(
aiResponse,
"chapter_structure_generation_response"
) )
# Parse Struktur # Parse Struktur
@ -87,13 +99,13 @@ class StructureGenerator:
logger.error(f"Error in generateStructure: {str(e)}") logger.error(f"Error in generateStructure: {str(e)}")
raise raise
def _buildStructurePrompt( def _buildChapterStructurePrompt(
self, self,
userPrompt: str, userPrompt: str,
contentParts: List[ContentPart], contentParts: List[ContentPart],
outputFormat: str outputFormat: str
) -> str: ) -> str:
"""Baue Prompt für Struktur-Generierung.""" """Baue Prompt für Chapter-Struktur-Generierung."""
# Baue ContentParts-Index - filtere leere Parts heraus # Baue ContentParts-Index - filtere leere Parts heraus
contentPartsIndex = "" contentPartsIndex = ""
validParts = [] validParts = []
@ -179,14 +191,19 @@ class StructureGenerator:
AVAILABLE CONTENT PARTS: AVAILABLE CONTENT PARTS:
{contentPartsIndex} {contentPartsIndex}
TASK: Generiere Dokument-Struktur mit Sections. TASK: Generiere Chapter-Struktur für die zu generierenden Dokumente.
Für jede Section, spezifiziere:
- section id Für jedes Chapter:
- content_type (heading, paragraph, image, table, etc.) - chapter id
- contentPartIds: [Liste von ContentPart-IDs zu verwenden] - level (1, 2, 3, etc.)
- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist - title
- generation_hint: Was AI für diese Section generieren soll - contentPartIds: [Liste von ContentPart-IDs]
- elements: [] (leer, wird in nächster Phase gefüllt) - contentPartInstructions: {{
"partId": {{
"instruction": "Wie Content strukturiert werden soll"
}}
}}
- generationHint: Beschreibung des Inhalts
OUTPUT FORMAT: {outputFormat} OUTPUT FORMAT: {outputFormat}
@ -200,24 +217,19 @@ RETURN JSON:
"id": "doc_1", "id": "doc_1",
"title": "Document Title", "title": "Document Title",
"filename": "document.{outputFormat}", "filename": "document.{outputFormat}",
"sections": [ "chapters": [
{{ {{
"id": "section_1", "id": "chapter_1",
"content_type": "heading", "level": 1,
"generation_hint": "Main title", "title": "Introduction",
"contentPartIds": [],
"contentFormats": {{}},
"elements": []
}},
{{
"id": "section_2",
"content_type": "paragraph",
"generation_hint": "Introduction paragraph",
"contentPartIds": ["part_ext_1"], "contentPartIds": ["part_ext_1"],
"contentFormats": {{ "contentPartInstructions": {{
"part_ext_1": "extracted" "part_ext_1": {{
"instruction": "Use full extracted text"
}}
}}, }},
"elements": [] "generationHint": "Create introduction section",
"sections": []
}} }}
] ]
}}] }}]

View file

@ -5,6 +5,7 @@ import uuid
import base64 import base64
import traceback import traceback
from typing import Any, Dict, List, Optional, Callable from typing import Any, Dict, List, Optional, Callable
from modules.datamodels.datamodelDocument import RenderedDocument
from modules.datamodels.datamodelChat import ChatDocument from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceGeneration.subDocumentUtility import ( from modules.services.serviceGeneration.subDocumentUtility import (
getFileExtension, getFileExtension,
@ -345,31 +346,31 @@ class GenerationService:
'workflowId': 'unknown' 'workflowId': 'unknown'
} }
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]: async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
""" """
Render extracted JSON content to the specified output format. Render extracted JSON content to the specified output format.
Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering). Processes EACH document separately and calls renderer for each.
Always uses unified "documents" array format. Each renderer can return 1..n documents (e.g., HTML + images).
Supports three content formats: reference, object (base64), extracted_text.
Args: Args:
extractedContent: Structured JSON document from AI extraction extractedContent: Structured JSON document with documents array
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx) outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
In future, each document can have its own format
title: Report title title: Report title
userPrompt: User's original prompt for report generation userPrompt: User's original prompt for report generation
aiService: AI service instance for generation prompt creation aiService: AI service instance for generation prompt creation
parentOperationId: Optional parent operation ID for hierarchical logging parentOperationId: Optional parent operation ID for hierarchical logging
Returns: Returns:
tuple: (rendered_content, mime_type, images_list) List of RenderedDocument objects.
images_list: List of image dicts with base64Data, altText, caption, etc. Each RenderedDocument represents one rendered file (main document or supporting file)
""" """
try: try:
# Validate JSON input # Validate JSON input
if not isinstance(extractedContent, dict): if not isinstance(extractedContent, dict):
raise ValueError("extractedContent must be a JSON dictionary") raise ValueError("extractedContent must be a JSON dictionary")
# Unified approach: Always expect "documents" array (single doc = n=1) # Unified approach: Always expect "documents" array
if "documents" not in extractedContent: if "documents" not in extractedContent:
raise ValueError("extractedContent must contain 'documents' array") raise ValueError("extractedContent must contain 'documents' array")
@ -377,56 +378,45 @@ class GenerationService:
if len(documents) == 0: if len(documents) == 0:
raise ValueError("No documents found in 'documents' array") raise ValueError("No documents found in 'documents' array")
# Phase 5: Multi-Dokument-Rendering metadata = extractedContent.get("metadata", {})
if len(documents) == 1: allRenderedDocuments = []
# Single document - use existing logic
single_doc = documents[0] # Process EACH document separately
if "sections" not in single_doc: for docIndex, doc in enumerate(documents):
raise ValueError("Document must contain 'sections' field") if not isinstance(doc, dict):
logger.warning(f"Skipping invalid document at index {docIndex}")
continue
# Pass standardized schema to renderer (maintains architecture) if "sections" not in doc:
contentToRender = extractedContent # Pass full standardized schema logger.warning(f"Document {doc.get('id', docIndex)} has no sections, skipping")
else: continue
# Multiple documents - merge all sections into one document for rendering
# Option: Merge all sections from all documents into a single document
all_sections = []
for doc in documents:
if isinstance(doc, dict) and "sections" in doc:
sections = doc.get("sections", [])
if isinstance(sections, list):
all_sections.extend(sections)
if not all_sections: # Determine format for this document
raise ValueError("No sections found in any document") # TODO: In future, each document can have its own format field
# For now, use the global outputFormat
docFormat = doc.get("format", outputFormat)
# Create merged document with all sections # Get renderer for this document's format
merged_document = { renderer = self._getFormatRenderer(docFormat)
"metadata": extractedContent.get("metadata", {}), if not renderer:
"documents": [{ logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
"id": "merged", continue
"title": title,
"filename": f"{title}.{outputFormat}", # Create JSON structure with single document (preserving metadata)
"sections": all_sections singleDocContent = {
}] "metadata": metadata,
"documents": [doc] # Only this document
} }
contentToRender = merged_document
logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections") # Use document title or fallback to provided title
docTitle = doc.get("title", title)
# Get the appropriate renderer for the format
renderer = self._getFormatRenderer(outputFormat) # Render this document (can return multiple files, e.g., HTML + images)
if not renderer: renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService)
raise ValueError(f"Unsupported output format: {outputFormat}") allRenderedDocuments.extend(renderedDocs)
# Render the JSON content directly (AI generation handled by main service) logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)")
# Renderer receives standardized schema and extracts what it needs return allRenderedDocuments
renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
# Get images from renderer if available
images = []
if hasattr(renderer, 'getRenderedImages'):
images = renderer.getRenderedImages()
return renderedContent, mimeType, images
except Exception as e: except Exception as e:
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}") logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")

View file

@ -5,8 +5,9 @@ Base renderer class for all format renderers.
""" """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from typing import Dict, Any, Tuple, List from typing import Dict, Any, List
from modules.datamodels.datamodelJson import supportedSectionTypes from modules.datamodels.datamodelJson import supportedSectionTypes
from modules.datamodels.datamodelDocument import RenderedDocument
import json import json
import logging import logging
import re import re
@ -50,21 +51,49 @@ class BaseRenderer(ABC):
return 0 return 0
@abstractmethod @abstractmethod
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
""" """
Render extracted JSON content to the target format. Render extracted JSON content to multiple documents.
Each renderer must implement this method.
Can return 1..n documents (e.g., HTML + images).
Args: Args:
extractedContent: Structured JSON content with sections and metadata extractedContent: Structured JSON content with sections and metadata (contains single document)
title: Report title title: Report title
userPrompt: Original user prompt for context userPrompt: Original user prompt for context
aiService: AI service instance for additional processing aiService: AI service instance for additional processing
Returns: Returns:
tuple: (renderedContent, mimeType) List of RenderedDocument objects.
First document is the main document, additional documents are supporting files (e.g., images).
Even if only one document is returned, it must be wrapped in a list.
""" """
pass pass
def _determineFilename(self, title: str, mimeType: str) -> str:
    """
    Build a filesystem-safe filename from a title and a MIME type.

    Args:
        title: Human-readable title. Every character other than ASCII
            letters, digits, '.', '_' and '-' is replaced by '_', runs of
            '_' are collapsed and leading/trailing '_' are removed. None,
            an empty title, or a title that sanitizes to nothing yields
            the stem "document".
        mimeType: MIME type used to choose the extension. Matching ignores
            case and any parameters after ';' (e.g. "; charset=utf-8").
            Unknown or missing types fall back to "txt".

    Returns:
        The filename in the form "<sanitized title>.<extension>".
    """
    import re

    extensionMap = {
        "text/html": "html",
        "application/pdf": "pdf",
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
        "text/plain": "txt",
        "text/markdown": "md",
        "application/json": "json",
        "text/csv": "csv",
        # Image types: without these, image outputs fell through to the
        # "txt" default and were delivered with the wrong extension.
        "image/png": "png",
        "image/jpeg": "jpg",
        "image/jpg": "jpg",
        "image/gif": "gif",
        "image/webp": "webp",
    }

    # MIME types are case-insensitive and may carry parameters; compare only
    # the bare "type/subtype" part.
    normalizedMime = (mimeType or "").split(";", 1)[0].strip().lower()
    extension = extensionMap.get(normalizedMime, "txt")

    # Sanitize title for filename; a missing title is treated as empty so the
    # "document" fallback applies instead of raising inside re.sub.
    sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title or "")
    sanitized = re.sub(r"_+", "_", sanitized).strip("_")
    if not sanitized:
        sanitized = "document"

    return f"{sanitized}.{extension}"
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
""" """
Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}

View file

@ -5,7 +5,8 @@ CSV renderer for report generation.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
class RendererCsv(BaseRenderer): class RendererCsv(BaseRenderer):
"""Renders content to CSV format with format-specific extraction.""" """Renders content to CSV format with format-specific extraction."""
@ -25,13 +26,28 @@ class RendererCsv(BaseRenderer):
"""Return priority for CSV renderer.""" """Return priority for CSV renderer."""
return 70 return 70
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to CSV format.""" """Render extracted JSON content to CSV format."""
try: try:
# Generate CSV directly from JSON (no styling needed for CSV) # Generate CSV directly from JSON (no styling needed for CSV)
csvContent = await self._generateCsvFromJson(extractedContent, title) csvContent = await self._generateCsvFromJson(extractedContent, title)
return csvContent, "text/csv" # Determine filename from document or title
documents = extractedContent.get("documents", [])
if documents and isinstance(documents[0], dict):
filename = documents[0].get("filename")
if not filename:
filename = self._determineFilename(title, "text/csv")
else:
filename = self._determineFilename(title, "text/csv")
return [
RenderedDocument(
documentData=csvContent.encode('utf-8'),
mimeType="text/csv",
filename=filename
)
]
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering CSV: {str(e)}") self.logger.error(f"Error rendering CSV: {str(e)}")

View file

@ -5,7 +5,8 @@ DOCX renderer for report generation using python-docx.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
import io import io
import base64 import base64
import re import re
@ -38,7 +39,7 @@ class RendererDocx(BaseRenderer):
"""Return priority for DOCX renderer.""" """Return priority for DOCX renderer."""
return 115 return 115
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to DOCX format using AI-analyzed styling.""" """Render extracted JSON content to DOCX format using AI-analyzed styling."""
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER") self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
try: try:
@ -46,18 +47,48 @@ class RendererDocx(BaseRenderer):
# Fallback to HTML if python-docx not available # Fallback to HTML if python-docx not available
from .rendererHtml import RendererHtml from .rendererHtml import RendererHtml
htmlRenderer = RendererHtml() htmlRenderer = RendererHtml()
htmlContent, _ = await htmlRenderer.render(extractedContent, title) return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)
return htmlContent, "text/html"
# Generate DOCX using AI-analyzed styling # Generate DOCX using AI-analyzed styling
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService) docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document" # Determine filename from document or title
documents = extractedContent.get("documents", [])
if documents and isinstance(documents[0], dict):
filename = documents[0].get("filename")
if not filename:
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
else:
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
# Convert DOCX content to bytes if it's a string (base64)
if isinstance(docx_content, str):
try:
docx_bytes = base64.b64decode(docx_content)
except Exception:
docx_bytes = docx_content.encode('utf-8')
else:
docx_bytes = docx_content
return [
RenderedDocument(
documentData=docx_bytes,
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
filename=filename
)
]
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering DOCX: {str(e)}") self.logger.error(f"Error rendering DOCX: {str(e)}")
# Return minimal fallback # Return minimal fallback
return f"DOCX Generation Error: {str(e)}", "text/plain" fallbackContent = f"DOCX Generation Error: {str(e)}"
return [
RenderedDocument(
documentData=fallbackContent.encode('utf-8'),
mimeType="text/plain",
filename=self._determineFilename(title, "text/plain")
)
]
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate DOCX content from structured JSON document.""" """Generate DOCX content from structured JSON document."""

View file

@ -5,7 +5,8 @@ HTML renderer for report generation.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
class RendererHtml(BaseRenderer): class RendererHtml(BaseRenderer):
"""Renders content to HTML format with format-specific extraction.""" """Renders content to HTML format with format-specific extraction."""
@ -25,29 +26,66 @@ class RendererHtml(BaseRenderer):
"""Return priority for HTML renderer.""" """Return priority for HTML renderer."""
return 100 return 100
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to HTML format using AI-analyzed styling.""" """
try: Render HTML document with images as separate files.
# Extract images first Returns list of documents: [HTML document, image1, image2, ...]
images = self._extractImages(extractedContent) """
import base64
# Extract images first
images = self._extractImages(extractedContent)
# Store images in instance for later retrieval
self._renderedImages = images
# Generate HTML using AI-analyzed styling
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
# Replace base64 data URIs with relative file paths if images exist
if images:
htmlContent = self._replaceImageDataUris(htmlContent, images)
# Determine HTML filename from document or title
documents = extractedContent.get("documents", [])
if documents and isinstance(documents[0], dict):
htmlFilename = documents[0].get("filename")
if not htmlFilename:
htmlFilename = self._determineFilename(title, "text/html")
else:
htmlFilename = self._determineFilename(title, "text/html")
# Start with HTML document
resultDocuments = [
RenderedDocument(
documentData=htmlContent.encode('utf-8'),
mimeType="text/html",
filename=htmlFilename
)
]
# Add images as separate documents
for img in images:
base64Data = img.get("base64Data", "")
filename = img.get("filename", f"image_{len(resultDocuments)}.png")
mimeType = img.get("mimeType", "image/png")
# Store images in instance for later retrieval if base64Data:
self._renderedImages = images try:
# Decode base64 to bytes
# Generate HTML using AI-analyzed styling imageBytes = base64.b64decode(base64Data)
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService) resultDocuments.append(
RenderedDocument(
# Replace base64 data URIs with relative file paths if images exist documentData=imageBytes,
if images: mimeType=mimeType,
htmlContent = self._replaceImageDataUris(htmlContent, images) filename=filename
)
return htmlContent, "text/html" )
self.logger.debug(f"Added image file: {filename} ({len(imageBytes)} bytes)")
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering HTML: {str(e)}") self.logger.warning(f"Error creating image file {filename}: {str(e)}")
# Return minimal HTML fallback
self._renderedImages = [] # Initialize empty list on error return resultDocuments
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate HTML content from structured JSON document using AI-generated styling.""" """Generate HTML content from structured JSON document using AI-generated styling."""
@ -597,8 +635,31 @@ class RendererHtml(BaseRenderer):
if base64Data: if base64Data:
sectionId = section.get("id", "unknown") sectionId = section.get("id", "unknown")
# Bestimme MIME-Type und Extension
mimeType = element.get("mimeType", "image/png")
if not mimeType or mimeType == "unknown":
# Versuche MIME-Type aus base64 zu erkennen
if base64Data.startswith("/9j/"):
mimeType = "image/jpeg"
elif base64Data.startswith("iVBORw0KGgo"):
mimeType = "image/png"
else:
mimeType = "image/png" # Default
# Bestimme Extension basierend auf MIME-Type
extension = "png"
if mimeType == "image/jpeg" or mimeType == "image/jpg":
extension = "jpg"
elif mimeType == "image/png":
extension = "png"
elif mimeType == "image/gif":
extension = "gif"
elif mimeType == "image/webp":
extension = "webp"
# Generate filename from section ID # Generate filename from section ID
filename = f"{sectionId}.png" filename = f"{sectionId}.{extension}"
# Clean filename (remove invalid characters) # Clean filename (remove invalid characters)
filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename) filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename)
@ -607,7 +668,8 @@ class RendererHtml(BaseRenderer):
"altText": element.get("altText", "Image"), "altText": element.get("altText", "Image"),
"caption": element.get("caption"), "caption": element.get("caption"),
"sectionId": sectionId, "sectionId": sectionId,
"filename": filename "filename": filename,
"mimeType": mimeType
}) })
self.logger.debug(f"Extracted image from section {sectionId}: {filename}") self.logger.debug(f"Extracted image from section {sectionId}: {filename}")
@ -633,8 +695,9 @@ class RendererHtml(BaseRenderer):
import base64 import base64
import re import re
# Find all image data URIs in HTML # Find all image data URIs in HTML (verschiedene MIME-Types unterstützen)
dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)' # Pattern: data:image/[type];base64,<base64>
dataUriPattern = r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)'
def replaceDataUri(match): def replaceDataUri(match):
base64Data = match.group(1) base64Data = match.group(1)
@ -642,7 +705,9 @@ class RendererHtml(BaseRenderer):
# Find matching image in images list # Find matching image in images list
matchingImage = None matchingImage = None
for img in images: for img in images:
if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]): imgBase64 = img.get("base64Data", "")
# Vergleiche base64-Daten (kann unterschiedliche Längen haben durch Padding)
if imgBase64 == base64Data or imgBase64.startswith(base64Data[:100]) or base64Data.startswith(imgBase64[:100]):
matchingImage = img matchingImage = img
break break
@ -650,20 +715,25 @@ class RendererHtml(BaseRenderer):
# Use filename from image data (generated from section ID) # Use filename from image data (generated from section ID)
filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png") filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
# Replace with relative path # Replace with relative path (ohne Pfad, nur Dateiname)
altText = matchingImage.get("altText", "Image") altText = matchingImage.get("altText", "Image")
caption = matchingImage.get("caption", "") caption = matchingImage.get("caption", "")
# Entferne IMAGE_MARKER Kommentar falls vorhanden
imgTag = f'<img src="{filename}" alt="{altText}">'
if caption: if caption:
return f'<figure><img src="{filename}" alt="{altText}"><figcaption>{caption}</figcaption></figure>' return f'<figure>{imgTag}<figcaption>{caption}</figcaption></figure>'
else: else:
return f'<img src="{filename}" alt="{altText}">' return imgTag
else: else:
# Keep original if no match found # Keep original if no match found
return match.group(0) return match.group(0)
# Replace all data URIs # Replace all data URIs (auch IMAGE_MARKER Kommentare entfernen)
updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent) updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent)
# Entferne IMAGE_MARKER Kommentare die übrig geblieben sind
updatedHtml = re.sub(r'<!--IMAGE_MARKER:[^>]+-->', '', updatedHtml)
return updatedHtml return updatedHtml

View file

@ -5,8 +5,10 @@ Image renderer for report generation using AI image generation.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
import logging import logging
import base64
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -28,13 +30,37 @@ class RendererImage(BaseRenderer):
"""Return priority for image renderer.""" """Return priority for image renderer."""
return 90 return 90
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to image format using AI image generation.""" """Render extracted JSON content to image format using AI image generation."""
try: try:
# Generate AI image from content # Generate AI image from content
imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService) imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
return imageContent, "image/png" # Determine filename from document or title
documents = extractedContent.get("documents", [])
if documents and isinstance(documents[0], dict):
filename = documents[0].get("filename")
if not filename:
filename = self._determineFilename(title, "image/png")
else:
filename = self._determineFilename(title, "image/png")
# Convert image content to bytes (base64 string or bytes)
if isinstance(imageContent, str):
try:
imageBytes = base64.b64decode(imageContent)
except Exception:
imageBytes = imageContent.encode('utf-8')
else:
imageBytes = imageContent
return [
RenderedDocument(
documentData=imageBytes,
mimeType="image/png",
filename=filename
)
]
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering image: {str(e)}") self.logger.error(f"Error rendering image: {str(e)}")

View file

@ -5,7 +5,8 @@ JSON renderer for report generation.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
import json import json
class RendererJson(BaseRenderer): class RendererJson(BaseRenderer):
@ -26,14 +27,29 @@ class RendererJson(BaseRenderer):
"""Return priority for JSON renderer.""" """Return priority for JSON renderer."""
return 80 return 80
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to JSON format.""" """Render extracted JSON content to JSON format."""
try: try:
# The extracted content should already be JSON from the AI # The extracted content should already be JSON from the AI
# Just validate and format it # Just validate and format it
jsonContent = self._cleanJsonContent(extractedContent, title) jsonContent = self._cleanJsonContent(extractedContent, title)
return jsonContent, "application/json" # Determine filename from document or title
documents = extractedContent.get("documents", [])
if documents and isinstance(documents[0], dict):
filename = documents[0].get("filename")
if not filename:
filename = self._determineFilename(title, "application/json")
else:
filename = self._determineFilename(title, "application/json")
return [
RenderedDocument(
documentData=jsonContent.encode('utf-8'),
mimeType="application/json",
filename=filename
)
]
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering JSON: {str(e)}") self.logger.error(f"Error rendering JSON: {str(e)}")
@ -43,7 +59,14 @@ class RendererJson(BaseRenderer):
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}], "sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
"metadata": {"error": str(e)} "metadata": {"error": str(e)}
} }
return json.dumps(fallbackData, indent=2), "application/json" fallbackContent = json.dumps(fallbackData, indent=2)
return [
RenderedDocument(
documentData=fallbackContent.encode('utf-8'),
mimeType="application/json",
filename=self._determineFilename(title, "application/json")
)
]
def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str: def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
"""Clean and validate JSON content from AI.""" """Clean and validate JSON content from AI."""

View file

@ -5,7 +5,8 @@ Markdown renderer for report generation.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
class RendererMarkdown(BaseRenderer): class RendererMarkdown(BaseRenderer):
"""Renders content to Markdown format with format-specific extraction.""" """Renders content to Markdown format with format-specific extraction."""
@ -25,18 +26,40 @@ class RendererMarkdown(BaseRenderer):
"""Return priority for markdown renderer.""" """Return priority for markdown renderer."""
return 95 return 95
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Markdown format.""" """Render extracted JSON content to Markdown format."""
try: try:
# Generate markdown from JSON structure # Generate markdown from JSON structure
markdownContent = self._generateMarkdownFromJson(extractedContent, title) markdownContent = self._generateMarkdownFromJson(extractedContent, title)
return markdownContent, "text/markdown" # Determine filename from document or title
documents = extractedContent.get("documents", [])
if documents and isinstance(documents[0], dict):
filename = documents[0].get("filename")
if not filename:
filename = self._determineFilename(title, "text/markdown")
else:
filename = self._determineFilename(title, "text/markdown")
return [
RenderedDocument(
documentData=markdownContent.encode('utf-8'),
mimeType="text/markdown",
filename=filename
)
]
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering markdown: {str(e)}") self.logger.error(f"Error rendering markdown: {str(e)}")
# Return minimal markdown fallback # Return minimal markdown fallback
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown" fallbackContent = f"# {title}\n\nError rendering report: {str(e)}"
return [
RenderedDocument(
documentData=fallbackContent.encode('utf-8'),
mimeType="text/markdown",
filename=self._determineFilename(title, "text/markdown")
)
]
def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate markdown content from structured JSON document.""" """Generate markdown content from structured JSON document."""

View file

@ -5,7 +5,8 @@ PDF renderer for report generation using reportlab.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
import io import io
import base64 import base64
@ -38,25 +39,56 @@ class RendererPdf(BaseRenderer):
"""Return priority for PDF renderer.""" """Return priority for PDF renderer."""
return 120 return 120
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    """
    Render extracted JSON content to PDF format using AI-analyzed styling.

    Args:
        extractedContent: Structured JSON document; an optional "documents"
            list may carry a "filename" in its first entry.
        title: Document title, used to derive a filename when none is given.
        userPrompt: Passed through to the PDF generator / HTML fallback.
        aiService: Passed through to the PDF generator / HTML fallback.

    Returns:
        A one-element list with the rendered PDF. When reportlab is not
        available, the result of the HTML renderer is returned instead.
        When rendering raises, a one-element list with a plain-text error
        document is returned.
    """
    try:
        if not REPORTLAB_AVAILABLE:
            # Fallback to HTML if reportlab not available
            from .rendererHtml import RendererHtml
            html_renderer = RendererHtml()
            return await html_renderer.render(extractedContent, title, userPrompt, aiService)

        # Generate PDF using AI-analyzed styling
        pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)

        # Determine filename from the first document entry, else from the title
        filename = None
        documents = extractedContent.get("documents", [])
        if documents and isinstance(documents[0], dict):
            filename = documents[0].get("filename")
        if not filename:
            filename = self._determineFilename(title, "application/pdf")

        # Convert PDF content to bytes if it's a string (expected to be base64)
        if isinstance(pdf_content, str):
            try:
                # validate=True makes non-base64 input raise instead of being
                # silently filtered into garbage bytes. Whitespace is removed
                # first so line-wrapped base64 is still accepted.
                pdf_bytes = base64.b64decode("".join(pdf_content.split()), validate=True)
            except ValueError:
                # binascii.Error is a ValueError subclass; non-ASCII text
                # raises ValueError as well.
                self.logger.warning("PDF content is not valid base64; using UTF-8 encoded text instead")
                pdf_bytes = pdf_content.encode('utf-8')
        else:
            pdf_bytes = pdf_content

        return [
            RenderedDocument(
                documentData=pdf_bytes,
                mimeType="application/pdf",
                filename=filename
            )
        ]

    except Exception as e:
        self.logger.error(f"Error rendering PDF: {str(e)}")
        # Return minimal fallback
        fallbackContent = f"PDF Generation Error: {str(e)}"
        return [
            RenderedDocument(
                documentData=fallbackContent.encode('utf-8'),
                mimeType="text/plain",
                filename=self._determineFilename(title, "text/plain")
            )
        ]
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str: async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
"""Generate PDF content from structured JSON document using AI-generated styling.""" """Generate PDF content from structured JSON document using AI-generated styling."""

View file

@ -6,8 +6,9 @@ import io
import json import json
import re import re
from datetime import datetime, UTC from datetime import datetime, UTC
from typing import Dict, Any, Optional, Tuple, List from typing import Dict, Any, Optional, List
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from modules.datamodels.datamodelDocument import RenderedDocument
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -25,7 +26,7 @@ class RendererPptx(BaseRenderer):
"""Get list of supported output formats.""" """Get list of supported output formats."""
return ["pptx", "ppt"] return ["pptx", "ppt"]
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
""" """
Render content as PowerPoint presentation from JSON data. Render content as PowerPoint presentation from JSON data.
@ -204,14 +205,44 @@ class RendererPptx(BaseRenderer):
pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8') pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')
logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes") logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"
# Determine filename from document or title
documents = extractedContent.get("documents", [])
if documents and isinstance(documents[0], dict):
filename = documents[0].get("filename")
if not filename:
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
else:
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
return [
RenderedDocument(
documentData=pptx_bytes,
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
filename=filename
)
]
except ImportError: except ImportError:
logger.error("python-pptx library not installed. Install with: pip install python-pptx") logger.error("python-pptx library not installed. Install with: pip install python-pptx")
return "python-pptx library not installed", "text/plain" fallbackContent = "python-pptx library not installed"
return [
RenderedDocument(
documentData=fallbackContent.encode('utf-8'),
mimeType="text/plain",
filename=self._determineFilename(title, "text/plain")
)
]
except Exception as e: except Exception as e:
logger.error(f"Error rendering PowerPoint presentation: {str(e)}") logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain" fallbackContent = f"Error rendering PowerPoint presentation: {str(e)}"
return [
RenderedDocument(
documentData=fallbackContent.encode('utf-8'),
mimeType="text/plain",
filename=self._determineFilename(title, "text/plain")
)
]
def _parseContentToSlides(self, content: str, title: str) -> list: def _parseContentToSlides(self, content: str, title: str) -> list:
""" """

View file

@ -5,7 +5,8 @@ Text renderer for report generation.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
class RendererText(BaseRenderer): class RendererText(BaseRenderer):
"""Renders content to plain text format with format-specific extraction.""" """Renders content to plain text format with format-specific extraction."""
@ -47,18 +48,40 @@ class RendererText(BaseRenderer):
"""Return priority for text renderer.""" """Return priority for text renderer."""
return 90 return 90
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
    """
    Render the extracted JSON structure as a single plain-text document.

    Returns a one-element list holding the UTF-8 encoded text. The filename
    comes from the first entry of extractedContent["documents"] when that
    entry is a dict with a non-empty "filename"; otherwise it is derived
    from the title. If anything raises, the error is logged and a minimal
    plain-text error document is returned instead.
    """
    plainMime = "text/plain"
    try:
        # Build the text body from the JSON structure
        renderedText = self._generateTextFromJson(extractedContent, title)

        # Use the first document's filename when present, else derive one
        outputName = None
        docList = extractedContent.get("documents", [])
        if docList and isinstance(docList[0], dict):
            outputName = docList[0].get("filename")
        if not outputName:
            outputName = self._determineFilename(title, plainMime)

        textDocument = RenderedDocument(
            documentData=renderedText.encode('utf-8'),
            mimeType=plainMime,
            filename=outputName,
        )
        return [textDocument]

    except Exception as e:
        self.logger.error(f"Error rendering text: {str(e)}")
        # Minimal text fallback so the caller still receives a document
        fallbackContent = f"{title}\n\nError rendering report: {str(e)}"
        return [
            RenderedDocument(
                documentData=fallbackContent.encode('utf-8'),
                mimeType=plainMime,
                filename=self._determineFilename(title, plainMime),
            )
        ]
def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str: def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
"""Generate text content from structured JSON document.""" """Generate text content from structured JSON document."""

View file

@ -5,7 +5,8 @@ Excel renderer for report generation using openpyxl.
""" """
from .rendererBaseTemplate import BaseRenderer from .rendererBaseTemplate import BaseRenderer
from typing import Dict, Any, Tuple, List from modules.datamodels.datamodelDocument import RenderedDocument
from typing import Dict, Any, List
import io import io
import base64 import base64
from datetime import datetime, UTC from datetime import datetime, UTC
@ -37,20 +38,43 @@ class RendererXlsx(BaseRenderer):
"""Return priority for Excel renderer.""" """Return priority for Excel renderer."""
return 110 return 110
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]: async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
"""Render extracted JSON content to Excel format using AI-analyzed styling.""" """Render extracted JSON content to Excel format using AI-analyzed styling."""
try: try:
if not OPENPYXL_AVAILABLE: if not OPENPYXL_AVAILABLE:
# Fallback to CSV if openpyxl not available # Fallback to CSV if openpyxl not available
from .rendererCsv import RendererCsv from .rendererCsv import RendererCsv
csvRenderer = RendererCsv() csvRenderer = RendererCsv()
csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService) return await csvRenderer.render(extractedContent, title, userPrompt, aiService)
return csvContent, "text/csv"
# Generate Excel using AI-analyzed styling # Generate Excel using AI-analyzed styling
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService) excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" # Determine filename from document or title
documents = extractedContent.get("documents", [])
if documents and isinstance(documents[0], dict):
filename = documents[0].get("filename")
if not filename:
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
else:
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
# Convert Excel content to bytes if it's a string (base64)
if isinstance(excelContent, str):
try:
excel_bytes = base64.b64decode(excelContent)
except Exception:
excel_bytes = excelContent.encode('utf-8')
else:
excel_bytes = excelContent
return [
RenderedDocument(
documentData=excel_bytes,
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
filename=filename
)
]
except Exception as e: except Exception as e:
self.logger.error(f"Error rendering Excel: {str(e)}") self.logger.error(f"Error rendering Excel: {str(e)}")

View file

@ -106,6 +106,18 @@ class ContentValidator:
if section.get("textPreview"): if section.get("textPreview"):
sectionSummary["textPreview"] = section.get("textPreview") sectionSummary["textPreview"] = section.get("textPreview")
# Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu
contentPartIds = section.get("contentPartIds", [])
if contentPartIds and not elements:
# Prüfe ob contentPartsMetadata vorhanden ist
contentPartsMetadata = section.get("contentPartsMetadata", [])
if contentPartsMetadata:
sectionSummary["contentPartsMetadata"] = contentPartsMetadata
else:
# Fallback: Zeige nur IDs wenn Metadaten nicht verfügbar
sectionSummary["contentPartIds"] = contentPartIds
sectionSummary["note"] = "ContentParts referenced but metadata not available"
# Include any additional fields from section (generic approach) # Include any additional fields from section (generic approach)
# This ensures all action-specific fields are preserved # This ensures all action-specific fields are preserved
for key, value in section.items(): for key, value in section.items():
@ -141,6 +153,18 @@ class ContentValidator:
sectionSummary["rowCount"] = len(rows) sectionSummary["rowCount"] = len(rows)
sectionSummary["headers"] = headers sectionSummary["headers"] = headers
# Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu
contentPartIds = section.get("contentPartIds", [])
if contentPartIds and not elements:
# Prüfe ob contentPartsMetadata vorhanden ist
contentPartsMetadata = section.get("contentPartsMetadata", [])
if contentPartsMetadata:
sectionSummary["contentPartsMetadata"] = contentPartsMetadata
else:
# Fallback: Zeige nur IDs wenn Metadaten nicht verfügbar
sectionSummary["contentPartIds"] = contentPartIds
sectionSummary["note"] = "ContentParts referenced but metadata not available"
# Include any additional fields from section (generic approach) # Include any additional fields from section (generic approach)
for key, value in section.items(): for key, value in section.items():
if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately