enhanced generation engine with chapters as structure, renderers to render a pipeline and deliver 1..n documents
This commit is contained in:
parent
9d4bd8ceef
commit
723f98ea7a
17 changed files with 1141 additions and 264 deletions
|
|
@ -107,5 +107,17 @@ class StructuredDocument(BaseModel):
|
|||
|
||||
|
||||
|
||||
class RenderedDocument(BaseModel):
    """One document emitted by a renderer: raw content, MIME type and filename."""

    documentData: bytes = Field(description="Document content as bytes")
    mimeType: str = Field(description="MIME type of the document (e.g., 'text/html', 'application/pdf')")
    filename: str = Field(description="Filename for the document (e.g., 'report.html', 'image.png')")

    class Config:
        # When the model is exported to JSON, bytes are decoded as UTF-8 and
        # undecodable sequences are replaced instead of raising.
        # NOTE(review): this is lossy for binary payloads such as PDFs or
        # images - confirm that no consumer needs to round-trip the content.
        json_encoders = {
            bytes: lambda raw: raw.decode("utf-8", errors="replace") if isinstance(raw, bytes) else raw,
        }
|
||||
|
||||
|
||||
# Update forward references
|
||||
ListItem.model_rebuild()
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ from modules.services.serviceExtraction.mainServiceExtraction import ExtractionS
|
|||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
|
||||
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||
from modules.shared.jsonUtils import (
|
||||
extractJsonString,
|
||||
|
|
@ -494,20 +495,21 @@ Respond with ONLY a JSON object in this exact format:
|
|||
title: str,
|
||||
userPrompt: str,
|
||||
parentOperationId: str
|
||||
) -> Tuple[bytes, str]:
|
||||
) -> List[RenderedDocument]:
|
||||
"""
|
||||
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
|
||||
Unterstützt Multi-Dokument-Rendering: Alle Dokumente werden gerendert.
|
||||
Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben.
|
||||
|
||||
Args:
|
||||
filledStructure: Gefüllte Struktur mit elements
|
||||
outputFormat: Ziel-Format (pdf, docx, html, etc.)
|
||||
outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet
|
||||
title: Dokument-Titel
|
||||
userPrompt: User-Anfrage
|
||||
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
||||
|
||||
Returns:
|
||||
Tuple von (renderedContent, mimeType)
|
||||
List of RenderedDocument objects.
|
||||
Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei)
|
||||
"""
|
||||
# Erstelle Operation-ID für Rendering
|
||||
renderOperationId = f"{parentOperationId}_rendering"
|
||||
|
|
@ -526,51 +528,21 @@ Respond with ONLY a JSON object in this exact format:
|
|||
|
||||
generationService = GenerationService(self.services)
|
||||
|
||||
# Multi-Dokument-Rendering
|
||||
documents = filledStructure.get("documents", [])
|
||||
|
||||
if len(documents) == 1:
|
||||
# Einzelnes Dokument - wie bisher
|
||||
renderedContent, mimeType, images = await generationService.renderReport(
|
||||
filledStructure,
|
||||
outputFormat,
|
||||
title,
|
||||
userPrompt,
|
||||
self,
|
||||
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
|
||||
)
|
||||
else:
|
||||
# Mehrere Dokumente - rendere alle
|
||||
# Option: Alle Sections zusammenführen und als ein Dokument rendern
|
||||
all_sections = []
|
||||
for doc in documents:
|
||||
if "sections" in doc:
|
||||
all_sections.extend(doc.get("sections", []))
|
||||
|
||||
# Erstelle temporäres Dokument mit allen Sections
|
||||
merged_document = {
|
||||
"metadata": filledStructure["metadata"],
|
||||
"documents": [{
|
||||
"id": "merged",
|
||||
"title": title,
|
||||
"filename": f"{title}.{outputFormat}",
|
||||
"sections": all_sections
|
||||
}]
|
||||
}
|
||||
|
||||
renderedContent, mimeType, images = await generationService.renderReport(
|
||||
merged_document,
|
||||
outputFormat,
|
||||
title,
|
||||
userPrompt,
|
||||
self,
|
||||
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
|
||||
)
|
||||
# renderReport verarbeitet jetzt jedes Dokument einzeln
|
||||
# und gibt Liste von (documentData, mimeType, filename) zurück
|
||||
renderedDocuments = await generationService.renderReport(
|
||||
filledStructure,
|
||||
outputFormat,
|
||||
title,
|
||||
userPrompt,
|
||||
self,
|
||||
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
|
||||
)
|
||||
|
||||
# ChatLog abschließen
|
||||
self.services.chat.progressLogFinish(renderOperationId, True)
|
||||
|
||||
return renderedContent, mimeType
|
||||
return renderedDocuments
|
||||
|
||||
except Exception as e:
|
||||
self.services.chat.progressLogFinish(renderOperationId, False)
|
||||
|
|
@ -712,7 +684,8 @@ Respond with ONLY a JSON object in this exact format:
|
|||
)
|
||||
|
||||
# Schritt 5E: Rendere Resultat
|
||||
renderedContent, mimeType = await self._renderResult(
|
||||
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
|
||||
renderedDocuments = await self._renderResult(
|
||||
filledStructure,
|
||||
outputFormat,
|
||||
title or "Generated Document",
|
||||
|
|
@ -720,15 +693,24 @@ Respond with ONLY a JSON object in this exact format:
|
|||
aiOperationId
|
||||
)
|
||||
|
||||
# Baue Response
|
||||
documentName = self._determineDocumentName(filledStructure, outputFormat, title)
|
||||
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
|
||||
documentDataList = []
|
||||
for renderedDoc in renderedDocuments:
|
||||
try:
|
||||
# Erstelle DocumentData für jedes gerenderte Dokument
|
||||
docDataObj = DocumentData(
|
||||
documentName=renderedDoc.filename,
|
||||
documentData=renderedDoc.documentData,
|
||||
mimeType=renderedDoc.mimeType,
|
||||
sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
|
||||
)
|
||||
documentDataList.append(docDataObj)
|
||||
logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
|
||||
except Exception as e:
|
||||
logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
|
||||
|
||||
docData = DocumentData(
|
||||
documentName=documentName,
|
||||
documentData=renderedContent,
|
||||
mimeType=mimeType,
|
||||
sourceJson=filledStructure
|
||||
)
|
||||
if not documentDataList:
|
||||
raise ValueError("No documents were rendered")
|
||||
|
||||
metadata = AiResponseMetadata(
|
||||
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
|
||||
|
|
@ -746,7 +728,7 @@ Respond with ONLY a JSON object in this exact format:
|
|||
return AiResponse(
|
||||
content=json.dumps(filledStructure),
|
||||
metadata=metadata,
|
||||
documents=[docData]
|
||||
documents=documentDataList
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
|
|
|
|||
|
|
@ -35,65 +35,184 @@ class StructureFiller:
|
|||
parentOperationId: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Phase 5D: Füllt Struktur mit tatsächlichem Content.
|
||||
Für jede Section:
|
||||
- Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format
|
||||
- Wenn generation_hint spezifiziert: Generiere AI-Content
|
||||
Phase 5D: Chapter-Content-Generierung (Zwei-Phasen-Ansatz).
|
||||
|
||||
**Implementierungsdetails:**
|
||||
- Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung)
|
||||
- Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses)
|
||||
Phase 5D.1: Generiert Sections-Struktur für jedes Chapter
|
||||
Phase 5D.2: Füllt Sections mit ContentParts
|
||||
|
||||
Args:
|
||||
structure: Struktur-Dict mit documents und sections
|
||||
structure: Struktur-Dict mit documents und chapters (nicht sections!)
|
||||
contentParts: Alle vorbereiteten ContentParts
|
||||
userPrompt: User-Anfrage
|
||||
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
||||
|
||||
Returns:
|
||||
Gefüllte Struktur mit elements in jeder Section
|
||||
Gefüllte Struktur mit elements in jeder Section (nach Flattening)
|
||||
"""
|
||||
# Erstelle Operation-ID für Struktur-Abfüllen
|
||||
fillOperationId = f"{parentOperationId}_structure_filling"
|
||||
|
||||
# Prüfe ob Struktur Chapters oder Sections hat
|
||||
hasChapters = False
|
||||
for doc in structure.get("documents", []):
|
||||
if "chapters" in doc:
|
||||
hasChapters = True
|
||||
break
|
||||
|
||||
if not hasChapters:
|
||||
# Fallback: Alte Struktur mit Sections direkt - verwende alte Logik
|
||||
logger.warning("Structure has no chapters, using legacy section-based filling")
|
||||
return await self._fillStructureLegacy(structure, contentParts, userPrompt, fillOperationId)
|
||||
|
||||
# Starte ChatLog mit Parent-Referenz
|
||||
chapterCount = sum(len(doc.get("chapters", [])) for doc in structure.get("documents", []))
|
||||
self.services.chat.progressLogStart(
|
||||
fillOperationId,
|
||||
"Structure Filling",
|
||||
"Chapter Content Generation",
|
||||
"Filling",
|
||||
f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections",
|
||||
f"Processing {chapterCount} chapters",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
try:
|
||||
filledStructure = copy.deepcopy(structure)
|
||||
|
||||
# Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden)
|
||||
sections_to_process = []
|
||||
all_sections_list = [] # Für Kontext-Informationen
|
||||
for doc in filledStructure.get("documents", []):
|
||||
doc_sections = doc.get("sections", [])
|
||||
all_sections_list.extend(doc_sections)
|
||||
for section in doc_sections:
|
||||
sections_to_process.append((doc, section))
|
||||
# Phase 5D.1: Sections-Struktur für jedes Chapter generieren
|
||||
filledStructure = await self._generateChapterSectionsStructure(
|
||||
filledStructure, contentParts, userPrompt, fillOperationId
|
||||
)
|
||||
|
||||
# Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden)
|
||||
for sectionIndex, (doc, section) in enumerate(sections_to_process):
|
||||
sectionId = section.get("id")
|
||||
contentPartIds = section.get("contentPartIds", [])
|
||||
contentFormats = section.get("contentFormats", {})
|
||||
generationHint = section.get("generation_hint")
|
||||
contentType = section.get("content_type", "paragraph")
|
||||
# Phase 5D.2: Sections mit ContentParts füllen
|
||||
filledStructure = await self._fillChapterSections(
|
||||
filledStructure, contentParts, userPrompt, fillOperationId
|
||||
)
|
||||
|
||||
elements = []
|
||||
# Flattening: Chapters zu Sections konvertieren
|
||||
flattenedStructure = self._flattenChaptersToSections(filledStructure)
|
||||
|
||||
# Prüfe ob Aggregation nötig ist
|
||||
needsAggregation = self._needsAggregation(
|
||||
contentType=contentType,
|
||||
contentPartCount=len(contentPartIds)
|
||||
# Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
|
||||
flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts)
|
||||
|
||||
# ChatLog abschließen
|
||||
self.services.chat.progressLogFinish(fillOperationId, True)
|
||||
|
||||
return flattenedStructure
|
||||
|
||||
except Exception as e:
|
||||
self.services.chat.progressLogFinish(fillOperationId, False)
|
||||
logger.error(f"Error in fillStructure: {str(e)}")
|
||||
raise
|
||||
|
||||
async def _generateChapterSectionsStructure(
    self,
    chapterStructure: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    parentOperationId: str
) -> Dict[str, Any]:
    """
    Phase 5D.1: generate the section outline (without content) for every chapter.

    For each chapter a prompt is built, sent through ``callAiPlanning`` and the
    parsed result is stored in ``chapter["sections"]``. Prompt and response are
    written as debug files. Every section that does not already carry a
    ``useAiCall`` flag gets one derived from its content type and from the
    chapter's content part instructions.

    Args:
        chapterStructure: Structure dict with ``documents`` -> ``chapters``.
        contentParts: All prepared content parts (forwarded to the prompt builder).
        userPrompt: The user's request (forwarded to the prompt builder).
        parentOperationId: Parent operation id; not used by this method itself.

    Returns:
        The same ``chapterStructure`` object, mutated in place.

    Raises:
        ValueError: If the AI response is not valid JSON
            (``json.JSONDecodeError``) or is neither a JSON object nor a list.
    """
    # Instructions that mean "copy the part verbatim"; any other instruction
    # requires an AI call to transform the content.
    passthroughInstructions = {
        "include full text",
        "include all content",
        "use full extracted text",
    }

    for doc in chapterStructure.get("documents", []):
        for chapter in doc.get("chapters", []):
            chapterId = chapter.get("id", "unknown")
            contentPartInstructions = chapter.get("contentPartInstructions") or {}

            chapterPrompt = self._buildChapterSectionsStructurePrompt(
                chapterId=chapterId,
                chapterLevel=chapter.get("level", 1),
                chapterTitle=chapter.get("title", ""),
                generationHint=chapter.get("generationHint", ""),
                contentPartIds=chapter.get("contentPartIds", []),
                contentPartInstructions=contentPartInstructions,
                contentParts=contentParts,
                userPrompt=userPrompt
            )

            # Debug: log prompt
            self.services.utils.writeDebugFile(
                chapterPrompt,
                f"chapter_structure_{chapterId}_prompt"
            )

            aiResponse = await self.aiService.callAiPlanning(
                prompt=chapterPrompt,
                debugType=f"chapter_structure_{chapterId}"
            )

            # Debug: log response
            self.services.utils.writeDebugFile(
                aiResponse,
                f"chapter_structure_{chapterId}_response"
            )

            sectionsStructure = json.loads(
                self.services.utils.jsonExtractString(aiResponse)
            )

            # The model is asked for {"sections": [...]}, but a bare list is
            # accepted too so a slightly off-schema answer does not abort the run.
            if isinstance(sectionsStructure, dict):
                sections = sectionsStructure.get("sections") or []
            elif isinstance(sectionsStructure, list):
                sections = sectionsStructure
            else:
                raise ValueError(
                    f"Chapter {chapterId}: expected a JSON object or list of sections, "
                    f"got {type(sectionsStructure).__name__}"
                )
            chapter["sections"] = sections

            for section in sections:
                # The prompt schema names the key "generationHint" while the
                # section filler reads "generation_hint"; mirror it so the hint
                # is not silently lost. An existing value is never overwritten.
                if "generationHint" in section:
                    section.setdefault("generation_hint", section["generationHint"])

                # Respect a flag the AI already set.
                if "useAiCall" in section:
                    continue

                # Everything except a plain paragraph needs AI generation.
                useAiCall = section.get("content_type", "paragraph") != "paragraph"

                # A paragraph still needs AI when any of its parts carries an
                # instruction other than "include verbatim".
                if not useAiCall:
                    for partId in section.get("contentPartIds") or []:
                        instruction = contentPartInstructions.get(partId, {}).get("instruction", "")
                        if instruction and instruction.strip().lower() not in passthroughInstructions:
                            useAiCall = True
                            break

                section["useAiCall"] = useAiCall

    return chapterStructure
|
||||
|
||||
async def _fillChapterSections(
|
||||
self,
|
||||
chapterStructure: Dict[str, Any],
|
||||
contentParts: List[ContentPart],
|
||||
userPrompt: str,
|
||||
parentOperationId: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Phase 5D.2: Füllt Sections mit ContentParts.
|
||||
"""
|
||||
# Sammle alle Sections für sequenzielle Verarbeitung
|
||||
sections_to_process = []
|
||||
all_sections_list = [] # Für Kontext-Informationen
|
||||
for doc in chapterStructure.get("documents", []):
|
||||
for chapter in doc.get("chapters", []):
|
||||
for section in chapter.get("sections", []):
|
||||
all_sections_list.append(section)
|
||||
sections_to_process.append((doc, chapter, section))
|
||||
|
||||
# Sequenzielle Section-Generierung
|
||||
fillOperationId = parentOperationId
|
||||
for sectionIndex, (doc, chapter, section) in enumerate(sections_to_process):
|
||||
sectionId = section.get("id")
|
||||
contentPartIds = section.get("contentPartIds", [])
|
||||
contentFormats = section.get("contentFormats", {})
|
||||
generationHint = section.get("generation_hint")
|
||||
contentType = section.get("content_type", "paragraph")
|
||||
useAiCall = section.get("useAiCall", False)
|
||||
|
||||
elements = []
|
||||
|
||||
# Prüfe ob Aggregation nötig ist
|
||||
needsAggregation = self._needsAggregation(
|
||||
contentType=contentType,
|
||||
contentPartCount=len(contentPartIds)
|
||||
)
|
||||
|
||||
if needsAggregation and useAiCall:
|
||||
# Aggregation: Alle Parts zusammen verarbeiten
|
||||
sectionParts = [
|
||||
self._findContentPartById(pid, contentParts)
|
||||
|
|
@ -202,7 +321,7 @@ class StructureFiller:
|
|||
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
||||
# NICHT raise - Section wird mit Fehlermeldung gerendert
|
||||
|
||||
else:
|
||||
else:
|
||||
# Einzelverarbeitung: Jeder Part einzeln
|
||||
for partId in contentPartIds:
|
||||
part = self._findContentPartById(partId, contentParts)
|
||||
|
|
@ -309,18 +428,428 @@ class StructureFiller:
|
|||
"extractionPrompt": part.metadata.get("extractionPrompt")
|
||||
})
|
||||
|
||||
section["elements"] = elements
|
||||
|
||||
return chapterStructure
|
||||
|
||||
def _addContentPartsMetadata(
    self,
    structure: Dict[str, Any],
    contentParts: List[ContentPart]
) -> Dict[str, Any]:
    """
    Attach a ``contentPartsMetadata`` list to every chapter/section that
    references content parts, so later validation can see what those parts are.

    Documents with a ``chapters`` key are annotated on chapter level and on the
    sections inside each chapter; documents that only have ``sections`` (legacy
    layout) are annotated on section level. Ids that match no content part are
    skipped. The structure is modified in place and returned.
    """
    # Summary record per content part, keyed by part id.
    metadataById = {
        part.id: {
            "id": part.id,
            "format": part.metadata.get("contentFormat", "unknown"),
            "type": part.typeGroup,
            "mimeType": part.mimeType,
            "originalFileName": part.metadata.get("originalFileName"),
            "usageHint": part.metadata.get("usageHint"),
            "documentId": part.metadata.get("documentId"),
            "dataSize": len(str(part.data)) if part.data else 0
        }
        for part in contentParts
    }

    def attach(node):
        # Nodes without referenced parts are left untouched (no empty list is added).
        referencedIds = node.get("contentPartIds", [])
        if referencedIds:
            node["contentPartsMetadata"] = [
                metadataById[partId] for partId in referencedIds if partId in metadataById
            ]

    for doc in structure.get("documents", []):
        if "chapters" in doc:
            # New layout: chapters containing sections.
            for chapter in doc.get("chapters", []):
                attach(chapter)
                for section in chapter.get("sections", []):
                    attach(section)
        elif "sections" in doc:
            # Legacy layout: sections directly on the document.
            for section in doc.get("sections", []):
                attach(section)

    return structure
|
||||
|
||||
def _flattenChaptersToSections(
    self,
    chapterStructure: Dict[str, Any]
) -> Dict[str, Any]:
    """
    Flatten a chapter-based structure into the final section-based structure.

    Every chapter contributes one heading section (built from its title and
    level) followed by the chapter's own sections, in order.

    Args:
        chapterStructure: Structure dict with ``metadata`` and
            ``documents`` -> ``chapters`` -> ``sections``.

    Returns:
        A new dict with the same ``metadata`` object and one flattened document
        (``id``, ``title``, ``filename``, ``sections``) per input document.
        Section dicts are reused, not copied.
    """
    result = {
        "metadata": chapterStructure.get("metadata", {}),
        "documents": []
    }

    for doc in chapterStructure.get("documents", []):
        flattenedSections = []

        for chapter in doc.get("chapters", []):
            # 1. Predefined heading section for the chapter title. The id falls
            #    back to "unknown" (same default the section-structure step
            #    uses) instead of raising KeyError for a chapter without an id.
            flattenedSections.append({
                "id": f"{chapter.get('id', 'unknown')}_heading",
                "content_type": "heading",
                "elements": [{
                    "type": "heading",
                    "content": chapter.get("title"),
                    "level": chapter.get("level", 1)
                }]
            })

            # 2. Generated sections of this chapter (a missing or null list
            #    contributes nothing).
            flattenedSections.extend(chapter.get("sections") or [])

        result["documents"].append({
            "id": doc.get("id"),
            "title": doc.get("title"),
            "filename": doc.get("filename"),
            "sections": flattenedSections
        })

    return result
|
||||
|
||||
async def _fillStructureLegacy(
    self,
    structure: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str,
    fillOperationId: str
) -> Dict[str, Any]:
    """
    Legacy path: fill a structure whose documents carry sections directly
    (no chapters), kept for backward compatibility.

    Works on a deep copy of ``structure``. For every section the referenced
    content parts are turned into ``elements``:

    - ``reference`` parts become a reference element,
    - ``object`` parts become an element carrying the part's data,
    - ``extracted`` parts are sent to the AI when the section has a
      ``generation_hint`` (all together when ``_needsAggregation`` says so,
      otherwise one call per part); without a hint the extracted data is
      included as an ``extracted_text`` element.

    A failing AI call does not abort the run: the section receives an
    ``error`` element instead.

    Args:
        structure: Structure dict with ``documents`` -> ``sections``.
        contentParts: All prepared content parts.
        userPrompt: The user's request.
        fillOperationId: Operation id used for progress logging.

    Returns:
        The filled copy of the structure, with content part metadata attached.

    Raises:
        Exception: Any error outside the per-section AI calls is logged,
            the progress log is closed as failed, and the error is re-raised.
    """
    # Count sections over all documents; indexing documents[0] would raise
    # IndexError for an empty document list.
    sectionCount = sum(len(doc.get("sections", [])) for doc in structure.get("documents", []))

    self.services.chat.progressLogStart(
        fillOperationId,
        "Structure Filling (Legacy)",
        "Filling",
        f"Filling {sectionCount} sections",
        # NOTE(review): the operation is registered as its own parent because
        # the real parent id is not passed to this method - confirm intended.
        parentOperationId=fillOperationId
    )

    def _staticElement(part, contentFormat):
        # Element for parts that are embedded without AI; None for other formats.
        if contentFormat == "reference":
            return {
                "type": "reference",
                "documentReference": part.metadata.get("documentReference"),
                "label": part.metadata.get("usageHint", part.label)
            }
        if contentFormat == "object":
            return {
                "type": part.typeGroup,
                "base64Data": part.data,
                "mimeType": part.mimeType,
                "altText": part.metadata.get("usageHint", part.label)
            }
        return None

    async def _generateElements(section, sectionIndex, parts, generationHint, allSections, isAggregation):
        # Run one AI generation for `parts`; returns the generated elements,
        # or a single error element when the call or the parsing fails.
        sectionId = section.get("id")
        generationPrompt = self._buildSectionGenerationPrompt(
            section=section,
            contentParts=parts,
            userPrompt=userPrompt,
            generationHint=generationHint,
            allSections=allSections,
            sectionIndex=sectionIndex,
            isAggregation=isAggregation
        )

        sectionOperationId = f"{fillOperationId}_section_{sectionId}"
        if isAggregation:
            logTitle = "Section Generation (Aggregation)"
            logMessage = f"Generating section {sectionId} with {len(parts)} parts"
        else:
            logTitle = "Section Generation"
            logMessage = f"Generating section {sectionId}"

        self.services.chat.progressLogStart(
            sectionOperationId,
            logTitle,
            "Section",
            logMessage,
            parentOperationId=fillOperationId
        )

        try:
            self.services.utils.writeDebugFile(
                generationPrompt,
                f"section_content_{sectionId}_prompt"
            )

            request = AiCallRequest(
                prompt=generationPrompt,
                contentParts=parts,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_ANALYSE,
                    priority=PriorityEnum.BALANCED,
                    processingMode=ProcessingModeEnum.DETAILED
                )
            )
            aiResponse = await self.aiService.callAi(request)

            self.services.utils.writeDebugFile(
                aiResponse.content,
                f"section_content_{sectionId}_response"
            )

            generated = json.loads(
                self.services.utils.jsonExtractString(aiResponse.content)
            )
            # Accept either a bare list of elements or {"elements": [...]};
            # any other shape contributes nothing.
            if isinstance(generated, list):
                generatedElements = list(generated)
            elif isinstance(generated, dict) and "elements" in generated:
                generatedElements = list(generated["elements"])
            else:
                generatedElements = []

            self.services.chat.progressLogFinish(sectionOperationId, True)
            return generatedElements

        except Exception as e:
            # Do not re-raise: the section is rendered with an error element.
            self.services.chat.progressLogFinish(sectionOperationId, False)
            logger.error(f"Error generating section {sectionId}: {str(e)}")
            return [{
                "type": "error",
                "message": f"Error generating section {sectionId}: {str(e)}",
                "sectionId": sectionId
            }]

    try:
        filledStructure = copy.deepcopy(structure)

        # All sections in document order; also handed to the prompt builder as context.
        allSections = []
        for doc in filledStructure.get("documents", []):
            allSections.extend(doc.get("sections", []))

        for sectionIndex, section in enumerate(allSections):
            contentPartIds = section.get("contentPartIds", [])
            contentFormats = section.get("contentFormats", {})
            generationHint = section.get("generation_hint")
            contentType = section.get("content_type", "paragraph")

            elements = []

            needsAggregation = self._needsAggregation(
                contentType=contentType,
                contentPartCount=len(contentPartIds)
            )

            if needsAggregation and generationHint:
                # Aggregation: embed non-extracted parts directly, then send
                # all extracted parts to the AI in a single call.
                extractedParts = []
                for partId in contentPartIds:
                    part = self._findContentPartById(partId, contentParts)
                    if part is None:
                        continue
                    contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat"))
                    if contentFormat == "extracted":
                        extractedParts.append(part)
                        continue
                    staticElement = _staticElement(part, contentFormat)
                    if staticElement is not None:
                        elements.append(staticElement)

                if extractedParts:
                    elements.extend(await _generateElements(
                        section, sectionIndex, extractedParts, generationHint, allSections, True
                    ))

            else:
                # Individual processing: each part on its own.
                for partId in contentPartIds:
                    part = self._findContentPartById(partId, contentParts)
                    if not part:
                        continue

                    contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat"))

                    if contentFormat == "extracted":
                        if generationHint:
                            # AI call with this single content part.
                            elements.extend(await _generateElements(
                                section, sectionIndex, [part], generationHint, allSections, False
                            ))
                        else:
                            elements.append({
                                "type": "extracted_text",
                                "content": part.data,
                                "source": part.metadata.get("documentId"),
                                "extractionPrompt": part.metadata.get("extractionPrompt")
                            })
                    else:
                        staticElement = _staticElement(part, contentFormat)
                        if staticElement is not None:
                            elements.append(staticElement)

            section["elements"] = elements

        # Add content part metadata to the structure (used for validation).
        filledStructure = self._addContentPartsMetadata(filledStructure, contentParts)

        # Close the progress log exactly once.
        self.services.chat.progressLogFinish(fillOperationId, True)

        return filledStructure

    except Exception as e:
        self.services.chat.progressLogFinish(fillOperationId, False)
        logger.error(f"Error in _fillStructureLegacy: {str(e)}")
        raise
|
||||
|
||||
def _buildChapterSectionsStructurePrompt(
    self,
    chapterId: str,
    chapterLevel: int,
    chapterTitle: str,
    generationHint: str,
    contentPartIds: List[str],
    contentPartInstructions: Dict[str, Any],
    contentParts: List[ContentPart],
    userPrompt: str
) -> str:
    """
    Build the prompt that asks the AI for a chapter's section structure.

    The prompt contains the chapter metadata, an index of the chapter's
    content parts (id, format, type and instruction only - no content
    previews) and the JSON schema the answer must follow. Ids that match no
    content part are left out of the index. ``userPrompt`` is accepted but not
    embedded in the prompt text.
    """
    # One index entry per resolvable content part.
    indexEntries = []
    for partId in contentPartIds:
        part = self._findContentPartById(partId, contentParts)
        if not part:
            continue

        contentFormat = part.metadata.get("contentFormat", "unknown")
        instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed")

        indexEntries.append(
            f"\n- ContentPart ID: {partId}\n"
            f" Format: {contentFormat}\n"
            f" Type: {part.typeGroup}\n"
            f" Instruction: {instruction}\n"
        )

    # Placeholder text when the chapter references no usable parts.
    contentPartsIndex = "".join(indexEntries) or "\n(No content parts specified for this chapter)"

    return f"""TASK: Generate Chapter Sections Structure

CHAPTER METADATA:
- Chapter ID: {chapterId}
- Chapter Level: {chapterLevel}
- Chapter Title: {chapterTitle}
- Generation Hint: {generationHint}

WICHTIG: Chapter hat bereits vordefinierte Heading-Section.
Generiere NICHT eine Heading-Section für Chapter-Title!

AVAILABLE CONTENT PARTS:
{contentPartsIndex}

STANDARD JSON SCHEMA FOR SECTIONS:
Supported content_types: table, bullet_list, heading, paragraph, code_block, image

Return JSON:
{{
"sections": [
{{
"id": "section_1",
"content_type": "paragraph",
"contentPartIds": ["part_ext_1"],
"generationHint": "...",
"useAiCall": false,
"elements": []
}}
]
}}

CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
"""
|
||||
|
||||
def _buildSectionGenerationPrompt(
|
||||
self,
|
||||
section: Dict[str, Any],
|
||||
|
|
|
|||
|
|
@ -32,11 +32,12 @@ class StructureGenerator:
|
|||
parentOperationId: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Phase 5C: Generiert Dokument-Struktur mit Sections.
|
||||
Jede Section spezifiziert:
|
||||
- Welcher Content sollte in dieser Section sein
|
||||
- Welche ContentParts zu verwenden sind
|
||||
- Format für jeden ContentPart
|
||||
Phase 5C: Generiert Chapter-Struktur (Table of Contents).
|
||||
Definiert für jedes Chapter:
|
||||
- Level, Title
|
||||
- contentPartIds
|
||||
- contentPartInstructions
|
||||
- generationHint
|
||||
|
||||
Args:
|
||||
userPrompt: User-Anfrage
|
||||
|
|
@ -45,7 +46,7 @@ class StructureGenerator:
|
|||
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
||||
|
||||
Returns:
|
||||
Struktur-Dict mit documents und sections
|
||||
Struktur-Dict mit documents und chapters (nicht sections!)
|
||||
"""
|
||||
# Erstelle Operation-ID für Struktur-Generierung
|
||||
structureOperationId = f"{parentOperationId}_structure_generation"
|
||||
|
|
@ -53,25 +54,36 @@ class StructureGenerator:
|
|||
# Starte ChatLog mit Parent-Referenz
|
||||
self.services.chat.progressLogStart(
|
||||
structureOperationId,
|
||||
"Structure Generation",
|
||||
"Chapter Structure Generation",
|
||||
"Structure",
|
||||
f"Generating structure for {outputFormat}",
|
||||
f"Generating chapter structure for {outputFormat}",
|
||||
parentOperationId=parentOperationId
|
||||
)
|
||||
|
||||
try:
|
||||
# Baue Struktur-Prompt mit Content-Index
|
||||
structurePrompt = self._buildStructurePrompt(
|
||||
# Baue Chapter-Struktur-Prompt mit Content-Index
|
||||
structurePrompt = self._buildChapterStructurePrompt(
|
||||
userPrompt=userPrompt,
|
||||
contentParts=contentParts,
|
||||
outputFormat=outputFormat
|
||||
)
|
||||
|
||||
# AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses)
|
||||
# Debug-Logs werden bereits von callAiPlanning geschrieben
|
||||
# Debug: Log Prompt
|
||||
self.services.utils.writeDebugFile(
|
||||
structurePrompt,
|
||||
"chapter_structure_generation_prompt"
|
||||
)
|
||||
|
||||
# AI-Call für Chapter-Struktur-Generierung
|
||||
aiResponse = await self.aiService.callAiPlanning(
|
||||
prompt=structurePrompt,
|
||||
debugType="document_generation_structure"
|
||||
debugType="chapter_structure_generation"
|
||||
)
|
||||
|
||||
# Debug: Log Response
|
||||
self.services.utils.writeDebugFile(
|
||||
aiResponse,
|
||||
"chapter_structure_generation_response"
|
||||
)
|
||||
|
||||
# Parse Struktur
|
||||
|
|
@ -87,13 +99,13 @@ class StructureGenerator:
|
|||
logger.error(f"Error in generateStructure: {str(e)}")
|
||||
raise
|
||||
|
||||
def _buildStructurePrompt(
|
||||
def _buildChapterStructurePrompt(
|
||||
self,
|
||||
userPrompt: str,
|
||||
contentParts: List[ContentPart],
|
||||
outputFormat: str
|
||||
) -> str:
|
||||
"""Baue Prompt für Struktur-Generierung."""
|
||||
"""Baue Prompt für Chapter-Struktur-Generierung."""
|
||||
# Baue ContentParts-Index - filtere leere Parts heraus
|
||||
contentPartsIndex = ""
|
||||
validParts = []
|
||||
|
|
@ -179,14 +191,19 @@ class StructureGenerator:
|
|||
AVAILABLE CONTENT PARTS:
|
||||
{contentPartsIndex}
|
||||
|
||||
TASK: Generiere Dokument-Struktur mit Sections.
|
||||
Für jede Section, spezifiziere:
|
||||
- section id
|
||||
- content_type (heading, paragraph, image, table, etc.)
|
||||
- contentPartIds: [Liste von ContentPart-IDs zu verwenden]
|
||||
- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist
|
||||
- generation_hint: Was AI für diese Section generieren soll
|
||||
- elements: [] (leer, wird in nächster Phase gefüllt)
|
||||
TASK: Generiere Chapter-Struktur für die zu generierenden Dokumente.
|
||||
|
||||
Für jedes Chapter:
|
||||
- chapter id
|
||||
- level (1, 2, 3, etc.)
|
||||
- title
|
||||
- contentPartIds: [Liste von ContentPart-IDs]
|
||||
- contentPartInstructions: {{
|
||||
"partId": {{
|
||||
"instruction": "Wie Content strukturiert werden soll"
|
||||
}}
|
||||
}}
|
||||
- generationHint: Beschreibung des Inhalts
|
||||
|
||||
OUTPUT FORMAT: {outputFormat}
|
||||
|
||||
|
|
@ -200,24 +217,19 @@ RETURN JSON:
|
|||
"id": "doc_1",
|
||||
"title": "Document Title",
|
||||
"filename": "document.{outputFormat}",
|
||||
"sections": [
|
||||
"chapters": [
|
||||
{{
|
||||
"id": "section_1",
|
||||
"content_type": "heading",
|
||||
"generation_hint": "Main title",
|
||||
"contentPartIds": [],
|
||||
"contentFormats": {{}},
|
||||
"elements": []
|
||||
}},
|
||||
{{
|
||||
"id": "section_2",
|
||||
"content_type": "paragraph",
|
||||
"generation_hint": "Introduction paragraph",
|
||||
"id": "chapter_1",
|
||||
"level": 1,
|
||||
"title": "Introduction",
|
||||
"contentPartIds": ["part_ext_1"],
|
||||
"contentFormats": {{
|
||||
"part_ext_1": "extracted"
|
||||
"contentPartInstructions": {{
|
||||
"part_ext_1": {{
|
||||
"instruction": "Use full extracted text"
|
||||
}}
|
||||
}},
|
||||
"elements": []
|
||||
"generationHint": "Create introduction section",
|
||||
"sections": []
|
||||
}}
|
||||
]
|
||||
}}]
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import uuid
|
|||
import base64
|
||||
import traceback
|
||||
from typing import Any, Dict, List, Optional, Callable
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from modules.datamodels.datamodelChat import ChatDocument
|
||||
from modules.services.serviceGeneration.subDocumentUtility import (
|
||||
getFileExtension,
|
||||
|
|
@ -345,31 +346,31 @@ class GenerationService:
|
|||
'workflowId': 'unknown'
|
||||
}
|
||||
|
||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]:
|
||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render extracted JSON content to the specified output format.
|
||||
Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering).
|
||||
Always uses unified "documents" array format.
|
||||
Supports three content formats: reference, object (base64), extracted_text.
|
||||
Processes EACH document separately and calls renderer for each.
|
||||
Each renderer can return 1..n documents (e.g., HTML + images).
|
||||
|
||||
Args:
|
||||
extractedContent: Structured JSON document from AI extraction
|
||||
extractedContent: Structured JSON document with documents array
|
||||
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
|
||||
In future, each document can have its own format
|
||||
title: Report title
|
||||
userPrompt: User's original prompt for report generation
|
||||
aiService: AI service instance for generation prompt creation
|
||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||
|
||||
Returns:
|
||||
tuple: (rendered_content, mime_type, images_list)
|
||||
images_list: List of image dicts with base64Data, altText, caption, etc.
|
||||
List of RenderedDocument objects.
|
||||
Each RenderedDocument represents one rendered file (main document or supporting file)
|
||||
"""
|
||||
try:
|
||||
# Validate JSON input
|
||||
if not isinstance(extractedContent, dict):
|
||||
raise ValueError("extractedContent must be a JSON dictionary")
|
||||
|
||||
# Unified approach: Always expect "documents" array (single doc = n=1)
|
||||
# Unified approach: Always expect "documents" array
|
||||
if "documents" not in extractedContent:
|
||||
raise ValueError("extractedContent must contain 'documents' array")
|
||||
|
||||
|
|
@ -377,56 +378,45 @@ class GenerationService:
|
|||
if len(documents) == 0:
|
||||
raise ValueError("No documents found in 'documents' array")
|
||||
|
||||
# Phase 5: Multi-Dokument-Rendering
|
||||
if len(documents) == 1:
|
||||
# Single document - use existing logic
|
||||
single_doc = documents[0]
|
||||
if "sections" not in single_doc:
|
||||
raise ValueError("Document must contain 'sections' field")
|
||||
metadata = extractedContent.get("metadata", {})
|
||||
allRenderedDocuments = []
|
||||
|
||||
# Pass standardized schema to renderer (maintains architecture)
|
||||
contentToRender = extractedContent # Pass full standardized schema
|
||||
else:
|
||||
# Multiple documents - merge all sections into one document for rendering
|
||||
# Option: Merge all sections from all documents into a single document
|
||||
all_sections = []
|
||||
for doc in documents:
|
||||
if isinstance(doc, dict) and "sections" in doc:
|
||||
sections = doc.get("sections", [])
|
||||
if isinstance(sections, list):
|
||||
all_sections.extend(sections)
|
||||
# Process EACH document separately
|
||||
for docIndex, doc in enumerate(documents):
|
||||
if not isinstance(doc, dict):
|
||||
logger.warning(f"Skipping invalid document at index {docIndex}")
|
||||
continue
|
||||
|
||||
if not all_sections:
|
||||
raise ValueError("No sections found in any document")
|
||||
if "sections" not in doc:
|
||||
logger.warning(f"Document {doc.get('id', docIndex)} has no sections, skipping")
|
||||
continue
|
||||
|
||||
# Create merged document with all sections
|
||||
merged_document = {
|
||||
"metadata": extractedContent.get("metadata", {}),
|
||||
"documents": [{
|
||||
"id": "merged",
|
||||
"title": title,
|
||||
"filename": f"{title}.{outputFormat}",
|
||||
"sections": all_sections
|
||||
}]
|
||||
# Determine format for this document
|
||||
# TODO: In future, each document can have its own format field
|
||||
# For now, use the global outputFormat
|
||||
docFormat = doc.get("format", outputFormat)
|
||||
|
||||
# Get renderer for this document's format
|
||||
renderer = self._getFormatRenderer(docFormat)
|
||||
if not renderer:
|
||||
logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
|
||||
continue
|
||||
|
||||
# Create JSON structure with single document (preserving metadata)
|
||||
singleDocContent = {
|
||||
"metadata": metadata,
|
||||
"documents": [doc] # Only this document
|
||||
}
|
||||
contentToRender = merged_document
|
||||
logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections")
|
||||
|
||||
# Get the appropriate renderer for the format
|
||||
renderer = self._getFormatRenderer(outputFormat)
|
||||
if not renderer:
|
||||
raise ValueError(f"Unsupported output format: {outputFormat}")
|
||||
# Use document title or fallback to provided title
|
||||
docTitle = doc.get("title", title)
|
||||
|
||||
# Render the JSON content directly (AI generation handled by main service)
|
||||
# Renderer receives standardized schema and extracts what it needs
|
||||
renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
|
||||
# Render this document (can return multiple files, e.g., HTML + images)
|
||||
renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService)
|
||||
allRenderedDocuments.extend(renderedDocs)
|
||||
|
||||
# Get images from renderer if available
|
||||
images = []
|
||||
if hasattr(renderer, 'getRenderedImages'):
|
||||
images = renderer.getRenderedImages()
|
||||
|
||||
return renderedContent, mimeType, images
|
||||
logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)")
|
||||
return allRenderedDocuments
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -5,8 +5,9 @@ Base renderer class for all format renderers.
|
|||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from typing import Dict, Any, List
|
||||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
|
|
@ -50,21 +51,49 @@ class BaseRenderer(ABC):
|
|||
return 0
|
||||
|
||||
@abstractmethod
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render extracted JSON content to the target format.
|
||||
Render extracted JSON content to multiple documents.
|
||||
Each renderer must implement this method.
|
||||
Can return 1..n documents (e.g., HTML + images).
|
||||
|
||||
Args:
|
||||
extractedContent: Structured JSON content with sections and metadata
|
||||
extractedContent: Structured JSON content with sections and metadata (contains single document)
|
||||
title: Report title
|
||||
userPrompt: Original user prompt for context
|
||||
aiService: AI service instance for additional processing
|
||||
|
||||
Returns:
|
||||
tuple: (renderedContent, mimeType)
|
||||
List of RenderedDocument objects.
|
||||
First document is the main document, additional documents are supporting files (e.g., images).
|
||||
Even if only one document is returned, it must be wrapped in a list.
|
||||
"""
|
||||
pass
|
||||
|
||||
def _determineFilename(self, title: str, mimeType: str) -> str:
|
||||
"""Determine filename from title and mimeType."""
|
||||
import re
|
||||
# Get extension from mimeType
|
||||
extensionMap = {
|
||||
"text/html": "html",
|
||||
"application/pdf": "pdf",
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
|
||||
"text/plain": "txt",
|
||||
"text/markdown": "md",
|
||||
"application/json": "json",
|
||||
"text/csv": "csv"
|
||||
}
|
||||
extension = extensionMap.get(mimeType, "txt")
|
||||
|
||||
# Sanitize title for filename
|
||||
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title)
|
||||
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||
if not sanitized:
|
||||
sanitized = "document"
|
||||
|
||||
return f"{sanitized}.{extension}"
|
||||
|
||||
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ CSV renderer for report generation.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
|
||||
class RendererCsv(BaseRenderer):
|
||||
"""Renders content to CSV format with format-specific extraction."""
|
||||
|
|
@ -25,13 +26,28 @@ class RendererCsv(BaseRenderer):
|
|||
"""Return priority for CSV renderer."""
|
||||
return 70
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to CSV format."""
|
||||
try:
|
||||
# Generate CSV directly from JSON (no styling needed for CSV)
|
||||
csvContent = await self._generateCsvFromJson(extractedContent, title)
|
||||
|
||||
return csvContent, "text/csv"
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "text/csv")
|
||||
else:
|
||||
filename = self._determineFilename(title, "text/csv")
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=csvContent.encode('utf-8'),
|
||||
mimeType="text/csv",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering CSV: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ DOCX renderer for report generation using python-docx.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
import io
|
||||
import base64
|
||||
import re
|
||||
|
|
@ -38,7 +39,7 @@ class RendererDocx(BaseRenderer):
|
|||
"""Return priority for DOCX renderer."""
|
||||
return 115
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
|
||||
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
|
||||
try:
|
||||
|
|
@ -46,18 +47,48 @@ class RendererDocx(BaseRenderer):
|
|||
# Fallback to HTML if python-docx not available
|
||||
from .rendererHtml import RendererHtml
|
||||
htmlRenderer = RendererHtml()
|
||||
htmlContent, _ = await htmlRenderer.render(extractedContent, title)
|
||||
return htmlContent, "text/html"
|
||||
return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
# Generate DOCX using AI-analyzed styling
|
||||
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
|
||||
else:
|
||||
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
|
||||
|
||||
# Convert DOCX content to bytes if it's a string (base64)
|
||||
if isinstance(docx_content, str):
|
||||
try:
|
||||
docx_bytes = base64.b64decode(docx_content)
|
||||
except Exception:
|
||||
docx_bytes = docx_content.encode('utf-8')
|
||||
else:
|
||||
docx_bytes = docx_content
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=docx_bytes,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering DOCX: {str(e)}")
|
||||
# Return minimal fallback
|
||||
return f"DOCX Generation Error: {str(e)}", "text/plain"
|
||||
fallbackContent = f"DOCX Generation Error: {str(e)}"
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=fallbackContent.encode('utf-8'),
|
||||
mimeType="text/plain",
|
||||
filename=self._determineFilename(title, "text/plain")
|
||||
)
|
||||
]
|
||||
|
||||
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
"""Generate DOCX content from structured JSON document."""
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ HTML renderer for report generation.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
|
||||
class RendererHtml(BaseRenderer):
|
||||
"""Renders content to HTML format with format-specific extraction."""
|
||||
|
|
@ -25,29 +26,66 @@ class RendererHtml(BaseRenderer):
|
|||
"""Return priority for HTML renderer."""
|
||||
return 100
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
"""Render extracted JSON content to HTML format using AI-analyzed styling."""
|
||||
try:
|
||||
# Extract images first
|
||||
images = self._extractImages(extractedContent)
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render HTML document with images as separate files.
|
||||
Returns list of documents: [HTML document, image1, image2, ...]
|
||||
"""
|
||||
import base64
|
||||
|
||||
# Store images in instance for later retrieval
|
||||
self._renderedImages = images
|
||||
# Extract images first
|
||||
images = self._extractImages(extractedContent)
|
||||
|
||||
# Generate HTML using AI-analyzed styling
|
||||
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
|
||||
# Store images in instance for later retrieval
|
||||
self._renderedImages = images
|
||||
|
||||
# Replace base64 data URIs with relative file paths if images exist
|
||||
if images:
|
||||
htmlContent = self._replaceImageDataUris(htmlContent, images)
|
||||
# Generate HTML using AI-analyzed styling
|
||||
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
return htmlContent, "text/html"
|
||||
# Replace base64 data URIs with relative file paths if images exist
|
||||
if images:
|
||||
htmlContent = self._replaceImageDataUris(htmlContent, images)
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering HTML: {str(e)}")
|
||||
# Return minimal HTML fallback
|
||||
self._renderedImages = [] # Initialize empty list on error
|
||||
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
|
||||
# Determine HTML filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
htmlFilename = documents[0].get("filename")
|
||||
if not htmlFilename:
|
||||
htmlFilename = self._determineFilename(title, "text/html")
|
||||
else:
|
||||
htmlFilename = self._determineFilename(title, "text/html")
|
||||
|
||||
# Start with HTML document
|
||||
resultDocuments = [
|
||||
RenderedDocument(
|
||||
documentData=htmlContent.encode('utf-8'),
|
||||
mimeType="text/html",
|
||||
filename=htmlFilename
|
||||
)
|
||||
]
|
||||
|
||||
# Add images as separate documents
|
||||
for img in images:
|
||||
base64Data = img.get("base64Data", "")
|
||||
filename = img.get("filename", f"image_{len(resultDocuments)}.png")
|
||||
mimeType = img.get("mimeType", "image/png")
|
||||
|
||||
if base64Data:
|
||||
try:
|
||||
# Decode base64 to bytes
|
||||
imageBytes = base64.b64decode(base64Data)
|
||||
resultDocuments.append(
|
||||
RenderedDocument(
|
||||
documentData=imageBytes,
|
||||
mimeType=mimeType,
|
||||
filename=filename
|
||||
)
|
||||
)
|
||||
self.logger.debug(f"Added image file: {filename} ({len(imageBytes)} bytes)")
|
||||
except Exception as e:
|
||||
self.logger.warning(f"Error creating image file {filename}: {str(e)}")
|
||||
|
||||
return resultDocuments
|
||||
|
||||
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
"""Generate HTML content from structured JSON document using AI-generated styling."""
|
||||
|
|
@ -597,8 +635,31 @@ class RendererHtml(BaseRenderer):
|
|||
|
||||
if base64Data:
|
||||
sectionId = section.get("id", "unknown")
|
||||
|
||||
# Bestimme MIME-Type und Extension
|
||||
mimeType = element.get("mimeType", "image/png")
|
||||
if not mimeType or mimeType == "unknown":
|
||||
# Versuche MIME-Type aus base64 zu erkennen
|
||||
if base64Data.startswith("/9j/"):
|
||||
mimeType = "image/jpeg"
|
||||
elif base64Data.startswith("iVBORw0KGgo"):
|
||||
mimeType = "image/png"
|
||||
else:
|
||||
mimeType = "image/png" # Default
|
||||
|
||||
# Bestimme Extension basierend auf MIME-Type
|
||||
extension = "png"
|
||||
if mimeType == "image/jpeg" or mimeType == "image/jpg":
|
||||
extension = "jpg"
|
||||
elif mimeType == "image/png":
|
||||
extension = "png"
|
||||
elif mimeType == "image/gif":
|
||||
extension = "gif"
|
||||
elif mimeType == "image/webp":
|
||||
extension = "webp"
|
||||
|
||||
# Generate filename from section ID
|
||||
filename = f"{sectionId}.png"
|
||||
filename = f"{sectionId}.{extension}"
|
||||
# Clean filename (remove invalid characters)
|
||||
filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename)
|
||||
|
||||
|
|
@ -607,7 +668,8 @@ class RendererHtml(BaseRenderer):
|
|||
"altText": element.get("altText", "Image"),
|
||||
"caption": element.get("caption"),
|
||||
"sectionId": sectionId,
|
||||
"filename": filename
|
||||
"filename": filename,
|
||||
"mimeType": mimeType
|
||||
})
|
||||
self.logger.debug(f"Extracted image from section {sectionId}: {filename}")
|
||||
|
||||
|
|
@ -633,8 +695,9 @@ class RendererHtml(BaseRenderer):
|
|||
import base64
|
||||
import re
|
||||
|
||||
# Find all image data URIs in HTML
|
||||
dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)'
|
||||
# Find all image data URIs in HTML (verschiedene MIME-Types unterstützen)
|
||||
# Pattern: data:image/[type];base64,<base64>
|
||||
dataUriPattern = r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)'
|
||||
|
||||
def replaceDataUri(match):
|
||||
base64Data = match.group(1)
|
||||
|
|
@ -642,7 +705,9 @@ class RendererHtml(BaseRenderer):
|
|||
# Find matching image in images list
|
||||
matchingImage = None
|
||||
for img in images:
|
||||
if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]):
|
||||
imgBase64 = img.get("base64Data", "")
|
||||
# Vergleiche base64-Daten (kann unterschiedliche Längen haben durch Padding)
|
||||
if imgBase64 == base64Data or imgBase64.startswith(base64Data[:100]) or base64Data.startswith(imgBase64[:100]):
|
||||
matchingImage = img
|
||||
break
|
||||
|
||||
|
|
@ -650,20 +715,25 @@ class RendererHtml(BaseRenderer):
|
|||
# Use filename from image data (generated from section ID)
|
||||
filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
|
||||
|
||||
# Replace with relative path
|
||||
# Replace with relative path (ohne Pfad, nur Dateiname)
|
||||
altText = matchingImage.get("altText", "Image")
|
||||
caption = matchingImage.get("caption", "")
|
||||
|
||||
# Entferne IMAGE_MARKER Kommentar falls vorhanden
|
||||
imgTag = f'<img src="{filename}" alt="{altText}">'
|
||||
|
||||
if caption:
|
||||
return f'<figure><img src="{filename}" alt="{altText}"><figcaption>{caption}</figcaption></figure>'
|
||||
return f'<figure>{imgTag}<figcaption>{caption}</figcaption></figure>'
|
||||
else:
|
||||
return f'<img src="{filename}" alt="{altText}">'
|
||||
return imgTag
|
||||
else:
|
||||
# Keep original if no match found
|
||||
return match.group(0)
|
||||
|
||||
# Replace all data URIs
|
||||
# Replace all data URIs (auch IMAGE_MARKER Kommentare entfernen)
|
||||
updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent)
|
||||
# Entferne IMAGE_MARKER Kommentare die übrig geblieben sind
|
||||
updatedHtml = re.sub(r'<!--IMAGE_MARKER:[^>]+-->', '', updatedHtml)
|
||||
|
||||
return updatedHtml
|
||||
|
||||
|
|
|
|||
|
|
@ -5,8 +5,10 @@ Image renderer for report generation using AI image generation.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
import logging
|
||||
import base64
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -28,13 +30,37 @@ class RendererImage(BaseRenderer):
|
|||
"""Return priority for image renderer."""
|
||||
return 90
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to image format using AI image generation."""
|
||||
try:
|
||||
# Generate AI image from content
|
||||
imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
return imageContent, "image/png"
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "image/png")
|
||||
else:
|
||||
filename = self._determineFilename(title, "image/png")
|
||||
|
||||
# Convert image content to bytes (base64 string or bytes)
|
||||
if isinstance(imageContent, str):
|
||||
try:
|
||||
imageBytes = base64.b64decode(imageContent)
|
||||
except Exception:
|
||||
imageBytes = imageContent.encode('utf-8')
|
||||
else:
|
||||
imageBytes = imageContent
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=imageBytes,
|
||||
mimeType="image/png",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering image: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ JSON renderer for report generation.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
import json
|
||||
|
||||
class RendererJson(BaseRenderer):
|
||||
|
|
@ -26,14 +27,29 @@ class RendererJson(BaseRenderer):
|
|||
"""Return priority for JSON renderer."""
|
||||
return 80
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to JSON format."""
|
||||
try:
|
||||
# The extracted content should already be JSON from the AI
|
||||
# Just validate and format it
|
||||
jsonContent = self._cleanJsonContent(extractedContent, title)
|
||||
|
||||
return jsonContent, "application/json"
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "application/json")
|
||||
else:
|
||||
filename = self._determineFilename(title, "application/json")
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=jsonContent.encode('utf-8'),
|
||||
mimeType="application/json",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering JSON: {str(e)}")
|
||||
|
|
@ -43,7 +59,14 @@ class RendererJson(BaseRenderer):
|
|||
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
|
||||
"metadata": {"error": str(e)}
|
||||
}
|
||||
return json.dumps(fallbackData, indent=2), "application/json"
|
||||
fallbackContent = json.dumps(fallbackData, indent=2)
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=fallbackContent.encode('utf-8'),
|
||||
mimeType="application/json",
|
||||
filename=self._determineFilename(title, "application/json")
|
||||
)
|
||||
]
|
||||
|
||||
def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
|
||||
"""Clean and validate JSON content from AI."""
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ Markdown renderer for report generation.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
|
||||
class RendererMarkdown(BaseRenderer):
|
||||
"""Renders content to Markdown format with format-specific extraction."""
|
||||
|
|
@ -25,18 +26,40 @@ class RendererMarkdown(BaseRenderer):
|
|||
"""Return priority for markdown renderer."""
|
||||
return 95
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to Markdown format."""
|
||||
try:
|
||||
# Generate markdown from JSON structure
|
||||
markdownContent = self._generateMarkdownFromJson(extractedContent, title)
|
||||
|
||||
return markdownContent, "text/markdown"
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "text/markdown")
|
||||
else:
|
||||
filename = self._determineFilename(title, "text/markdown")
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=markdownContent.encode('utf-8'),
|
||||
mimeType="text/markdown",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering markdown: {str(e)}")
|
||||
# Return minimal markdown fallback
|
||||
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
|
||||
fallbackContent = f"# {title}\n\nError rendering report: {str(e)}"
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=fallbackContent.encode('utf-8'),
|
||||
mimeType="text/markdown",
|
||||
filename=self._determineFilename(title, "text/markdown")
|
||||
)
|
||||
]
|
||||
|
||||
def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
||||
"""Generate markdown content from structured JSON document."""
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ PDF renderer for report generation using reportlab.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
import io
|
||||
import base64
|
||||
|
||||
|
|
@ -38,25 +39,56 @@ class RendererPdf(BaseRenderer):
|
|||
"""Return priority for PDF renderer."""
|
||||
return 120
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
|
||||
try:
|
||||
if not REPORTLAB_AVAILABLE:
|
||||
# Fallback to HTML if reportlab not available
|
||||
from .rendererHtml import RendererHtml
|
||||
html_renderer = RendererHtml()
|
||||
html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
|
||||
return html_content, "text/html"
|
||||
return await html_renderer.render(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
# Generate PDF using AI-analyzed styling
|
||||
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
return pdf_content, "application/pdf"
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "application/pdf")
|
||||
else:
|
||||
filename = self._determineFilename(title, "application/pdf")
|
||||
|
||||
# Convert PDF content to bytes if it's a string (base64)
|
||||
if isinstance(pdf_content, str):
|
||||
# Try to decode as base64, otherwise encode as UTF-8
|
||||
try:
|
||||
pdf_bytes = base64.b64decode(pdf_content)
|
||||
except Exception:
|
||||
pdf_bytes = pdf_content.encode('utf-8')
|
||||
else:
|
||||
pdf_bytes = pdf_content
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=pdf_bytes,
|
||||
mimeType="application/pdf",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering PDF: {str(e)}")
|
||||
# Return minimal fallback
|
||||
return f"PDF Generation Error: {str(e)}", "text/plain"
|
||||
fallbackContent = f"PDF Generation Error: {str(e)}"
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=fallbackContent.encode('utf-8'),
|
||||
mimeType="text/plain",
|
||||
filename=self._determineFilename(title, "text/plain")
|
||||
)
|
||||
]
|
||||
|
||||
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
||||
|
|
|
|||
|
|
@ -6,8 +6,9 @@ import io
|
|||
import json
|
||||
import re
|
||||
from datetime import datetime, UTC
|
||||
from typing import Dict, Any, Optional, Tuple, List
|
||||
from typing import Dict, Any, Optional, List
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -25,7 +26,7 @@ class RendererPptx(BaseRenderer):
|
|||
"""Get list of supported output formats."""
|
||||
return ["pptx", "ppt"]
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""
|
||||
Render content as PowerPoint presentation from JSON data.
|
||||
|
||||
|
|
@ -204,14 +205,44 @@ class RendererPptx(BaseRenderer):
|
|||
pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')
|
||||
|
||||
logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
|
||||
return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
||||
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
|
||||
else:
|
||||
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=pptx_bytes,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except ImportError:
|
||||
logger.error("python-pptx library not installed. Install with: pip install python-pptx")
|
||||
return "python-pptx library not installed", "text/plain"
|
||||
fallbackContent = "python-pptx library not installed"
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=fallbackContent.encode('utf-8'),
|
||||
mimeType="text/plain",
|
||||
filename=self._determineFilename(title, "text/plain")
|
||||
)
|
||||
]
|
||||
except Exception as e:
|
||||
logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
|
||||
return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
|
||||
fallbackContent = f"Error rendering PowerPoint presentation: {str(e)}"
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=fallbackContent.encode('utf-8'),
|
||||
mimeType="text/plain",
|
||||
filename=self._determineFilename(title, "text/plain")
|
||||
)
|
||||
]
|
||||
|
||||
def _parseContentToSlides(self, content: str, title: str) -> list:
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ Text renderer for report generation.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
|
||||
class RendererText(BaseRenderer):
|
||||
"""Renders content to plain text format with format-specific extraction."""
|
||||
|
|
@ -47,18 +48,40 @@ class RendererText(BaseRenderer):
|
|||
"""Return priority for text renderer."""
|
||||
return 90
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to plain text format."""
|
||||
try:
|
||||
# Generate text from JSON structure
|
||||
textContent = self._generateTextFromJson(extractedContent, title)
|
||||
|
||||
return textContent, "text/plain"
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "text/plain")
|
||||
else:
|
||||
filename = self._determineFilename(title, "text/plain")
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=textContent.encode('utf-8'),
|
||||
mimeType="text/plain",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering text: {str(e)}")
|
||||
# Return minimal text fallback
|
||||
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
|
||||
fallbackContent = f"{title}\n\nError rendering report: {str(e)}"
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=fallbackContent.encode('utf-8'),
|
||||
mimeType="text/plain",
|
||||
filename=self._determineFilename(title, "text/plain")
|
||||
)
|
||||
]
|
||||
|
||||
def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
||||
"""Generate text content from structured JSON document."""
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@ Excel renderer for report generation using openpyxl.
|
|||
"""
|
||||
|
||||
from .rendererBaseTemplate import BaseRenderer
|
||||
from typing import Dict, Any, Tuple, List
|
||||
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||
from typing import Dict, Any, List
|
||||
import io
|
||||
import base64
|
||||
from datetime import datetime, UTC
|
||||
|
|
@ -37,20 +38,43 @@ class RendererXlsx(BaseRenderer):
|
|||
"""Return priority for Excel renderer."""
|
||||
return 110
|
||||
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
|
||||
try:
|
||||
if not OPENPYXL_AVAILABLE:
|
||||
# Fallback to CSV if openpyxl not available
|
||||
from .rendererCsv import RendererCsv
|
||||
csvRenderer = RendererCsv()
|
||||
csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService)
|
||||
return csvContent, "text/csv"
|
||||
return await csvRenderer.render(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
# Generate Excel using AI-analyzed styling
|
||||
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
|
||||
|
||||
return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
||||
# Determine filename from document or title
|
||||
documents = extractedContent.get("documents", [])
|
||||
if documents and isinstance(documents[0], dict):
|
||||
filename = documents[0].get("filename")
|
||||
if not filename:
|
||||
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
||||
else:
|
||||
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
||||
|
||||
# Convert Excel content to bytes if it's a string (base64)
|
||||
if isinstance(excelContent, str):
|
||||
try:
|
||||
excel_bytes = base64.b64decode(excelContent)
|
||||
except Exception:
|
||||
excel_bytes = excelContent.encode('utf-8')
|
||||
else:
|
||||
excel_bytes = excelContent
|
||||
|
||||
return [
|
||||
RenderedDocument(
|
||||
documentData=excel_bytes,
|
||||
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||
filename=filename
|
||||
)
|
||||
]
|
||||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error rendering Excel: {str(e)}")
|
||||
|
|
|
|||
|
|
@ -106,6 +106,18 @@ class ContentValidator:
|
|||
if section.get("textPreview"):
|
||||
sectionSummary["textPreview"] = section.get("textPreview")
|
||||
|
||||
# Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu
|
||||
contentPartIds = section.get("contentPartIds", [])
|
||||
if contentPartIds and not elements:
|
||||
# Prüfe ob contentPartsMetadata vorhanden ist
|
||||
contentPartsMetadata = section.get("contentPartsMetadata", [])
|
||||
if contentPartsMetadata:
|
||||
sectionSummary["contentPartsMetadata"] = contentPartsMetadata
|
||||
else:
|
||||
# Fallback: Zeige nur IDs wenn Metadaten nicht verfügbar
|
||||
sectionSummary["contentPartIds"] = contentPartIds
|
||||
sectionSummary["note"] = "ContentParts referenced but metadata not available"
|
||||
|
||||
# Include any additional fields from section (generic approach)
|
||||
# This ensures all action-specific fields are preserved
|
||||
for key, value in section.items():
|
||||
|
|
@ -141,6 +153,18 @@ class ContentValidator:
|
|||
sectionSummary["rowCount"] = len(rows)
|
||||
sectionSummary["headers"] = headers
|
||||
|
||||
# Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu
|
||||
contentPartIds = section.get("contentPartIds", [])
|
||||
if contentPartIds and not elements:
|
||||
# Prüfe ob contentPartsMetadata vorhanden ist
|
||||
contentPartsMetadata = section.get("contentPartsMetadata", [])
|
||||
if contentPartsMetadata:
|
||||
sectionSummary["contentPartsMetadata"] = contentPartsMetadata
|
||||
else:
|
||||
# Fallback: Zeige nur IDs wenn Metadaten nicht verfügbar
|
||||
sectionSummary["contentPartIds"] = contentPartIds
|
||||
sectionSummary["note"] = "ContentParts referenced but metadata not available"
|
||||
|
||||
# Include any additional fields from section (generic approach)
|
||||
for key, value in section.items():
|
||||
if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately
|
||||
|
|
|
|||
Loading…
Reference in a new issue