enhanced generation engine with chapters as structure, renderers to render a pipeline and deliver 1..n documents
This commit is contained in:
parent
9d4bd8ceef
commit
723f98ea7a
17 changed files with 1141 additions and 264 deletions
|
|
@ -107,5 +107,17 @@ class StructuredDocument(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
class RenderedDocument(BaseModel):
    """
    One file produced by a renderer.

    Carries the raw content of the file together with the MIME type and the
    filename it should be delivered under.
    """
    # Raw file content exactly as produced by the renderer.
    documentData: bytes = Field(description="Document content as bytes")
    # MIME type describing documentData.
    mimeType: str = Field(description="MIME type of the document (e.g., 'text/html', 'application/pdf')")
    # Name under which the file is delivered.
    filename: str = Field(description="Filename for the document (e.g., 'report.html', 'image.png')")

    class Config:
        # Bytes are turned into text by decoding as UTF-8; undecodable bytes
        # become replacement characters instead of raising.
        # NOTE(review): this is lossy for non-text payloads (PDF, PNG, ...) -
        # confirm that no consumer needs to restore the bytes from the JSON.
        json_encoders = {
            bytes: lambda raw: raw if not isinstance(raw, bytes) else raw.decode("utf-8", errors="replace")
        }
|
||||||
|
|
||||||
|
|
||||||
# Update forward references
|
# Update forward references
|
||||||
ListItem.model_rebuild()
|
ListItem.model_rebuild()
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ from modules.services.serviceExtraction.mainServiceExtraction import ExtractionS
|
||||||
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
|
||||||
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
|
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
|
||||||
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
||||||
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
from modules.interfaces.interfaceAiObjects import AiObjects
|
from modules.interfaces.interfaceAiObjects import AiObjects
|
||||||
from modules.shared.jsonUtils import (
|
from modules.shared.jsonUtils import (
|
||||||
extractJsonString,
|
extractJsonString,
|
||||||
|
|
@ -50,7 +51,7 @@ class AiService:
|
||||||
if self.extractionService is None:
|
if self.extractionService is None:
|
||||||
logger.info("Initializing ExtractionService...")
|
logger.info("Initializing ExtractionService...")
|
||||||
self.extractionService = ExtractionService(self.services)
|
self.extractionService = ExtractionService(self.services)
|
||||||
|
|
||||||
# Initialize new submodules
|
# Initialize new submodules
|
||||||
from modules.services.serviceAi.subResponseParsing import ResponseParser
|
from modules.services.serviceAi.subResponseParsing import ResponseParser
|
||||||
from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer
|
from modules.services.serviceAi.subDocumentIntents import DocumentIntentAnalyzer
|
||||||
|
|
@ -277,7 +278,7 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Delegate to ResponseParser."""
|
"""Delegate to ResponseParser."""
|
||||||
return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)
|
return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)
|
||||||
|
|
||||||
# Public API Methods
|
# Public API Methods
|
||||||
|
|
||||||
# Planning AI Call
|
# Planning AI Call
|
||||||
|
|
@ -494,20 +495,21 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
title: str,
|
title: str,
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
parentOperationId: str
|
parentOperationId: str
|
||||||
) -> Tuple[bytes, str]:
|
) -> List[RenderedDocument]:
|
||||||
"""
|
"""
|
||||||
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
|
Phase 5E: Rendert gefüllte Struktur zum Ziel-Format.
|
||||||
Unterstützt Multi-Dokument-Rendering: Alle Dokumente werden gerendert.
|
Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
filledStructure: Gefüllte Struktur mit elements
|
filledStructure: Gefüllte Struktur mit elements
|
||||||
outputFormat: Ziel-Format (pdf, docx, html, etc.)
|
outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet
|
||||||
title: Dokument-Titel
|
title: Dokument-Titel
|
||||||
userPrompt: User-Anfrage
|
userPrompt: User-Anfrage
|
||||||
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tuple von (renderedContent, mimeType)
|
List of RenderedDocument objects.
|
||||||
|
Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei)
|
||||||
"""
|
"""
|
||||||
# Erstelle Operation-ID für Rendering
|
# Erstelle Operation-ID für Rendering
|
||||||
renderOperationId = f"{parentOperationId}_rendering"
|
renderOperationId = f"{parentOperationId}_rendering"
|
||||||
|
|
@ -526,51 +528,21 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
|
|
||||||
generationService = GenerationService(self.services)
|
generationService = GenerationService(self.services)
|
||||||
|
|
||||||
# Multi-Dokument-Rendering
|
# renderReport verarbeitet jetzt jedes Dokument einzeln
|
||||||
documents = filledStructure.get("documents", [])
|
# und gibt Liste von (documentData, mimeType, filename) zurück
|
||||||
|
renderedDocuments = await generationService.renderReport(
|
||||||
if len(documents) == 1:
|
filledStructure,
|
||||||
# Einzelnes Dokument - wie bisher
|
outputFormat,
|
||||||
renderedContent, mimeType, images = await generationService.renderReport(
|
title,
|
||||||
filledStructure,
|
userPrompt,
|
||||||
outputFormat,
|
self,
|
||||||
title,
|
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
|
||||||
userPrompt,
|
)
|
||||||
self,
|
|
||||||
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
|
|
||||||
)
|
|
||||||
else:
|
|
||||||
# Mehrere Dokumente - rendere alle
|
|
||||||
# Option: Alle Sections zusammenführen und als ein Dokument rendern
|
|
||||||
all_sections = []
|
|
||||||
for doc in documents:
|
|
||||||
if "sections" in doc:
|
|
||||||
all_sections.extend(doc.get("sections", []))
|
|
||||||
|
|
||||||
# Erstelle temporäres Dokument mit allen Sections
|
|
||||||
merged_document = {
|
|
||||||
"metadata": filledStructure["metadata"],
|
|
||||||
"documents": [{
|
|
||||||
"id": "merged",
|
|
||||||
"title": title,
|
|
||||||
"filename": f"{title}.{outputFormat}",
|
|
||||||
"sections": all_sections
|
|
||||||
}]
|
|
||||||
}
|
|
||||||
|
|
||||||
renderedContent, mimeType, images = await generationService.renderReport(
|
|
||||||
merged_document,
|
|
||||||
outputFormat,
|
|
||||||
title,
|
|
||||||
userPrompt,
|
|
||||||
self,
|
|
||||||
parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie
|
|
||||||
)
|
|
||||||
|
|
||||||
# ChatLog abschließen
|
# ChatLog abschließen
|
||||||
self.services.chat.progressLogFinish(renderOperationId, True)
|
self.services.chat.progressLogFinish(renderOperationId, True)
|
||||||
|
|
||||||
return renderedContent, mimeType
|
return renderedDocuments
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.services.chat.progressLogFinish(renderOperationId, False)
|
self.services.chat.progressLogFinish(renderOperationId, False)
|
||||||
|
|
@ -712,7 +684,8 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
)
|
)
|
||||||
|
|
||||||
# Schritt 5E: Rendere Resultat
|
# Schritt 5E: Rendere Resultat
|
||||||
renderedContent, mimeType = await self._renderResult(
|
# Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder)
|
||||||
|
renderedDocuments = await self._renderResult(
|
||||||
filledStructure,
|
filledStructure,
|
||||||
outputFormat,
|
outputFormat,
|
||||||
title or "Generated Document",
|
title or "Generated Document",
|
||||||
|
|
@ -720,15 +693,24 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
aiOperationId
|
aiOperationId
|
||||||
)
|
)
|
||||||
|
|
||||||
# Baue Response
|
# Baue Response: Konvertiere alle gerenderten Dokumente zu DocumentData
|
||||||
documentName = self._determineDocumentName(filledStructure, outputFormat, title)
|
documentDataList = []
|
||||||
|
for renderedDoc in renderedDocuments:
|
||||||
|
try:
|
||||||
|
# Erstelle DocumentData für jedes gerenderte Dokument
|
||||||
|
docDataObj = DocumentData(
|
||||||
|
documentName=renderedDoc.filename,
|
||||||
|
documentData=renderedDoc.documentData,
|
||||||
|
mimeType=renderedDoc.mimeType,
|
||||||
|
sourceJson=filledStructure if len(documentDataList) == 0 else None # Nur für erstes Dokument
|
||||||
|
)
|
||||||
|
documentDataList.append(docDataObj)
|
||||||
|
logger.debug(f"Added rendered document: {renderedDoc.filename} ({len(renderedDoc.documentData)} bytes, {renderedDoc.mimeType})")
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Error creating document {renderedDoc.filename}: {str(e)}")
|
||||||
|
|
||||||
docData = DocumentData(
|
if not documentDataList:
|
||||||
documentName=documentName,
|
raise ValueError("No documents were rendered")
|
||||||
documentData=renderedContent,
|
|
||||||
mimeType=mimeType,
|
|
||||||
sourceJson=filledStructure
|
|
||||||
)
|
|
||||||
|
|
||||||
metadata = AiResponseMetadata(
|
metadata = AiResponseMetadata(
|
||||||
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
|
title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
|
||||||
|
|
@ -746,7 +728,7 @@ Respond with ONLY a JSON object in this exact format:
|
||||||
return AiResponse(
|
return AiResponse(
|
||||||
content=json.dumps(filledStructure),
|
content=json.dumps(filledStructure),
|
||||||
metadata=metadata,
|
metadata=metadata,
|
||||||
documents=[docData]
|
documents=documentDataList
|
||||||
)
|
)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|
|
||||||
|
|
@ -35,65 +35,184 @@ class StructureFiller:
|
||||||
parentOperationId: str
|
parentOperationId: str
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Phase 5D: Füllt Struktur mit tatsächlichem Content.
|
Phase 5D: Chapter-Content-Generierung (Zwei-Phasen-Ansatz).
|
||||||
Für jede Section:
|
|
||||||
- Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format
|
|
||||||
- Wenn generation_hint spezifiziert: Generiere AI-Content
|
|
||||||
|
|
||||||
**Implementierungsdetails:**
|
Phase 5D.1: Generiert Sections-Struktur für jedes Chapter
|
||||||
- Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung)
|
Phase 5D.2: Füllt Sections mit ContentParts
|
||||||
- Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses)
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
structure: Struktur-Dict mit documents und sections
|
structure: Struktur-Dict mit documents und chapters (nicht sections!)
|
||||||
contentParts: Alle vorbereiteten ContentParts
|
contentParts: Alle vorbereiteten ContentParts
|
||||||
userPrompt: User-Anfrage
|
userPrompt: User-Anfrage
|
||||||
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Gefüllte Struktur mit elements in jeder Section
|
Gefüllte Struktur mit elements in jeder Section (nach Flattening)
|
||||||
"""
|
"""
|
||||||
# Erstelle Operation-ID für Struktur-Abfüllen
|
# Erstelle Operation-ID für Struktur-Abfüllen
|
||||||
fillOperationId = f"{parentOperationId}_structure_filling"
|
fillOperationId = f"{parentOperationId}_structure_filling"
|
||||||
|
|
||||||
|
# Prüfe ob Struktur Chapters oder Sections hat
|
||||||
|
hasChapters = False
|
||||||
|
for doc in structure.get("documents", []):
|
||||||
|
if "chapters" in doc:
|
||||||
|
hasChapters = True
|
||||||
|
break
|
||||||
|
|
||||||
|
if not hasChapters:
|
||||||
|
# Fallback: Alte Struktur mit Sections direkt - verwende alte Logik
|
||||||
|
logger.warning("Structure has no chapters, using legacy section-based filling")
|
||||||
|
return await self._fillStructureLegacy(structure, contentParts, userPrompt, fillOperationId)
|
||||||
|
|
||||||
# Starte ChatLog mit Parent-Referenz
|
# Starte ChatLog mit Parent-Referenz
|
||||||
|
chapterCount = sum(len(doc.get("chapters", [])) for doc in structure.get("documents", []))
|
||||||
self.services.chat.progressLogStart(
|
self.services.chat.progressLogStart(
|
||||||
fillOperationId,
|
fillOperationId,
|
||||||
"Structure Filling",
|
"Chapter Content Generation",
|
||||||
"Filling",
|
"Filling",
|
||||||
f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections",
|
f"Processing {chapterCount} chapters",
|
||||||
parentOperationId=parentOperationId
|
parentOperationId=parentOperationId
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
filledStructure = copy.deepcopy(structure)
|
filledStructure = copy.deepcopy(structure)
|
||||||
|
|
||||||
# Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden)
|
# Phase 5D.1: Sections-Struktur für jedes Chapter generieren
|
||||||
sections_to_process = []
|
filledStructure = await self._generateChapterSectionsStructure(
|
||||||
all_sections_list = [] # Für Kontext-Informationen
|
filledStructure, contentParts, userPrompt, fillOperationId
|
||||||
for doc in filledStructure.get("documents", []):
|
)
|
||||||
doc_sections = doc.get("sections", [])
|
|
||||||
all_sections_list.extend(doc_sections)
|
|
||||||
for section in doc_sections:
|
|
||||||
sections_to_process.append((doc, section))
|
|
||||||
|
|
||||||
# Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden)
|
# Phase 5D.2: Sections mit ContentParts füllen
|
||||||
for sectionIndex, (doc, section) in enumerate(sections_to_process):
|
filledStructure = await self._fillChapterSections(
|
||||||
sectionId = section.get("id")
|
filledStructure, contentParts, userPrompt, fillOperationId
|
||||||
contentPartIds = section.get("contentPartIds", [])
|
)
|
||||||
contentFormats = section.get("contentFormats", {})
|
|
||||||
generationHint = section.get("generation_hint")
|
# Flattening: Chapters zu Sections konvertieren
|
||||||
contentType = section.get("content_type", "paragraph")
|
flattenedStructure = self._flattenChaptersToSections(filledStructure)
|
||||||
|
|
||||||
|
# Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
|
||||||
|
flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts)
|
||||||
|
|
||||||
|
# ChatLog abschließen
|
||||||
|
self.services.chat.progressLogFinish(fillOperationId, True)
|
||||||
|
|
||||||
|
return flattenedStructure
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.services.chat.progressLogFinish(fillOperationId, False)
|
||||||
|
logger.error(f"Error in fillStructure: {str(e)}")
|
||||||
|
raise
|
||||||
|
|
||||||
|
async def _generateChapterSectionsStructure(
|
||||||
|
self,
|
||||||
|
chapterStructure: Dict[str, Any],
|
||||||
|
contentParts: List[ContentPart],
|
||||||
|
userPrompt: str,
|
||||||
|
parentOperationId: str
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Phase 5D.1: Generiert Sections-Struktur für jedes Chapter (ohne Content).
|
||||||
|
Sections enthalten: content_type, contentPartIds, generationHint, useAiCall
|
||||||
|
"""
|
||||||
|
for doc in chapterStructure.get("documents", []):
|
||||||
|
for chapter in doc.get("chapters", []):
|
||||||
|
chapterId = chapter.get("id", "unknown")
|
||||||
|
chapterLevel = chapter.get("level", 1)
|
||||||
|
chapterTitle = chapter.get("title", "")
|
||||||
|
generationHint = chapter.get("generationHint", "")
|
||||||
|
contentPartIds = chapter.get("contentPartIds", [])
|
||||||
|
contentPartInstructions = chapter.get("contentPartInstructions", {})
|
||||||
|
|
||||||
elements = []
|
chapterPrompt = self._buildChapterSectionsStructurePrompt(
|
||||||
|
chapterId=chapterId,
|
||||||
# Prüfe ob Aggregation nötig ist
|
chapterLevel=chapterLevel,
|
||||||
needsAggregation = self._needsAggregation(
|
chapterTitle=chapterTitle,
|
||||||
contentType=contentType,
|
generationHint=generationHint,
|
||||||
contentPartCount=len(contentPartIds)
|
contentPartIds=contentPartIds,
|
||||||
|
contentPartInstructions=contentPartInstructions,
|
||||||
|
contentParts=contentParts,
|
||||||
|
userPrompt=userPrompt
|
||||||
)
|
)
|
||||||
|
|
||||||
if needsAggregation and generationHint:
|
# Debug: Log Prompt
|
||||||
|
self.services.utils.writeDebugFile(
|
||||||
|
chapterPrompt,
|
||||||
|
f"chapter_structure_{chapterId}_prompt"
|
||||||
|
)
|
||||||
|
|
||||||
|
aiResponse = await self.aiService.callAiPlanning(
|
||||||
|
prompt=chapterPrompt,
|
||||||
|
debugType=f"chapter_structure_{chapterId}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Debug: Log Response
|
||||||
|
self.services.utils.writeDebugFile(
|
||||||
|
aiResponse,
|
||||||
|
f"chapter_structure_{chapterId}_response"
|
||||||
|
)
|
||||||
|
|
||||||
|
sectionsStructure = json.loads(
|
||||||
|
self.services.utils.jsonExtractString(aiResponse)
|
||||||
|
)
|
||||||
|
|
||||||
|
chapter["sections"] = sectionsStructure.get("sections", [])
|
||||||
|
|
||||||
|
# Setze useAiCall Flag (falls nicht von AI gesetzt)
|
||||||
|
for section in chapter["sections"]:
|
||||||
|
if "useAiCall" not in section:
|
||||||
|
contentType = section.get("content_type", "paragraph")
|
||||||
|
useAiCall = contentType != "paragraph"
|
||||||
|
|
||||||
|
# Prüfe contentPartInstructions
|
||||||
|
if not useAiCall:
|
||||||
|
for partId in section.get("contentPartIds", []):
|
||||||
|
instruction = contentPartInstructions.get(partId, {}).get("instruction", "")
|
||||||
|
if instruction and instruction.lower() not in ["include full text", "include all content", "use full extracted text"]:
|
||||||
|
useAiCall = True
|
||||||
|
break
|
||||||
|
|
||||||
|
section["useAiCall"] = useAiCall
|
||||||
|
|
||||||
|
return chapterStructure
|
||||||
|
|
||||||
|
async def _fillChapterSections(
|
||||||
|
self,
|
||||||
|
chapterStructure: Dict[str, Any],
|
||||||
|
contentParts: List[ContentPart],
|
||||||
|
userPrompt: str,
|
||||||
|
parentOperationId: str
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Phase 5D.2: Füllt Sections mit ContentParts.
|
||||||
|
"""
|
||||||
|
# Sammle alle Sections für sequenzielle Verarbeitung
|
||||||
|
sections_to_process = []
|
||||||
|
all_sections_list = [] # Für Kontext-Informationen
|
||||||
|
for doc in chapterStructure.get("documents", []):
|
||||||
|
for chapter in doc.get("chapters", []):
|
||||||
|
for section in chapter.get("sections", []):
|
||||||
|
all_sections_list.append(section)
|
||||||
|
sections_to_process.append((doc, chapter, section))
|
||||||
|
|
||||||
|
# Sequenzielle Section-Generierung
|
||||||
|
fillOperationId = parentOperationId
|
||||||
|
for sectionIndex, (doc, chapter, section) in enumerate(sections_to_process):
|
||||||
|
sectionId = section.get("id")
|
||||||
|
contentPartIds = section.get("contentPartIds", [])
|
||||||
|
contentFormats = section.get("contentFormats", {})
|
||||||
|
generationHint = section.get("generation_hint")
|
||||||
|
contentType = section.get("content_type", "paragraph")
|
||||||
|
useAiCall = section.get("useAiCall", False)
|
||||||
|
|
||||||
|
elements = []
|
||||||
|
|
||||||
|
# Prüfe ob Aggregation nötig ist
|
||||||
|
needsAggregation = self._needsAggregation(
|
||||||
|
contentType=contentType,
|
||||||
|
contentPartCount=len(contentPartIds)
|
||||||
|
)
|
||||||
|
|
||||||
|
if needsAggregation and useAiCall:
|
||||||
# Aggregation: Alle Parts zusammen verarbeiten
|
# Aggregation: Alle Parts zusammen verarbeiten
|
||||||
sectionParts = [
|
sectionParts = [
|
||||||
self._findContentPartById(pid, contentParts)
|
self._findContentPartById(pid, contentParts)
|
||||||
|
|
@ -201,8 +320,8 @@ class StructureFiller:
|
||||||
})
|
})
|
||||||
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
||||||
# NICHT raise - Section wird mit Fehlermeldung gerendert
|
# NICHT raise - Section wird mit Fehlermeldung gerendert
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Einzelverarbeitung: Jeder Part einzeln
|
# Einzelverarbeitung: Jeder Part einzeln
|
||||||
for partId in contentPartIds:
|
for partId in contentPartIds:
|
||||||
part = self._findContentPartById(partId, contentParts)
|
part = self._findContentPartById(partId, contentParts)
|
||||||
|
|
@ -308,19 +427,429 @@ class StructureFiller:
|
||||||
"source": part.metadata.get("documentId"),
|
"source": part.metadata.get("documentId"),
|
||||||
"extractionPrompt": part.metadata.get("extractionPrompt")
|
"extractionPrompt": part.metadata.get("extractionPrompt")
|
||||||
})
|
})
|
||||||
|
|
||||||
|
section["elements"] = elements
|
||||||
|
|
||||||
|
return chapterStructure
|
||||||
|
|
||||||
|
def _addContentPartsMetadata(
|
||||||
|
self,
|
||||||
|
structure: Dict[str, Any],
|
||||||
|
contentParts: List[ContentPart]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Fügt ContentParts-Metadaten zur Struktur hinzu, wenn contentPartIds vorhanden sind.
|
||||||
|
Dies hilft der Validierung, den Kontext der ContentParts zu verstehen.
|
||||||
|
"""
|
||||||
|
# Erstelle Mapping von ContentPart-ID zu Metadaten
|
||||||
|
contentPartsMap = {}
|
||||||
|
for part in contentParts:
|
||||||
|
contentPartsMap[part.id] = {
|
||||||
|
"id": part.id,
|
||||||
|
"format": part.metadata.get("contentFormat", "unknown"),
|
||||||
|
"type": part.typeGroup,
|
||||||
|
"mimeType": part.mimeType,
|
||||||
|
"originalFileName": part.metadata.get("originalFileName"),
|
||||||
|
"usageHint": part.metadata.get("usageHint"),
|
||||||
|
"documentId": part.metadata.get("documentId"),
|
||||||
|
"dataSize": len(str(part.data)) if part.data else 0
|
||||||
|
}
|
||||||
|
|
||||||
|
# Füge Metadaten zu Sections hinzu, die contentPartIds haben
|
||||||
|
for doc in structure.get("documents", []):
|
||||||
|
# Prüfe ob Chapters vorhanden sind (neue Struktur)
|
||||||
|
if "chapters" in doc:
|
||||||
|
for chapter in doc.get("chapters", []):
|
||||||
|
# Füge Metadaten zu Chapter-Level contentPartIds hinzu
|
||||||
|
chapterContentPartIds = chapter.get("contentPartIds", [])
|
||||||
|
if chapterContentPartIds:
|
||||||
|
chapter["contentPartsMetadata"] = []
|
||||||
|
for partId in chapterContentPartIds:
|
||||||
|
if partId in contentPartsMap:
|
||||||
|
chapter["contentPartsMetadata"].append(contentPartsMap[partId])
|
||||||
|
|
||||||
|
# Füge Metadaten zu Sections hinzu
|
||||||
|
for section in chapter.get("sections", []):
|
||||||
|
contentPartIds = section.get("contentPartIds", [])
|
||||||
|
if contentPartIds:
|
||||||
|
section["contentPartsMetadata"] = []
|
||||||
|
for partId in contentPartIds:
|
||||||
|
if partId in contentPartsMap:
|
||||||
|
section["contentPartsMetadata"].append(contentPartsMap[partId])
|
||||||
|
|
||||||
|
# Prüfe ob Sections direkt vorhanden sind (Legacy-Struktur)
|
||||||
|
elif "sections" in doc:
|
||||||
|
for section in doc.get("sections", []):
|
||||||
|
contentPartIds = section.get("contentPartIds", [])
|
||||||
|
if contentPartIds:
|
||||||
|
section["contentPartsMetadata"] = []
|
||||||
|
for partId in contentPartIds:
|
||||||
|
if partId in contentPartsMap:
|
||||||
|
section["contentPartsMetadata"].append(contentPartsMap[partId])
|
||||||
|
|
||||||
|
return structure
|
||||||
|
|
||||||
|
def _flattenChaptersToSections(
|
||||||
|
self,
|
||||||
|
chapterStructure: Dict[str, Any]
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Flattening: Konvertiert Chapters zu finaler Section-Struktur.
|
||||||
|
Jedes Chapter wird zu einer Heading-Section + dessen Sections.
|
||||||
|
"""
|
||||||
|
result = {
|
||||||
|
"metadata": chapterStructure.get("metadata", {}),
|
||||||
|
"documents": []
|
||||||
|
}
|
||||||
|
|
||||||
|
for doc in chapterStructure.get("documents", []):
|
||||||
|
flattened_doc = {
|
||||||
|
"id": doc.get("id"),
|
||||||
|
"title": doc.get("title"),
|
||||||
|
"filename": doc.get("filename"),
|
||||||
|
"sections": []
|
||||||
|
}
|
||||||
|
|
||||||
|
for chapter in doc.get("chapters", []):
|
||||||
|
# 1. Vordefinierte Heading-Section für Chapter-Title
|
||||||
|
heading_section = {
|
||||||
|
"id": f"{chapter['id']}_heading",
|
||||||
|
"content_type": "heading",
|
||||||
|
"elements": [{
|
||||||
|
"type": "heading",
|
||||||
|
"content": chapter.get("title"),
|
||||||
|
"level": chapter.get("level", 1)
|
||||||
|
}]
|
||||||
|
}
|
||||||
|
flattened_doc["sections"].append(heading_section)
|
||||||
|
|
||||||
|
# 2. Generierte Sections
|
||||||
|
flattened_doc["sections"].extend(chapter.get("sections", []))
|
||||||
|
|
||||||
|
result["documents"].append(flattened_doc)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
async def _fillStructureLegacy(
|
||||||
|
self,
|
||||||
|
structure: Dict[str, Any],
|
||||||
|
contentParts: List[ContentPart],
|
||||||
|
userPrompt: str,
|
||||||
|
fillOperationId: str
|
||||||
|
) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Legacy: Füllt Struktur mit Sections direkt (für Rückwärtskompatibilität).
|
||||||
|
"""
|
||||||
|
# Starte ChatLog
|
||||||
|
self.services.chat.progressLogStart(
|
||||||
|
fillOperationId,
|
||||||
|
"Structure Filling (Legacy)",
|
||||||
|
"Filling",
|
||||||
|
f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections",
|
||||||
|
parentOperationId=fillOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
filledStructure = copy.deepcopy(structure)
|
||||||
|
|
||||||
|
# Sammle alle Sections
|
||||||
|
sections_to_process = []
|
||||||
|
all_sections_list = []
|
||||||
|
for doc in filledStructure.get("documents", []):
|
||||||
|
doc_sections = doc.get("sections", [])
|
||||||
|
all_sections_list.extend(doc_sections)
|
||||||
|
for section in doc_sections:
|
||||||
|
sections_to_process.append((doc, section))
|
||||||
|
|
||||||
|
# Verarbeite Sections (bestehende Logik)
|
||||||
|
for sectionIndex, (doc, section) in enumerate(sections_to_process):
|
||||||
|
sectionId = section.get("id")
|
||||||
|
contentPartIds = section.get("contentPartIds", [])
|
||||||
|
contentFormats = section.get("contentFormats", {})
|
||||||
|
generationHint = section.get("generation_hint")
|
||||||
|
contentType = section.get("content_type", "paragraph")
|
||||||
|
|
||||||
|
elements = []
|
||||||
|
|
||||||
|
# Prüfe ob Aggregation nötig ist
|
||||||
|
needsAggregation = self._needsAggregation(
|
||||||
|
contentType=contentType,
|
||||||
|
contentPartCount=len(contentPartIds)
|
||||||
|
)
|
||||||
|
|
||||||
|
if needsAggregation and generationHint:
|
||||||
|
# Aggregation: Alle Parts zusammen verarbeiten
|
||||||
|
sectionParts = [
|
||||||
|
self._findContentPartById(pid, contentParts)
|
||||||
|
for pid in contentPartIds
|
||||||
|
]
|
||||||
|
sectionParts = [p for p in sectionParts if p is not None]
|
||||||
|
|
||||||
|
if sectionParts:
|
||||||
|
# Filtere nur extracted Parts für Aggregation
|
||||||
|
extractedParts = [
|
||||||
|
p for p in sectionParts
|
||||||
|
if contentFormats.get(p.id, p.metadata.get("contentFormat")) == "extracted"
|
||||||
|
]
|
||||||
|
nonExtractedParts = [
|
||||||
|
p for p in sectionParts
|
||||||
|
if contentFormats.get(p.id, p.metadata.get("contentFormat")) != "extracted"
|
||||||
|
]
|
||||||
|
|
||||||
|
# Verarbeite non-extracted Parts separat
|
||||||
|
for part in nonExtractedParts:
|
||||||
|
contentFormat = contentFormats.get(part.id, part.metadata.get("contentFormat"))
|
||||||
|
|
||||||
|
if contentFormat == "reference":
|
||||||
|
elements.append({
|
||||||
|
"type": "reference",
|
||||||
|
"documentReference": part.metadata.get("documentReference"),
|
||||||
|
"label": part.metadata.get("usageHint", part.label)
|
||||||
|
})
|
||||||
|
elif contentFormat == "object":
|
||||||
|
elements.append({
|
||||||
|
"type": part.typeGroup,
|
||||||
|
"base64Data": part.data,
|
||||||
|
"mimeType": part.mimeType,
|
||||||
|
"altText": part.metadata.get("usageHint", part.label)
|
||||||
|
})
|
||||||
|
|
||||||
|
# Aggregiere extracted Parts mit AI
|
||||||
|
if extractedParts:
|
||||||
|
generationPrompt = self._buildSectionGenerationPrompt(
|
||||||
|
section=section,
|
||||||
|
contentParts=extractedParts,
|
||||||
|
userPrompt=userPrompt,
|
||||||
|
generationHint=generationHint,
|
||||||
|
allSections=all_sections_list,
|
||||||
|
sectionIndex=sectionIndex,
|
||||||
|
isAggregation=True
|
||||||
|
)
|
||||||
|
|
||||||
|
sectionOperationId = f"{fillOperationId}_section_{sectionId}"
|
||||||
|
|
||||||
|
self.services.chat.progressLogStart(
|
||||||
|
sectionOperationId,
|
||||||
|
"Section Generation (Aggregation)",
|
||||||
|
"Section",
|
||||||
|
f"Generating section {sectionId} with {len(extractedParts)} parts",
|
||||||
|
parentOperationId=fillOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.services.utils.writeDebugFile(
|
||||||
|
generationPrompt,
|
||||||
|
f"section_content_{sectionId}_prompt"
|
||||||
|
)
|
||||||
|
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=generationPrompt,
|
||||||
|
contentParts=extractedParts,
|
||||||
|
options=AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||||
|
priority=PriorityEnum.BALANCED,
|
||||||
|
processingMode=ProcessingModeEnum.DETAILED
|
||||||
|
)
|
||||||
|
)
|
||||||
|
aiResponse = await self.aiService.callAi(request)
|
||||||
|
|
||||||
|
self.services.utils.writeDebugFile(
|
||||||
|
aiResponse.content,
|
||||||
|
f"section_content_{sectionId}_response"
|
||||||
|
)
|
||||||
|
|
||||||
|
generatedElements = json.loads(
|
||||||
|
self.services.utils.jsonExtractString(aiResponse.content)
|
||||||
|
)
|
||||||
|
if isinstance(generatedElements, list):
|
||||||
|
elements.extend(generatedElements)
|
||||||
|
elif isinstance(generatedElements, dict) and "elements" in generatedElements:
|
||||||
|
elements.extend(generatedElements["elements"])
|
||||||
|
|
||||||
|
self.services.chat.progressLogFinish(sectionOperationId, True)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.services.chat.progressLogFinish(sectionOperationId, False)
|
||||||
|
elements.append({
|
||||||
|
"type": "error",
|
||||||
|
"message": f"Error generating section {sectionId}: {str(e)}",
|
||||||
|
"sectionId": sectionId
|
||||||
|
})
|
||||||
|
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Einzelverarbeitung: Jeder Part einzeln
|
||||||
|
for partId in contentPartIds:
|
||||||
|
part = self._findContentPartById(partId, contentParts)
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
|
||||||
|
contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat"))
|
||||||
|
|
||||||
|
if contentFormat == "reference":
|
||||||
|
elements.append({
|
||||||
|
"type": "reference",
|
||||||
|
"documentReference": part.metadata.get("documentReference"),
|
||||||
|
"label": part.metadata.get("usageHint", part.label)
|
||||||
|
})
|
||||||
|
|
||||||
|
elif contentFormat == "object":
|
||||||
|
elements.append({
|
||||||
|
"type": part.typeGroup,
|
||||||
|
"base64Data": part.data,
|
||||||
|
"mimeType": part.mimeType,
|
||||||
|
"altText": part.metadata.get("usageHint", part.label)
|
||||||
|
})
|
||||||
|
|
||||||
|
elif contentFormat == "extracted":
|
||||||
|
if generationHint:
|
||||||
|
# AI-Call mit einzelnen ContentPart
|
||||||
|
generationPrompt = self._buildSectionGenerationPrompt(
|
||||||
|
section=section,
|
||||||
|
contentParts=[part],
|
||||||
|
userPrompt=userPrompt,
|
||||||
|
generationHint=generationHint,
|
||||||
|
allSections=all_sections_list,
|
||||||
|
sectionIndex=sectionIndex,
|
||||||
|
isAggregation=False
|
||||||
|
)
|
||||||
|
|
||||||
|
sectionOperationId = f"{fillOperationId}_section_{sectionId}"
|
||||||
|
|
||||||
|
self.services.chat.progressLogStart(
|
||||||
|
sectionOperationId,
|
||||||
|
"Section Generation",
|
||||||
|
"Section",
|
||||||
|
f"Generating section {sectionId}",
|
||||||
|
parentOperationId=fillOperationId
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.services.utils.writeDebugFile(
|
||||||
|
generationPrompt,
|
||||||
|
f"section_content_{sectionId}_prompt"
|
||||||
|
)
|
||||||
|
|
||||||
|
request = AiCallRequest(
|
||||||
|
prompt=generationPrompt,
|
||||||
|
contentParts=[part],
|
||||||
|
options=AiCallOptions(
|
||||||
|
operationType=OperationTypeEnum.DATA_ANALYSE,
|
||||||
|
priority=PriorityEnum.BALANCED,
|
||||||
|
processingMode=ProcessingModeEnum.DETAILED
|
||||||
|
)
|
||||||
|
)
|
||||||
|
aiResponse = await self.aiService.callAi(request)
|
||||||
|
|
||||||
|
self.services.utils.writeDebugFile(
|
||||||
|
aiResponse.content,
|
||||||
|
f"section_content_{sectionId}_response"
|
||||||
|
)
|
||||||
|
|
||||||
|
generatedElements = json.loads(
|
||||||
|
self.services.utils.jsonExtractString(aiResponse.content)
|
||||||
|
)
|
||||||
|
if isinstance(generatedElements, list):
|
||||||
|
elements.extend(generatedElements)
|
||||||
|
elif isinstance(generatedElements, dict) and "elements" in generatedElements:
|
||||||
|
elements.extend(generatedElements["elements"])
|
||||||
|
|
||||||
|
self.services.chat.progressLogFinish(sectionOperationId, True)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
self.services.chat.progressLogFinish(sectionOperationId, False)
|
||||||
|
elements.append({
|
||||||
|
"type": "error",
|
||||||
|
"message": f"Error generating section {sectionId}: {str(e)}",
|
||||||
|
"sectionId": sectionId
|
||||||
|
})
|
||||||
|
logger.error(f"Error generating section {sectionId}: {str(e)}")
|
||||||
|
else:
|
||||||
|
elements.append({
|
||||||
|
"type": "extracted_text",
|
||||||
|
"content": part.data,
|
||||||
|
"source": part.metadata.get("documentId"),
|
||||||
|
"extractionPrompt": part.metadata.get("extractionPrompt")
|
||||||
|
})
|
||||||
|
|
||||||
section["elements"] = elements
|
section["elements"] = elements
|
||||||
|
|
||||||
# ChatLog abschließen
|
# Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung)
|
||||||
self.services.chat.progressLogFinish(fillOperationId, True)
|
filledStructure = self._addContentPartsMetadata(filledStructure, contentParts)
|
||||||
|
|
||||||
|
self.services.chat.progressLogFinish(fillOperationId, True)
|
||||||
return filledStructure
|
return filledStructure
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.services.chat.progressLogFinish(fillOperationId, False)
|
self.services.chat.progressLogFinish(fillOperationId, False)
|
||||||
logger.error(f"Error in fillStructure: {str(e)}")
|
logger.error(f"Error in _fillStructureLegacy: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
|
def _buildChapterSectionsStructurePrompt(
|
||||||
|
self,
|
||||||
|
chapterId: str,
|
||||||
|
chapterLevel: int,
|
||||||
|
chapterTitle: str,
|
||||||
|
generationHint: str,
|
||||||
|
contentPartIds: List[str],
|
||||||
|
contentPartInstructions: Dict[str, Any],
|
||||||
|
contentParts: List[ContentPart],
|
||||||
|
userPrompt: str
|
||||||
|
) -> str:
|
||||||
|
"""Baue Prompt für Chapter-Sections-Struktur-Generierung."""
|
||||||
|
# Baue ContentParts-Index (nur IDs, keine Previews!)
|
||||||
|
contentPartsIndex = ""
|
||||||
|
for partId in contentPartIds:
|
||||||
|
part = self._findContentPartById(partId, contentParts)
|
||||||
|
if not part:
|
||||||
|
continue
|
||||||
|
|
||||||
|
contentFormat = part.metadata.get("contentFormat", "unknown")
|
||||||
|
instruction = contentPartInstructions.get(partId, {}).get("instruction", "Use content as needed")
|
||||||
|
|
||||||
|
contentPartsIndex += f"\n- ContentPart ID: {partId}\n"
|
||||||
|
contentPartsIndex += f" Format: {contentFormat}\n"
|
||||||
|
contentPartsIndex += f" Type: {part.typeGroup}\n"
|
||||||
|
contentPartsIndex += f" Instruction: {instruction}\n"
|
||||||
|
|
||||||
|
if not contentPartsIndex:
|
||||||
|
contentPartsIndex = "\n(No content parts specified for this chapter)"
|
||||||
|
|
||||||
|
prompt = f"""TASK: Generate Chapter Sections Structure
|
||||||
|
|
||||||
|
CHAPTER METADATA:
|
||||||
|
- Chapter ID: {chapterId}
|
||||||
|
- Chapter Level: {chapterLevel}
|
||||||
|
- Chapter Title: {chapterTitle}
|
||||||
|
- Generation Hint: {generationHint}
|
||||||
|
|
||||||
|
WICHTIG: Chapter hat bereits vordefinierte Heading-Section.
|
||||||
|
Generiere NICHT eine Heading-Section für Chapter-Title!
|
||||||
|
|
||||||
|
AVAILABLE CONTENT PARTS:
|
||||||
|
{contentPartsIndex}
|
||||||
|
|
||||||
|
STANDARD JSON SCHEMA FOR SECTIONS:
|
||||||
|
Supported content_types: table, bullet_list, heading, paragraph, code_block, image
|
||||||
|
|
||||||
|
Return JSON:
|
||||||
|
{{
|
||||||
|
"sections": [
|
||||||
|
{{
|
||||||
|
"id": "section_1",
|
||||||
|
"content_type": "paragraph",
|
||||||
|
"contentPartIds": ["part_ext_1"],
|
||||||
|
"generationHint": "...",
|
||||||
|
"useAiCall": false,
|
||||||
|
"elements": []
|
||||||
|
}}
|
||||||
|
]
|
||||||
|
}}
|
||||||
|
|
||||||
|
CRITICAL: Return ONLY valid JSON. Do not include any explanatory text outside the JSON.
|
||||||
|
"""
|
||||||
|
return prompt
|
||||||
|
|
||||||
def _buildSectionGenerationPrompt(
|
def _buildSectionGenerationPrompt(
|
||||||
self,
|
self,
|
||||||
section: Dict[str, Any],
|
section: Dict[str, Any],
|
||||||
|
|
|
||||||
|
|
@ -32,11 +32,12 @@ class StructureGenerator:
|
||||||
parentOperationId: str
|
parentOperationId: str
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Phase 5C: Generiert Dokument-Struktur mit Sections.
|
Phase 5C: Generiert Chapter-Struktur (Table of Contents).
|
||||||
Jede Section spezifiziert:
|
Definiert für jedes Chapter:
|
||||||
- Welcher Content sollte in dieser Section sein
|
- Level, Title
|
||||||
- Welche ContentParts zu verwenden sind
|
- contentPartIds
|
||||||
- Format für jeden ContentPart
|
- contentPartInstructions
|
||||||
|
- generationHint
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
userPrompt: User-Anfrage
|
userPrompt: User-Anfrage
|
||||||
|
|
@ -45,7 +46,7 @@ class StructureGenerator:
|
||||||
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
parentOperationId: Parent Operation-ID für ChatLog-Hierarchie
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Struktur-Dict mit documents und sections
|
Struktur-Dict mit documents und chapters (nicht sections!)
|
||||||
"""
|
"""
|
||||||
# Erstelle Operation-ID für Struktur-Generierung
|
# Erstelle Operation-ID für Struktur-Generierung
|
||||||
structureOperationId = f"{parentOperationId}_structure_generation"
|
structureOperationId = f"{parentOperationId}_structure_generation"
|
||||||
|
|
@ -53,25 +54,36 @@ class StructureGenerator:
|
||||||
# Starte ChatLog mit Parent-Referenz
|
# Starte ChatLog mit Parent-Referenz
|
||||||
self.services.chat.progressLogStart(
|
self.services.chat.progressLogStart(
|
||||||
structureOperationId,
|
structureOperationId,
|
||||||
"Structure Generation",
|
"Chapter Structure Generation",
|
||||||
"Structure",
|
"Structure",
|
||||||
f"Generating structure for {outputFormat}",
|
f"Generating chapter structure for {outputFormat}",
|
||||||
parentOperationId=parentOperationId
|
parentOperationId=parentOperationId
|
||||||
)
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Baue Struktur-Prompt mit Content-Index
|
# Baue Chapter-Struktur-Prompt mit Content-Index
|
||||||
structurePrompt = self._buildStructurePrompt(
|
structurePrompt = self._buildChapterStructurePrompt(
|
||||||
userPrompt=userPrompt,
|
userPrompt=userPrompt,
|
||||||
contentParts=contentParts,
|
contentParts=contentParts,
|
||||||
outputFormat=outputFormat
|
outputFormat=outputFormat
|
||||||
)
|
)
|
||||||
|
|
||||||
# AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses)
|
# Debug: Log Prompt
|
||||||
# Debug-Logs werden bereits von callAiPlanning geschrieben
|
self.services.utils.writeDebugFile(
|
||||||
|
structurePrompt,
|
||||||
|
"chapter_structure_generation_prompt"
|
||||||
|
)
|
||||||
|
|
||||||
|
# AI-Call für Chapter-Struktur-Generierung
|
||||||
aiResponse = await self.aiService.callAiPlanning(
|
aiResponse = await self.aiService.callAiPlanning(
|
||||||
prompt=structurePrompt,
|
prompt=structurePrompt,
|
||||||
debugType="document_generation_structure"
|
debugType="chapter_structure_generation"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Debug: Log Response
|
||||||
|
self.services.utils.writeDebugFile(
|
||||||
|
aiResponse,
|
||||||
|
"chapter_structure_generation_response"
|
||||||
)
|
)
|
||||||
|
|
||||||
# Parse Struktur
|
# Parse Struktur
|
||||||
|
|
@ -87,13 +99,13 @@ class StructureGenerator:
|
||||||
logger.error(f"Error in generateStructure: {str(e)}")
|
logger.error(f"Error in generateStructure: {str(e)}")
|
||||||
raise
|
raise
|
||||||
|
|
||||||
def _buildStructurePrompt(
|
def _buildChapterStructurePrompt(
|
||||||
self,
|
self,
|
||||||
userPrompt: str,
|
userPrompt: str,
|
||||||
contentParts: List[ContentPart],
|
contentParts: List[ContentPart],
|
||||||
outputFormat: str
|
outputFormat: str
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Baue Prompt für Struktur-Generierung."""
|
"""Baue Prompt für Chapter-Struktur-Generierung."""
|
||||||
# Baue ContentParts-Index - filtere leere Parts heraus
|
# Baue ContentParts-Index - filtere leere Parts heraus
|
||||||
contentPartsIndex = ""
|
contentPartsIndex = ""
|
||||||
validParts = []
|
validParts = []
|
||||||
|
|
@ -179,14 +191,19 @@ class StructureGenerator:
|
||||||
AVAILABLE CONTENT PARTS:
|
AVAILABLE CONTENT PARTS:
|
||||||
{contentPartsIndex}
|
{contentPartsIndex}
|
||||||
|
|
||||||
TASK: Generiere Dokument-Struktur mit Sections.
|
TASK: Generiere Chapter-Struktur für die zu generierenden Dokumente.
|
||||||
Für jede Section, spezifiziere:
|
|
||||||
- section id
|
Für jedes Chapter:
|
||||||
- content_type (heading, paragraph, image, table, etc.)
|
- chapter id
|
||||||
- contentPartIds: [Liste von ContentPart-IDs zu verwenden]
|
- level (1, 2, 3, etc.)
|
||||||
- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist
|
- title
|
||||||
- generation_hint: Was AI für diese Section generieren soll
|
- contentPartIds: [Liste von ContentPart-IDs]
|
||||||
- elements: [] (leer, wird in nächster Phase gefüllt)
|
- contentPartInstructions: {{
|
||||||
|
"partId": {{
|
||||||
|
"instruction": "Wie Content strukturiert werden soll"
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
- generationHint: Beschreibung des Inhalts
|
||||||
|
|
||||||
OUTPUT FORMAT: {outputFormat}
|
OUTPUT FORMAT: {outputFormat}
|
||||||
|
|
||||||
|
|
@ -200,24 +217,19 @@ RETURN JSON:
|
||||||
"id": "doc_1",
|
"id": "doc_1",
|
||||||
"title": "Document Title",
|
"title": "Document Title",
|
||||||
"filename": "document.{outputFormat}",
|
"filename": "document.{outputFormat}",
|
||||||
"sections": [
|
"chapters": [
|
||||||
{{
|
{{
|
||||||
"id": "section_1",
|
"id": "chapter_1",
|
||||||
"content_type": "heading",
|
"level": 1,
|
||||||
"generation_hint": "Main title",
|
"title": "Introduction",
|
||||||
"contentPartIds": [],
|
|
||||||
"contentFormats": {{}},
|
|
||||||
"elements": []
|
|
||||||
}},
|
|
||||||
{{
|
|
||||||
"id": "section_2",
|
|
||||||
"content_type": "paragraph",
|
|
||||||
"generation_hint": "Introduction paragraph",
|
|
||||||
"contentPartIds": ["part_ext_1"],
|
"contentPartIds": ["part_ext_1"],
|
||||||
"contentFormats": {{
|
"contentPartInstructions": {{
|
||||||
"part_ext_1": "extracted"
|
"part_ext_1": {{
|
||||||
|
"instruction": "Use full extracted text"
|
||||||
|
}}
|
||||||
}},
|
}},
|
||||||
"elements": []
|
"generationHint": "Create introduction section",
|
||||||
|
"sections": []
|
||||||
}}
|
}}
|
||||||
]
|
]
|
||||||
}}]
|
}}]
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import uuid
|
||||||
import base64
|
import base64
|
||||||
import traceback
|
import traceback
|
||||||
from typing import Any, Dict, List, Optional, Callable
|
from typing import Any, Dict, List, Optional, Callable
|
||||||
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
from modules.datamodels.datamodelChat import ChatDocument
|
from modules.datamodels.datamodelChat import ChatDocument
|
||||||
from modules.services.serviceGeneration.subDocumentUtility import (
|
from modules.services.serviceGeneration.subDocumentUtility import (
|
||||||
getFileExtension,
|
getFileExtension,
|
||||||
|
|
@ -345,31 +346,31 @@ class GenerationService:
|
||||||
'workflowId': 'unknown'
|
'workflowId': 'unknown'
|
||||||
}
|
}
|
||||||
|
|
||||||
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]:
|
async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]:
|
||||||
"""
|
"""
|
||||||
Render extracted JSON content to the specified output format.
|
Render extracted JSON content to the specified output format.
|
||||||
Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering).
|
Processes EACH document separately and calls renderer for each.
|
||||||
Always uses unified "documents" array format.
|
Each renderer can return 1..n documents (e.g., HTML + images).
|
||||||
Supports three content formats: reference, object (base64), extracted_text.
|
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
extractedContent: Structured JSON document from AI extraction
|
extractedContent: Structured JSON document with documents array
|
||||||
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
|
outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
|
||||||
|
In future, each document can have its own format
|
||||||
title: Report title
|
title: Report title
|
||||||
userPrompt: User's original prompt for report generation
|
userPrompt: User's original prompt for report generation
|
||||||
aiService: AI service instance for generation prompt creation
|
aiService: AI service instance for generation prompt creation
|
||||||
parentOperationId: Optional parent operation ID for hierarchical logging
|
parentOperationId: Optional parent operation ID for hierarchical logging
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: (rendered_content, mime_type, images_list)
|
List of RenderedDocument objects.
|
||||||
images_list: List of image dicts with base64Data, altText, caption, etc.
|
Each RenderedDocument represents one rendered file (main document or supporting file)
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Validate JSON input
|
# Validate JSON input
|
||||||
if not isinstance(extractedContent, dict):
|
if not isinstance(extractedContent, dict):
|
||||||
raise ValueError("extractedContent must be a JSON dictionary")
|
raise ValueError("extractedContent must be a JSON dictionary")
|
||||||
|
|
||||||
# Unified approach: Always expect "documents" array (single doc = n=1)
|
# Unified approach: Always expect "documents" array
|
||||||
if "documents" not in extractedContent:
|
if "documents" not in extractedContent:
|
||||||
raise ValueError("extractedContent must contain 'documents' array")
|
raise ValueError("extractedContent must contain 'documents' array")
|
||||||
|
|
||||||
|
|
@ -377,56 +378,45 @@ class GenerationService:
|
||||||
if len(documents) == 0:
|
if len(documents) == 0:
|
||||||
raise ValueError("No documents found in 'documents' array")
|
raise ValueError("No documents found in 'documents' array")
|
||||||
|
|
||||||
# Phase 5: Multi-Dokument-Rendering
|
metadata = extractedContent.get("metadata", {})
|
||||||
if len(documents) == 1:
|
allRenderedDocuments = []
|
||||||
# Single document - use existing logic
|
|
||||||
single_doc = documents[0]
|
# Process EACH document separately
|
||||||
if "sections" not in single_doc:
|
for docIndex, doc in enumerate(documents):
|
||||||
raise ValueError("Document must contain 'sections' field")
|
if not isinstance(doc, dict):
|
||||||
|
logger.warning(f"Skipping invalid document at index {docIndex}")
|
||||||
|
continue
|
||||||
|
|
||||||
# Pass standardized schema to renderer (maintains architecture)
|
if "sections" not in doc:
|
||||||
contentToRender = extractedContent # Pass full standardized schema
|
logger.warning(f"Document {doc.get('id', docIndex)} has no sections, skipping")
|
||||||
else:
|
continue
|
||||||
# Multiple documents - merge all sections into one document for rendering
|
|
||||||
# Option: Merge all sections from all documents into a single document
|
|
||||||
all_sections = []
|
|
||||||
for doc in documents:
|
|
||||||
if isinstance(doc, dict) and "sections" in doc:
|
|
||||||
sections = doc.get("sections", [])
|
|
||||||
if isinstance(sections, list):
|
|
||||||
all_sections.extend(sections)
|
|
||||||
|
|
||||||
if not all_sections:
|
# Determine format for this document
|
||||||
raise ValueError("No sections found in any document")
|
# TODO: In future, each document can have its own format field
|
||||||
|
# For now, use the global outputFormat
|
||||||
|
docFormat = doc.get("format", outputFormat)
|
||||||
|
|
||||||
# Create merged document with all sections
|
# Get renderer for this document's format
|
||||||
merged_document = {
|
renderer = self._getFormatRenderer(docFormat)
|
||||||
"metadata": extractedContent.get("metadata", {}),
|
if not renderer:
|
||||||
"documents": [{
|
logger.warning(f"Unsupported format '{docFormat}' for document {doc.get('id', docIndex)}, skipping")
|
||||||
"id": "merged",
|
continue
|
||||||
"title": title,
|
|
||||||
"filename": f"{title}.{outputFormat}",
|
# Create JSON structure with single document (preserving metadata)
|
||||||
"sections": all_sections
|
singleDocContent = {
|
||||||
}]
|
"metadata": metadata,
|
||||||
|
"documents": [doc] # Only this document
|
||||||
}
|
}
|
||||||
contentToRender = merged_document
|
|
||||||
logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections")
|
# Use document title or fallback to provided title
|
||||||
|
docTitle = doc.get("title", title)
|
||||||
# Get the appropriate renderer for the format
|
|
||||||
renderer = self._getFormatRenderer(outputFormat)
|
# Render this document (can return multiple files, e.g., HTML + images)
|
||||||
if not renderer:
|
renderedDocs = await renderer.render(singleDocContent, docTitle, userPrompt, aiService)
|
||||||
raise ValueError(f"Unsupported output format: {outputFormat}")
|
allRenderedDocuments.extend(renderedDocs)
|
||||||
|
|
||||||
# Render the JSON content directly (AI generation handled by main service)
|
logger.info(f"Rendered {len(documents)} document(s) into {len(allRenderedDocuments)} file(s)")
|
||||||
# Renderer receives standardized schema and extracts what it needs
|
return allRenderedDocuments
|
||||||
renderedContent, mimeType = await renderer.render(contentToRender, title, userPrompt, aiService)
|
|
||||||
|
|
||||||
# Get images from renderer if available
|
|
||||||
images = []
|
|
||||||
if hasattr(renderer, 'getRenderedImages'):
|
|
||||||
images = renderer.getRenderedImages()
|
|
||||||
|
|
||||||
return renderedContent, mimeType, images
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
|
logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,9 @@ Base renderer class for all format renderers.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import Dict, Any, Tuple, List
|
from typing import Dict, Any, List
|
||||||
from modules.datamodels.datamodelJson import supportedSectionTypes
|
from modules.datamodels.datamodelJson import supportedSectionTypes
|
||||||
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
|
|
@ -50,21 +51,49 @@ class BaseRenderer(ABC):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""
|
"""
|
||||||
Render extracted JSON content to the target format.
|
Render extracted JSON content to multiple documents.
|
||||||
|
Each renderer must implement this method.
|
||||||
|
Can return 1..n documents (e.g., HTML + images).
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
extractedContent: Structured JSON content with sections and metadata
|
extractedContent: Structured JSON content with sections and metadata (contains single document)
|
||||||
title: Report title
|
title: Report title
|
||||||
userPrompt: Original user prompt for context
|
userPrompt: Original user prompt for context
|
||||||
aiService: AI service instance for additional processing
|
aiService: AI service instance for additional processing
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
tuple: (renderedContent, mimeType)
|
List of RenderedDocument objects.
|
||||||
|
First document is the main document, additional documents are supporting files (e.g., images).
|
||||||
|
Even if only one document is returned, it must be wrapped in a list.
|
||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def _determineFilename(self, title: str, mimeType: str) -> str:
|
||||||
|
"""Determine filename from title and mimeType."""
|
||||||
|
import re
|
||||||
|
# Get extension from mimeType
|
||||||
|
extensionMap = {
|
||||||
|
"text/html": "html",
|
||||||
|
"application/pdf": "pdf",
|
||||||
|
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": "docx",
|
||||||
|
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": "xlsx",
|
||||||
|
"text/plain": "txt",
|
||||||
|
"text/markdown": "md",
|
||||||
|
"application/json": "json",
|
||||||
|
"text/csv": "csv"
|
||||||
|
}
|
||||||
|
extension = extensionMap.get(mimeType, "txt")
|
||||||
|
|
||||||
|
# Sanitize title for filename
|
||||||
|
sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title)
|
||||||
|
sanitized = re.sub(r"_+", "_", sanitized).strip("_")
|
||||||
|
if not sanitized:
|
||||||
|
sanitized = "document"
|
||||||
|
|
||||||
|
return f"{sanitized}.{extension}"
|
||||||
|
|
||||||
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
|
def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||||
"""
|
"""
|
||||||
Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
|
Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]}
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ CSV renderer for report generation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
class RendererCsv(BaseRenderer):
|
class RendererCsv(BaseRenderer):
|
||||||
"""Renders content to CSV format with format-specific extraction."""
|
"""Renders content to CSV format with format-specific extraction."""
|
||||||
|
|
@ -25,13 +26,28 @@ class RendererCsv(BaseRenderer):
|
||||||
"""Return priority for CSV renderer."""
|
"""Return priority for CSV renderer."""
|
||||||
return 70
|
return 70
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to CSV format."""
|
"""Render extracted JSON content to CSV format."""
|
||||||
try:
|
try:
|
||||||
# Generate CSV directly from JSON (no styling needed for CSV)
|
# Generate CSV directly from JSON (no styling needed for CSV)
|
||||||
csvContent = await self._generateCsvFromJson(extractedContent, title)
|
csvContent = await self._generateCsvFromJson(extractedContent, title)
|
||||||
|
|
||||||
return csvContent, "text/csv"
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "text/csv")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "text/csv")
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=csvContent.encode('utf-8'),
|
||||||
|
mimeType="text/csv",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering CSV: {str(e)}")
|
self.logger.error(f"Error rendering CSV: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ DOCX renderer for report generation using python-docx.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
import io
|
import io
|
||||||
import base64
|
import base64
|
||||||
import re
|
import re
|
||||||
|
|
@ -38,7 +39,7 @@ class RendererDocx(BaseRenderer):
|
||||||
"""Return priority for DOCX renderer."""
|
"""Return priority for DOCX renderer."""
|
||||||
return 115
|
return 115
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
|
"""Render extracted JSON content to DOCX format using AI-analyzed styling."""
|
||||||
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
|
self.services.utils.debugLogToFile(f"DOCX RENDER CALLED: title={title}, user_prompt={userPrompt[:50] if userPrompt else 'None'}...", "DOCX_RENDERER")
|
||||||
try:
|
try:
|
||||||
|
|
@ -46,18 +47,48 @@ class RendererDocx(BaseRenderer):
|
||||||
# Fallback to HTML if python-docx not available
|
# Fallback to HTML if python-docx not available
|
||||||
from .rendererHtml import RendererHtml
|
from .rendererHtml import RendererHtml
|
||||||
htmlRenderer = RendererHtml()
|
htmlRenderer = RendererHtml()
|
||||||
htmlContent, _ = await htmlRenderer.render(extractedContent, title)
|
return await htmlRenderer.render(extractedContent, title, userPrompt, aiService)
|
||||||
return htmlContent, "text/html"
|
|
||||||
|
|
||||||
# Generate DOCX using AI-analyzed styling
|
# Generate DOCX using AI-analyzed styling
|
||||||
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
|
docx_content = await self._generateDocxFromJson(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return docx_content, "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.wordprocessingml.document")
|
||||||
|
|
||||||
|
# Convert DOCX content to bytes if it's a string (base64)
|
||||||
|
if isinstance(docx_content, str):
|
||||||
|
try:
|
||||||
|
docx_bytes = base64.b64decode(docx_content)
|
||||||
|
except Exception:
|
||||||
|
docx_bytes = docx_content.encode('utf-8')
|
||||||
|
else:
|
||||||
|
docx_bytes = docx_content
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=docx_bytes,
|
||||||
|
mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering DOCX: {str(e)}")
|
self.logger.error(f"Error rendering DOCX: {str(e)}")
|
||||||
# Return minimal fallback
|
# Return minimal fallback
|
||||||
return f"DOCX Generation Error: {str(e)}", "text/plain"
|
fallbackContent = f"DOCX Generation Error: {str(e)}"
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=fallbackContent.encode('utf-8'),
|
||||||
|
mimeType="text/plain",
|
||||||
|
filename=self._determineFilename(title, "text/plain")
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
async def _generateDocxFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate DOCX content from structured JSON document."""
|
"""Generate DOCX content from structured JSON document."""
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ HTML renderer for report generation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
class RendererHtml(BaseRenderer):
|
class RendererHtml(BaseRenderer):
|
||||||
"""Renders content to HTML format with format-specific extraction."""
|
"""Renders content to HTML format with format-specific extraction."""
|
||||||
|
|
@ -25,29 +26,66 @@ class RendererHtml(BaseRenderer):
|
||||||
"""Return priority for HTML renderer."""
|
"""Return priority for HTML renderer."""
|
||||||
return 100
|
return 100
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to HTML format using AI-analyzed styling."""
|
"""
|
||||||
try:
|
Render HTML document with images as separate files.
|
||||||
# Extract images first
|
Returns list of documents: [HTML document, image1, image2, ...]
|
||||||
images = self._extractImages(extractedContent)
|
"""
|
||||||
|
import base64
|
||||||
|
|
||||||
|
# Extract images first
|
||||||
|
images = self._extractImages(extractedContent)
|
||||||
|
|
||||||
|
# Store images in instance for later retrieval
|
||||||
|
self._renderedImages = images
|
||||||
|
|
||||||
|
# Generate HTML using AI-analyzed styling
|
||||||
|
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
|
# Replace base64 data URIs with relative file paths if images exist
|
||||||
|
if images:
|
||||||
|
htmlContent = self._replaceImageDataUris(htmlContent, images)
|
||||||
|
|
||||||
|
# Determine HTML filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
htmlFilename = documents[0].get("filename")
|
||||||
|
if not htmlFilename:
|
||||||
|
htmlFilename = self._determineFilename(title, "text/html")
|
||||||
|
else:
|
||||||
|
htmlFilename = self._determineFilename(title, "text/html")
|
||||||
|
|
||||||
|
# Start with HTML document
|
||||||
|
resultDocuments = [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=htmlContent.encode('utf-8'),
|
||||||
|
mimeType="text/html",
|
||||||
|
filename=htmlFilename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
|
# Add images as separate documents
|
||||||
|
for img in images:
|
||||||
|
base64Data = img.get("base64Data", "")
|
||||||
|
filename = img.get("filename", f"image_{len(resultDocuments)}.png")
|
||||||
|
mimeType = img.get("mimeType", "image/png")
|
||||||
|
|
||||||
# Store images in instance for later retrieval
|
if base64Data:
|
||||||
self._renderedImages = images
|
try:
|
||||||
|
# Decode base64 to bytes
|
||||||
# Generate HTML using AI-analyzed styling
|
imageBytes = base64.b64decode(base64Data)
|
||||||
htmlContent = await self._generateHtmlFromJson(extractedContent, title, userPrompt, aiService)
|
resultDocuments.append(
|
||||||
|
RenderedDocument(
|
||||||
# Replace base64 data URIs with relative file paths if images exist
|
documentData=imageBytes,
|
||||||
if images:
|
mimeType=mimeType,
|
||||||
htmlContent = self._replaceImageDataUris(htmlContent, images)
|
filename=filename
|
||||||
|
)
|
||||||
return htmlContent, "text/html"
|
)
|
||||||
|
self.logger.debug(f"Added image file: {filename} ({len(imageBytes)} bytes)")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering HTML: {str(e)}")
|
self.logger.warning(f"Error creating image file {filename}: {str(e)}")
|
||||||
# Return minimal HTML fallback
|
|
||||||
self._renderedImages = [] # Initialize empty list on error
|
return resultDocuments
|
||||||
return f"<html><head><title>{title}</title></head><body><h1>{title}</h1><p>Error rendering report: {str(e)}</p></body></html>", "text/html"
|
|
||||||
|
|
||||||
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
async def _generateHtmlFromJson(self, jsonContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate HTML content from structured JSON document using AI-generated styling."""
|
"""Generate HTML content from structured JSON document using AI-generated styling."""
|
||||||
|
|
@ -597,8 +635,31 @@ class RendererHtml(BaseRenderer):
|
||||||
|
|
||||||
if base64Data:
|
if base64Data:
|
||||||
sectionId = section.get("id", "unknown")
|
sectionId = section.get("id", "unknown")
|
||||||
|
|
||||||
|
# Bestimme MIME-Type und Extension
|
||||||
|
mimeType = element.get("mimeType", "image/png")
|
||||||
|
if not mimeType or mimeType == "unknown":
|
||||||
|
# Versuche MIME-Type aus base64 zu erkennen
|
||||||
|
if base64Data.startswith("/9j/"):
|
||||||
|
mimeType = "image/jpeg"
|
||||||
|
elif base64Data.startswith("iVBORw0KGgo"):
|
||||||
|
mimeType = "image/png"
|
||||||
|
else:
|
||||||
|
mimeType = "image/png" # Default
|
||||||
|
|
||||||
|
# Bestimme Extension basierend auf MIME-Type
|
||||||
|
extension = "png"
|
||||||
|
if mimeType == "image/jpeg" or mimeType == "image/jpg":
|
||||||
|
extension = "jpg"
|
||||||
|
elif mimeType == "image/png":
|
||||||
|
extension = "png"
|
||||||
|
elif mimeType == "image/gif":
|
||||||
|
extension = "gif"
|
||||||
|
elif mimeType == "image/webp":
|
||||||
|
extension = "webp"
|
||||||
|
|
||||||
# Generate filename from section ID
|
# Generate filename from section ID
|
||||||
filename = f"{sectionId}.png"
|
filename = f"{sectionId}.{extension}"
|
||||||
# Clean filename (remove invalid characters)
|
# Clean filename (remove invalid characters)
|
||||||
filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename)
|
filename = "".join(c if c.isalnum() or c in "._-" else "_" for c in filename)
|
||||||
|
|
||||||
|
|
@ -607,7 +668,8 @@ class RendererHtml(BaseRenderer):
|
||||||
"altText": element.get("altText", "Image"),
|
"altText": element.get("altText", "Image"),
|
||||||
"caption": element.get("caption"),
|
"caption": element.get("caption"),
|
||||||
"sectionId": sectionId,
|
"sectionId": sectionId,
|
||||||
"filename": filename
|
"filename": filename,
|
||||||
|
"mimeType": mimeType
|
||||||
})
|
})
|
||||||
self.logger.debug(f"Extracted image from section {sectionId}: {filename}")
|
self.logger.debug(f"Extracted image from section {sectionId}: {filename}")
|
||||||
|
|
||||||
|
|
@ -633,8 +695,9 @@ class RendererHtml(BaseRenderer):
|
||||||
import base64
|
import base64
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# Find all image data URIs in HTML
|
# Find all image data URIs in HTML (verschiedene MIME-Types unterstützen)
|
||||||
dataUriPattern = r'data:image/png;base64,([A-Za-z0-9+/=]+)'
|
# Pattern: data:image/[type];base64,<base64>
|
||||||
|
dataUriPattern = r'data:image/[^;]+;base64,([A-Za-z0-9+/=]+)'
|
||||||
|
|
||||||
def replaceDataUri(match):
|
def replaceDataUri(match):
|
||||||
base64Data = match.group(1)
|
base64Data = match.group(1)
|
||||||
|
|
@ -642,7 +705,9 @@ class RendererHtml(BaseRenderer):
|
||||||
# Find matching image in images list
|
# Find matching image in images list
|
||||||
matchingImage = None
|
matchingImage = None
|
||||||
for img in images:
|
for img in images:
|
||||||
if img["base64Data"] == base64Data or img["base64Data"].startswith(base64Data[:100]):
|
imgBase64 = img.get("base64Data", "")
|
||||||
|
# Vergleiche base64-Daten (kann unterschiedliche Längen haben durch Padding)
|
||||||
|
if imgBase64 == base64Data or imgBase64.startswith(base64Data[:100]) or base64Data.startswith(imgBase64[:100]):
|
||||||
matchingImage = img
|
matchingImage = img
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|
@ -650,20 +715,25 @@ class RendererHtml(BaseRenderer):
|
||||||
# Use filename from image data (generated from section ID)
|
# Use filename from image data (generated from section ID)
|
||||||
filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
|
filename = matchingImage.get("filename", f"image_{images.index(matchingImage) + 1}.png")
|
||||||
|
|
||||||
# Replace with relative path
|
# Replace with relative path (ohne Pfad, nur Dateiname)
|
||||||
altText = matchingImage.get("altText", "Image")
|
altText = matchingImage.get("altText", "Image")
|
||||||
caption = matchingImage.get("caption", "")
|
caption = matchingImage.get("caption", "")
|
||||||
|
|
||||||
|
# Entferne IMAGE_MARKER Kommentar falls vorhanden
|
||||||
|
imgTag = f'<img src="{filename}" alt="{altText}">'
|
||||||
|
|
||||||
if caption:
|
if caption:
|
||||||
return f'<figure><img src="{filename}" alt="{altText}"><figcaption>{caption}</figcaption></figure>'
|
return f'<figure>{imgTag}<figcaption>{caption}</figcaption></figure>'
|
||||||
else:
|
else:
|
||||||
return f'<img src="{filename}" alt="{altText}">'
|
return imgTag
|
||||||
else:
|
else:
|
||||||
# Keep original if no match found
|
# Keep original if no match found
|
||||||
return match.group(0)
|
return match.group(0)
|
||||||
|
|
||||||
# Replace all data URIs
|
# Replace all data URIs (auch IMAGE_MARKER Kommentare entfernen)
|
||||||
updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent)
|
updatedHtml = re.sub(dataUriPattern, replaceDataUri, htmlContent)
|
||||||
|
# Entferne IMAGE_MARKER Kommentare die übrig geblieben sind
|
||||||
|
updatedHtml = re.sub(r'<!--IMAGE_MARKER:[^>]+-->', '', updatedHtml)
|
||||||
|
|
||||||
return updatedHtml
|
return updatedHtml
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -5,8 +5,10 @@ Image renderer for report generation using AI image generation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
import logging
|
import logging
|
||||||
|
import base64
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -28,13 +30,37 @@ class RendererImage(BaseRenderer):
|
||||||
"""Return priority for image renderer."""
|
"""Return priority for image renderer."""
|
||||||
return 90
|
return 90
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to image format using AI image generation."""
|
"""Render extracted JSON content to image format using AI image generation."""
|
||||||
try:
|
try:
|
||||||
# Generate AI image from content
|
# Generate AI image from content
|
||||||
imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
|
imageContent = await self._generateAiImage(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return imageContent, "image/png"
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "image/png")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "image/png")
|
||||||
|
|
||||||
|
# Convert image content to bytes (base64 string or bytes)
|
||||||
|
if isinstance(imageContent, str):
|
||||||
|
try:
|
||||||
|
imageBytes = base64.b64decode(imageContent)
|
||||||
|
except Exception:
|
||||||
|
imageBytes = imageContent.encode('utf-8')
|
||||||
|
else:
|
||||||
|
imageBytes = imageContent
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=imageBytes,
|
||||||
|
mimeType="image/png",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering image: {str(e)}")
|
self.logger.error(f"Error rendering image: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ JSON renderer for report generation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
import json
|
import json
|
||||||
|
|
||||||
class RendererJson(BaseRenderer):
|
class RendererJson(BaseRenderer):
|
||||||
|
|
@ -26,14 +27,29 @@ class RendererJson(BaseRenderer):
|
||||||
"""Return priority for JSON renderer."""
|
"""Return priority for JSON renderer."""
|
||||||
return 80
|
return 80
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to JSON format."""
|
"""Render extracted JSON content to JSON format."""
|
||||||
try:
|
try:
|
||||||
# The extracted content should already be JSON from the AI
|
# The extracted content should already be JSON from the AI
|
||||||
# Just validate and format it
|
# Just validate and format it
|
||||||
jsonContent = self._cleanJsonContent(extractedContent, title)
|
jsonContent = self._cleanJsonContent(extractedContent, title)
|
||||||
|
|
||||||
return jsonContent, "application/json"
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "application/json")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "application/json")
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=jsonContent.encode('utf-8'),
|
||||||
|
mimeType="application/json",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering JSON: {str(e)}")
|
self.logger.error(f"Error rendering JSON: {str(e)}")
|
||||||
|
|
@ -43,7 +59,14 @@ class RendererJson(BaseRenderer):
|
||||||
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
|
"sections": [{"content_type": "paragraph", "elements": [{"text": f"Error rendering report: {str(e)}"}]}],
|
||||||
"metadata": {"error": str(e)}
|
"metadata": {"error": str(e)}
|
||||||
}
|
}
|
||||||
return json.dumps(fallbackData, indent=2), "application/json"
|
fallbackContent = json.dumps(fallbackData, indent=2)
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=fallbackContent.encode('utf-8'),
|
||||||
|
mimeType="application/json",
|
||||||
|
filename=self._determineFilename(title, "application/json")
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
|
def _cleanJsonContent(self, content: Dict[str, Any], title: str) -> str:
|
||||||
"""Clean and validate JSON content from AI."""
|
"""Clean and validate JSON content from AI."""
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ Markdown renderer for report generation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
class RendererMarkdown(BaseRenderer):
|
class RendererMarkdown(BaseRenderer):
|
||||||
"""Renders content to Markdown format with format-specific extraction."""
|
"""Renders content to Markdown format with format-specific extraction."""
|
||||||
|
|
@ -25,18 +26,40 @@ class RendererMarkdown(BaseRenderer):
|
||||||
"""Return priority for markdown renderer."""
|
"""Return priority for markdown renderer."""
|
||||||
return 95
|
return 95
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to Markdown format."""
|
"""Render extracted JSON content to Markdown format."""
|
||||||
try:
|
try:
|
||||||
# Generate markdown from JSON structure
|
# Generate markdown from JSON structure
|
||||||
markdownContent = self._generateMarkdownFromJson(extractedContent, title)
|
markdownContent = self._generateMarkdownFromJson(extractedContent, title)
|
||||||
|
|
||||||
return markdownContent, "text/markdown"
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "text/markdown")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "text/markdown")
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=markdownContent.encode('utf-8'),
|
||||||
|
mimeType="text/markdown",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering markdown: {str(e)}")
|
self.logger.error(f"Error rendering markdown: {str(e)}")
|
||||||
# Return minimal markdown fallback
|
# Return minimal markdown fallback
|
||||||
return f"# {title}\n\nError rendering report: {str(e)}", "text/markdown"
|
fallbackContent = f"# {title}\n\nError rendering report: {str(e)}"
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=fallbackContent.encode('utf-8'),
|
||||||
|
mimeType="text/markdown",
|
||||||
|
filename=self._determineFilename(title, "text/markdown")
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
def _generateMarkdownFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
||||||
"""Generate markdown content from structured JSON document."""
|
"""Generate markdown content from structured JSON document."""
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ PDF renderer for report generation using reportlab.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
import io
|
import io
|
||||||
import base64
|
import base64
|
||||||
|
|
||||||
|
|
@ -38,25 +39,56 @@ class RendererPdf(BaseRenderer):
|
||||||
"""Return priority for PDF renderer."""
|
"""Return priority for PDF renderer."""
|
||||||
return 120
|
return 120
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
|
"""Render extracted JSON content to PDF format using AI-analyzed styling."""
|
||||||
try:
|
try:
|
||||||
if not REPORTLAB_AVAILABLE:
|
if not REPORTLAB_AVAILABLE:
|
||||||
# Fallback to HTML if reportlab not available
|
# Fallback to HTML if reportlab not available
|
||||||
from .rendererHtml import RendererHtml
|
from .rendererHtml import RendererHtml
|
||||||
html_renderer = RendererHtml()
|
html_renderer = RendererHtml()
|
||||||
html_content, _ = await html_renderer.render(extractedContent, title, userPrompt, aiService)
|
return await html_renderer.render(extractedContent, title, userPrompt, aiService)
|
||||||
return html_content, "text/html"
|
|
||||||
|
|
||||||
# Generate PDF using AI-analyzed styling
|
# Generate PDF using AI-analyzed styling
|
||||||
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
|
pdf_content = await self._generatePdfFromJson(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return pdf_content, "application/pdf"
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "application/pdf")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "application/pdf")
|
||||||
|
|
||||||
|
# Convert PDF content to bytes if it's a string (base64)
|
||||||
|
if isinstance(pdf_content, str):
|
||||||
|
# Try to decode as base64, otherwise encode as UTF-8
|
||||||
|
try:
|
||||||
|
pdf_bytes = base64.b64decode(pdf_content)
|
||||||
|
except Exception:
|
||||||
|
pdf_bytes = pdf_content.encode('utf-8')
|
||||||
|
else:
|
||||||
|
pdf_bytes = pdf_content
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=pdf_bytes,
|
||||||
|
mimeType="application/pdf",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering PDF: {str(e)}")
|
self.logger.error(f"Error rendering PDF: {str(e)}")
|
||||||
# Return minimal fallback
|
# Return minimal fallback
|
||||||
return f"PDF Generation Error: {str(e)}", "text/plain"
|
fallbackContent = f"PDF Generation Error: {str(e)}"
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=fallbackContent.encode('utf-8'),
|
||||||
|
mimeType="text/plain",
|
||||||
|
filename=self._determineFilename(title, "text/plain")
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
async def _generatePdfFromJson(self, json_content: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> str:
|
||||||
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
"""Generate PDF content from structured JSON document using AI-generated styling."""
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,9 @@ import io
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
from typing import Dict, Any, Optional, Tuple, List
|
from typing import Dict, Any, Optional, List
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -25,7 +26,7 @@ class RendererPptx(BaseRenderer):
|
||||||
"""Get list of supported output formats."""
|
"""Get list of supported output formats."""
|
||||||
return ["pptx", "ppt"]
|
return ["pptx", "ppt"]
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""
|
"""
|
||||||
Render content as PowerPoint presentation from JSON data.
|
Render content as PowerPoint presentation from JSON data.
|
||||||
|
|
||||||
|
|
@ -204,14 +205,44 @@ class RendererPptx(BaseRenderer):
|
||||||
pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')
|
pptx_base64 = base64.b64encode(pptx_bytes).decode('utf-8')
|
||||||
|
|
||||||
logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
|
logger.info(f"Successfully rendered PowerPoint presentation: {len(pptx_bytes)} bytes")
|
||||||
return pptx_base64, "application/vnd.openxmlformats-officedocument.presentationml.presentation"
|
|
||||||
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.presentationml.presentation")
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=pptx_bytes,
|
||||||
|
mimeType="application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except ImportError:
|
except ImportError:
|
||||||
logger.error("python-pptx library not installed. Install with: pip install python-pptx")
|
logger.error("python-pptx library not installed. Install with: pip install python-pptx")
|
||||||
return "python-pptx library not installed", "text/plain"
|
fallbackContent = "python-pptx library not installed"
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=fallbackContent.encode('utf-8'),
|
||||||
|
mimeType="text/plain",
|
||||||
|
filename=self._determineFilename(title, "text/plain")
|
||||||
|
)
|
||||||
|
]
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
|
logger.error(f"Error rendering PowerPoint presentation: {str(e)}")
|
||||||
return f"Error rendering PowerPoint presentation: {str(e)}", "text/plain"
|
fallbackContent = f"Error rendering PowerPoint presentation: {str(e)}"
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=fallbackContent.encode('utf-8'),
|
||||||
|
mimeType="text/plain",
|
||||||
|
filename=self._determineFilename(title, "text/plain")
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
def _parseContentToSlides(self, content: str, title: str) -> list:
|
def _parseContentToSlides(self, content: str, title: str) -> list:
|
||||||
"""
|
"""
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ Text renderer for report generation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
|
|
||||||
class RendererText(BaseRenderer):
|
class RendererText(BaseRenderer):
|
||||||
"""Renders content to plain text format with format-specific extraction."""
|
"""Renders content to plain text format with format-specific extraction."""
|
||||||
|
|
@ -47,18 +48,40 @@ class RendererText(BaseRenderer):
|
||||||
"""Return priority for text renderer."""
|
"""Return priority for text renderer."""
|
||||||
return 90
|
return 90
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to plain text format."""
|
"""Render extracted JSON content to plain text format."""
|
||||||
try:
|
try:
|
||||||
# Generate text from JSON structure
|
# Generate text from JSON structure
|
||||||
textContent = self._generateTextFromJson(extractedContent, title)
|
textContent = self._generateTextFromJson(extractedContent, title)
|
||||||
|
|
||||||
return textContent, "text/plain"
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "text/plain")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "text/plain")
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=textContent.encode('utf-8'),
|
||||||
|
mimeType="text/plain",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering text: {str(e)}")
|
self.logger.error(f"Error rendering text: {str(e)}")
|
||||||
# Return minimal text fallback
|
# Return minimal text fallback
|
||||||
return f"{title}\n\nError rendering report: {str(e)}", "text/plain"
|
fallbackContent = f"{title}\n\nError rendering report: {str(e)}"
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=fallbackContent.encode('utf-8'),
|
||||||
|
mimeType="text/plain",
|
||||||
|
filename=self._determineFilename(title, "text/plain")
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
def _generateTextFromJson(self, jsonContent: Dict[str, Any], title: str) -> str:
|
||||||
"""Generate text content from structured JSON document."""
|
"""Generate text content from structured JSON document."""
|
||||||
|
|
|
||||||
|
|
@ -5,7 +5,8 @@ Excel renderer for report generation using openpyxl.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from .rendererBaseTemplate import BaseRenderer
|
from .rendererBaseTemplate import BaseRenderer
|
||||||
from typing import Dict, Any, Tuple, List
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
||||||
|
from typing import Dict, Any, List
|
||||||
import io
|
import io
|
||||||
import base64
|
import base64
|
||||||
from datetime import datetime, UTC
|
from datetime import datetime, UTC
|
||||||
|
|
@ -37,20 +38,43 @@ class RendererXlsx(BaseRenderer):
|
||||||
"""Return priority for Excel renderer."""
|
"""Return priority for Excel renderer."""
|
||||||
return 110
|
return 110
|
||||||
|
|
||||||
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> Tuple[str, str]:
|
async def render(self, extractedContent: Dict[str, Any], title: str, userPrompt: str = None, aiService=None) -> List[RenderedDocument]:
|
||||||
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
|
"""Render extracted JSON content to Excel format using AI-analyzed styling."""
|
||||||
try:
|
try:
|
||||||
if not OPENPYXL_AVAILABLE:
|
if not OPENPYXL_AVAILABLE:
|
||||||
# Fallback to CSV if openpyxl not available
|
# Fallback to CSV if openpyxl not available
|
||||||
from .rendererCsv import RendererCsv
|
from .rendererCsv import RendererCsv
|
||||||
csvRenderer = RendererCsv()
|
csvRenderer = RendererCsv()
|
||||||
csvContent, _ = await csvRenderer.render(extractedContent, title, userPrompt, aiService)
|
return await csvRenderer.render(extractedContent, title, userPrompt, aiService)
|
||||||
return csvContent, "text/csv"
|
|
||||||
|
|
||||||
# Generate Excel using AI-analyzed styling
|
# Generate Excel using AI-analyzed styling
|
||||||
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
|
excelContent = await self._generateExcelFromJson(extractedContent, title, userPrompt, aiService)
|
||||||
|
|
||||||
return excelContent, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
# Determine filename from document or title
|
||||||
|
documents = extractedContent.get("documents", [])
|
||||||
|
if documents and isinstance(documents[0], dict):
|
||||||
|
filename = documents[0].get("filename")
|
||||||
|
if not filename:
|
||||||
|
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
||||||
|
else:
|
||||||
|
filename = self._determineFilename(title, "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
|
||||||
|
|
||||||
|
# Convert Excel content to bytes if it's a string (base64)
|
||||||
|
if isinstance(excelContent, str):
|
||||||
|
try:
|
||||||
|
excel_bytes = base64.b64decode(excelContent)
|
||||||
|
except Exception:
|
||||||
|
excel_bytes = excelContent.encode('utf-8')
|
||||||
|
else:
|
||||||
|
excel_bytes = excelContent
|
||||||
|
|
||||||
|
return [
|
||||||
|
RenderedDocument(
|
||||||
|
documentData=excel_bytes,
|
||||||
|
mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
||||||
|
filename=filename
|
||||||
|
)
|
||||||
|
]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Error rendering Excel: {str(e)}")
|
self.logger.error(f"Error rendering Excel: {str(e)}")
|
||||||
|
|
|
||||||
|
|
@ -106,6 +106,18 @@ class ContentValidator:
|
||||||
if section.get("textPreview"):
|
if section.get("textPreview"):
|
||||||
sectionSummary["textPreview"] = section.get("textPreview")
|
sectionSummary["textPreview"] = section.get("textPreview")
|
||||||
|
|
||||||
|
# Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu
|
||||||
|
contentPartIds = section.get("contentPartIds", [])
|
||||||
|
if contentPartIds and not elements:
|
||||||
|
# Prüfe ob contentPartsMetadata vorhanden ist
|
||||||
|
contentPartsMetadata = section.get("contentPartsMetadata", [])
|
||||||
|
if contentPartsMetadata:
|
||||||
|
sectionSummary["contentPartsMetadata"] = contentPartsMetadata
|
||||||
|
else:
|
||||||
|
# Fallback: Zeige nur IDs wenn Metadaten nicht verfügbar
|
||||||
|
sectionSummary["contentPartIds"] = contentPartIds
|
||||||
|
sectionSummary["note"] = "ContentParts referenced but metadata not available"
|
||||||
|
|
||||||
# Include any additional fields from section (generic approach)
|
# Include any additional fields from section (generic approach)
|
||||||
# This ensures all action-specific fields are preserved
|
# This ensures all action-specific fields are preserved
|
||||||
for key, value in section.items():
|
for key, value in section.items():
|
||||||
|
|
@ -141,6 +153,18 @@ class ContentValidator:
|
||||||
sectionSummary["rowCount"] = len(rows)
|
sectionSummary["rowCount"] = len(rows)
|
||||||
sectionSummary["headers"] = headers
|
sectionSummary["headers"] = headers
|
||||||
|
|
||||||
|
# Wenn contentPartIds vorhanden sind, aber keine elements: Füge ContentParts-Metadaten hinzu
|
||||||
|
contentPartIds = section.get("contentPartIds", [])
|
||||||
|
if contentPartIds and not elements:
|
||||||
|
# Prüfe ob contentPartsMetadata vorhanden ist
|
||||||
|
contentPartsMetadata = section.get("contentPartsMetadata", [])
|
||||||
|
if contentPartsMetadata:
|
||||||
|
sectionSummary["contentPartsMetadata"] = contentPartsMetadata
|
||||||
|
else:
|
||||||
|
# Fallback: Zeige nur IDs wenn Metadaten nicht verfügbar
|
||||||
|
sectionSummary["contentPartIds"] = contentPartIds
|
||||||
|
sectionSummary["note"] = "ContentParts referenced but metadata not available"
|
||||||
|
|
||||||
# Include any additional fields from section (generic approach)
|
# Include any additional fields from section (generic approach)
|
||||||
for key, value in section.items():
|
for key, value in section.items():
|
||||||
if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately
|
if key not in sectionSummary and key not in ["elements"]: # Skip elements as they're processed separately
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue