# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Generation Path

Handles document generation using existing chapter/section model.
"""
|
|
|
|
import json
|
|
import logging
|
|
import time
|
|
from typing import Dict, Any, List, Optional
|
|
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
|
|
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
|
|
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
|
|
from modules.datamodels.datamodelDocument import RenderedDocument
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
|
|
class DocumentGenerationPath:
    """Document generation path (existing functionality, refactored).

    Drives the chapter/section document pipeline by delegating each step to
    the injected ``services`` container: document resolution, intent
    clarification, content extraction, structure generation, structure
    filling and rendering.
    """

    def __init__(self, services):
        """Store the service container.

        Args:
            services: Container exposing the ``workflow``, ``chat``, ``ai``
                and ``utils`` attributes used by ``generateDocument``.
        """
        self.services = services

    async def generateDocument(
        self,
        userPrompt: str,
        documentList: Optional[Any] = None,  # DocumentReferenceList
        documentIntents: Optional[List[DocumentIntent]] = None,
        contentParts: Optional[List[ContentPart]] = None,
        outputFormat: str = "txt",
        title: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> AiResponse:
        """Generate document using existing chapter/section model.

        Args:
            userPrompt: Prompt forwarded to intent clarification, structure
                generation, structure filling and rendering.
            documentList: Optional document reference list; when truthy it is
                resolved through ``services.chat.getChatDocumentsFromDocumentList``.
            documentIntents: Optional intents. When empty and documents were
                resolved, they are obtained from
                ``services.ai.clarifyDocumentIntents``.
            contentParts: Optional caller-provided parts. When documents were
                resolved, these are appended after the parts extracted from
                the documents; otherwise they are used unchanged.
            outputFormat: Target format passed to structure generation and
                rendering.
            title: Optional title. Rendering falls back to
                ``"Generated Document"``; the response metadata falls back to
                the title stored in the filled structure's ``metadata``.
            parentOperationId: Forwarded to ``progressLogStart``.

        Returns:
            AiResponse whose ``content`` is the JSON dump of the filled
            structure and whose ``documents`` holds one ``DocumentData`` per
            successfully converted rendered document.

        Raises:
            ValueError: If no rendered document could be converted.
            Exception: Any error raised by a pipeline step is logged, the
                progress entry is finished as failed, and the error is
                re-raised unchanged.
        """
        # Sample the clock once so both parts of the ID share one timestamp.
        timestamp = int(time.time())
        workflow = self.services.workflow
        workflowId = workflow.id if workflow else f"no-workflow-{timestamp}"
        docOperationId = f"doc_gen_{workflowId}_{timestamp}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            docOperationId,
            "Document Generation",
            "Document Generation",
            f"Format: {outputFormat}",
            parentOperationId=parentOperationId
        )

        try:
            # Step 5A: resolve the documents and clarify their intents
            documents = []
            if documentList:
                documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)

            if not documentIntents and documents:
                documentIntents = await self.services.ai.clarifyDocumentIntents(
                    documents,
                    userPrompt,
                    {"outputFormat": outputFormat},
                    docOperationId
                )

            # Step 5B: extract and prepare content (only when documents exist)
            if documents:
                contentParts = await self._prepareContentParts(
                    documents,
                    documentIntents,
                    contentParts,
                    docOperationId
                )

            # Step 5B.5: Documents are converted to contentParts (like pre-processed JSON files)
            # No AI extraction here - AI extraction happens during section generation
            if contentParts:
                logger.info(
                    "Using %d content parts for generation (no AI extraction at this stage)",
                    len(contentParts)
                )

            # Step 5C: generate the structure
            structure = await self.services.ai.generateStructure(
                userPrompt,
                contentParts or [],
                outputFormat,
                docOperationId
            )

            # Step 5D: fill the structure
            # Language will be extracted from services (user intention analysis) in fillStructure
            filledStructure = await self.services.ai.fillStructure(
                structure,
                contentParts or [],
                userPrompt,
                docOperationId
            )

            # Step 5E: render the result
            # Each document is rendered individually and may yield 1..n files (e.g. HTML + images)
            renderedDocuments = await self.services.ai.renderResult(
                filledStructure,
                outputFormat,
                title or "Generated Document",
                userPrompt,
                docOperationId
            )

            # Build the response: convert every rendered document to DocumentData
            documentDataList = self._buildDocumentData(renderedDocuments, filledStructure)
            if not documentDataList:
                raise ValueError("No documents were rendered")

            metadata = AiResponseMetadata(
                title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"),
                operationType=OperationTypeEnum.DATA_GENERATE.value
            )

            # Debug log (harmonised)
            self.services.utils.writeDebugFile(
                json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
                "document_generation_response"
            )

            response = AiResponse(
                content=json.dumps(filledStructure),
                metadata=metadata,
                documents=documentDataList
            )

            # Report success only after the response has been built, so a
            # serialisation or validation error is never logged as a success
            # first and a failure afterwards.
            self.services.chat.progressLogFinish(docOperationId, True)
            return response

        except Exception as e:
            # exc_info keeps the traceback next to the original message text.
            logger.error("Error in document generation: %s", e, exc_info=True)
            self.services.chat.progressLogFinish(docOperationId, False)
            raise

    async def _prepareContentParts(self, documents, documentIntents, contentParts, operationId):
        """Extract parts from ``documents`` and append caller-provided parts."""
        extracted = await self.services.ai.extractAndPrepareContent(
            documents,
            documentIntents or [],
            operationId
        )

        # Work on a copy so the list handed back by the service is not
        # mutated in place; a None result is treated as "no parts".
        merged = list(extracted or [])

        # Merge with the provided contentParts (if any)
        if contentParts:
            for part in contentParts:
                if part.metadata.get("skipExtraction", False):
                    # Already extracted - use as-is, just complete the metadata
                    part.metadata.setdefault("contentFormat", "extracted")
                    part.metadata.setdefault("isPreExtracted", True)
            merged.extend(contentParts)

        return merged

    def _buildDocumentData(self, renderedDocuments, filledStructure):
        """Convert rendered documents to DocumentData, skipping ones that fail."""
        documentDataList = []
        for renderedDoc in renderedDocuments:
            try:
                docDataObj = DocumentData(
                    documentName=renderedDoc.filename,
                    documentData=renderedDoc.documentData,
                    mimeType=renderedDoc.mimeType,
                    # Only the first successfully converted document carries the source JSON
                    sourceJson=None if documentDataList else filledStructure
                )
            except Exception as e:
                # Best effort per document: one bad document must not abort the others.
                logger.warning(
                    "Error creating document %s: %s",
                    getattr(renderedDoc, "filename", "<unknown>"),
                    e
                )
                continue

            documentDataList.append(docDataObj)

            # Logged outside the try so a logging problem cannot be reported
            # as a failure for a document that was already added.
            payload = renderedDoc.documentData
            logger.debug(
                "Added rendered document: %s (%s bytes, %s)",
                renderedDoc.filename,
                len(payload) if payload is not None else 0,
                renderedDoc.mimeType
            )
        return documentDataList
|
|
|