# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""
Document Generation Path

Handles document generation using existing chapter/section model.
"""

import json
import logging
import time
from typing import Dict, Any, List, Optional

from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
from modules.datamodels.datamodelDocument import RenderedDocument

logger = logging.getLogger(__name__)


class DocumentGenerationPath:
    """Document generation path (existing functionality, refactored).

    Drives the generation pipeline through the injected ``services`` object:
    intent clarification, content preparation, structure generation,
    structure filling and rendering.
    """

    # Title used when neither the caller nor the filled structure provides one.
    _DEFAULT_TITLE = "Generated Document"

    def __init__(self, services):
        # Service hub; this class reads ``workflow``, ``chat``, ``ai`` and
        # ``utils`` from it.
        self.services = services

    async def generateDocument(
        self,
        userPrompt: str,
        documentList: Optional[Any] = None,  # DocumentReferenceList
        documentIntents: Optional[List[DocumentIntent]] = None,
        contentParts: Optional[List[ContentPart]] = None,
        outputFormat: str = "txt",
        title: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> AiResponse:
        """
        Generate document using existing chapter/section model.

        Args:
            userPrompt: Prompt forwarded to intent clarification, structure
                generation, structure filling and rendering.
            documentList: Document references, resolved through
                ``services.chat.getChatDocumentsFromDocumentList``.
            documentIntents: Intents for the resolved documents. When empty and
                documents were resolved, they are obtained through
                ``services.ai.clarifyDocumentIntents``.
            contentParts: Caller-supplied content parts. When documents were
                resolved, these are appended after the parts prepared from them.
            outputFormat: Target format handed to structure generation and
                rendering.
            title: Document title. Falls back to the filled structure's
                ``metadata.title`` and then to "Generated Document".
            parentOperationId: Forwarded to ``progressLogStart``.

        Returns:
            AiResponse with documents list. ``content`` is the filled structure
            serialized as JSON.

        Raises:
            ValueError: If no rendered document could be converted to
                ``DocumentData``.
            Exception: Any error raised by a pipeline step is logged, the
                progress entry is finished as failed, and the error is re-raised.
        """
        # Create operation ID
        workflow = self.services.workflow
        workflowId = workflow.id if workflow else f"no-workflow-{int(time.time())}"
        docOperationId = f"doc_gen_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            docOperationId,
            "Document Generation",
            "Document Generation",
            f"Format: {outputFormat}",
            parentOperationId=parentOperationId
        )

        try:
            # Steps 5A + 5B: clarify document intents, then extract and prepare content
            contentParts = await self._prepareContentParts(
                userPrompt,
                documentList,
                documentIntents,
                contentParts,
                outputFormat,
                docOperationId
            )

            # Step 5B.5: Documents are converted to contentParts (like pre-processed JSON files)
            # No AI extraction here - AI extraction happens during section generation
            if contentParts:
                logger.info(f"Using {len(contentParts)} content parts for generation (no AI extraction at this stage)")

            # Step 5C: generate structure
            structure = await self.services.ai.generateStructure(
                userPrompt,
                contentParts or [],
                outputFormat,
                docOperationId
            )

            # Step 5D: fill structure
            # Language will be extracted from services (user intention analysis) in fillStructure
            filledStructure = await self.services.ai.fillStructure(
                structure,
                contentParts or [],
                userPrompt,
                docOperationId
            )

            # Step 5E: render result
            # Each document is rendered individually and may yield 1..n files (e.g. HTML + images)
            renderedDocuments = await self.services.ai.renderResult(
                filledStructure,
                outputFormat,
                title or self._DEFAULT_TITLE,
                userPrompt,
                docOperationId
            )

            # Build response: convert all rendered documents to DocumentData
            documentDataList = self._buildDocumentData(renderedDocuments, filledStructure)
            if not documentDataList:
                raise ValueError("No documents were rendered")

            # ``or {}`` also covers a "metadata" key that is present but None.
            structureMetadata = filledStructure.get("metadata") or {}
            metadata = AiResponseMetadata(
                title=title or structureMetadata.get("title") or self._DEFAULT_TITLE,
                operationType=OperationTypeEnum.DATA_GENERATE.value
            )

            # Debug log (harmonized)
            self.services.utils.writeDebugFile(
                json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str),
                "document_generation_response"
            )

            self.services.chat.progressLogFinish(docOperationId, True)

            return AiResponse(
                # default=str matches the debug dump above, so a value that is
                # not JSON-native cannot fail the operation after rendering.
                content=json.dumps(filledStructure, default=str),
                metadata=metadata,
                documents=documentDataList
            )

        except Exception as e:
            logger.error(f"Error in document generation: {str(e)}")
            self.services.chat.progressLogFinish(docOperationId, False)
            raise

    async def _prepareContentParts(
        self,
        userPrompt: str,
        documentList: Optional[Any],
        documentIntents: Optional[List[DocumentIntent]],
        contentParts: Optional[List[ContentPart]],
        outputFormat: str,
        docOperationId: str
    ) -> Optional[List[ContentPart]]:
        """Resolve source documents into content parts and merge caller-supplied parts.

        Returns ``contentParts`` unchanged when no documents are resolved;
        otherwise a new list holding the prepared parts followed by the
        caller-supplied ones.
        """
        documents = []
        if documentList:
            documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)

        if not documents:
            # Nothing to extract; the caller's parts are used as provided.
            return contentParts

        # Step 5A: clarify document intents
        if not documentIntents:
            documentIntents = await self.services.ai.clarifyDocumentIntents(
                documents,
                userPrompt,
                {"outputFormat": outputFormat},
                docOperationId
            )

        # Step 5B: extract and prepare content
        preparedContentParts = await self.services.ai.extractAndPrepareContent(
            documents,
            documentIntents or [],
            docOperationId
        )

        # Copy so the list object returned by the service is never mutated.
        mergedParts = list(preparedContentParts or [])

        # Merge with caller-supplied contentParts (if any)
        if contentParts:
            # Check for pre-extracted content
            for part in contentParts:
                if part.metadata.get("skipExtraction", False):
                    # Already extracted - use as-is, but make sure the metadata is complete
                    part.metadata.setdefault("contentFormat", "extracted")
                    part.metadata.setdefault("isPreExtracted", True)
            mergedParts.extend(contentParts)

        return mergedParts

    def _buildDocumentData(
        self,
        renderedDocuments: Optional[List[RenderedDocument]],
        filledStructure: Dict[str, Any]
    ) -> List[DocumentData]:
        """Convert rendered documents to DocumentData, skipping entries that fail to convert."""
        documentDataList: List[DocumentData] = []

        for renderedDoc in renderedDocuments or []:
            try:
                # Create DocumentData for each rendered document
                docDataObj = DocumentData(
                    documentName=renderedDoc.filename,
                    documentData=renderedDoc.documentData,
                    mimeType=renderedDoc.mimeType,
                    # Only the first successfully converted document carries the source JSON
                    sourceJson=filledStructure if not documentDataList else None
                )
            except Exception as e:
                # getattr keeps the handler itself from raising on a malformed entry.
                logger.warning(f"Error creating document {getattr(renderedDoc, 'filename', '<unknown>')}: {str(e)}")
                continue

            documentDataList.append(docDataObj)

            # Logged outside the try block so a payload without a length cannot
            # be reported as a creation error after the document was added.
            payload = renderedDoc.documentData
            payloadSize = len(payload) if hasattr(payload, "__len__") else 0
            logger.debug(
                "Added rendered document: %s (%s bytes, %s)",
                renderedDoc.filename,
                payloadSize,
                renderedDoc.mimeType
            )

        return documentDataList