From e1b3cd36f01fc6bca7284d47e72ef99835b5139e Mon Sep 17 00:00:00 2001
From: ValueOn AG
Date: Thu, 25 Dec 2025 00:09:27 +0100
Subject: [PATCH] Enhanced core AI call document handling with document intent

---
 modules/datamodels/datamodelExtraction.py     |    8 +
 modules/services/serviceAi/mainServiceAi.py   | 1626 +++++++++++++----
 .../chunking/chunkerStructure.py              |   36 +-
 .../mainServiceExtraction.py                  |  243 ++-
 .../mainServiceGeneration.py                  |  148 +-
 .../renderers/rendererBaseTemplate.py         |   16 +-
 .../renderers/rendererDocx.py                 |   28 +-
 .../renderers/rendererHtml.py                 |   30 +-
 .../renderers/rendererMarkdown.py             |   30 +-
 .../renderers/rendererPdf.py                  |   31 +-
 .../renderers/rendererPptx.py                 |   64 +-
 .../renderers/rendererText.py                 |   30 +-
 .../serviceGeneration/subContentGenerator.py  |  179 +-
 .../serviceGeneration/subContentIntegrator.py |   20 +-
 .../subDocumentPurposeAnalyzer.py             |  316 ----
 .../subPromptBuilderGeneration.py             |    7 +-
 .../subStructureGenerator.py                  |  104 +-
 modules/shared/jsonUtils.py                   |   35 +-
 .../methods/methodAi/actions/__init__.py      |    4 -
 .../methods/methodAi/actions/convert.py       |  157 --
 .../methods/methodAi/actions/extractData.py   |   59 -
 .../methodAi/actions/generateDocument.py      |  379 +---
 .../methods/methodAi/actions/process.py       |   77 +-
 .../workflows/methods/methodAi/methodAi.py    |  106 --
 .../methodContext/actions/extractContent.py   |   36 +-
 .../methods/methodContext/methodContext.py    |    4 +-
 .../ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md   |  354 ----
 ...ONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md |  459 -----
 ...DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md | 1067 -----------
 ...N_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md |  398 ----
 modules/workflows/workflowManager.py          |  266 ++-
 .../test09_document_generation_formats.py     |  353 +++-
 32 files changed, 2799 insertions(+), 3871 deletions(-)
 delete mode 100644 modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py
 delete mode 100644 modules/workflows/methods/methodAi/actions/convert.py
 delete mode 100644 modules/workflows/methods/methodAi/actions/extractData.py
 delete mode 100644 modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md
 delete mode 100644 modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md
 delete mode 100644 modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md
 delete mode 100644 modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md

diff --git a/modules/datamodels/datamodelExtraction.py b/modules/datamodels/datamodelExtraction.py
index 886df3b9..65f84de0 100644
--- a/modules/datamodels/datamodelExtraction.py
+++ b/modules/datamodels/datamodelExtraction.py
@@ -61,6 +61,14 @@ class MergeStrategy(BaseModel):
     capabilities: Optional[Dict[str, Any]] = Field(default=None, description="Model capabilities for intelligent merging")
 
 
+class DocumentIntent(BaseModel):
+    """Intent analysis for a single document"""
+    documentId: str = Field(description="ID of the document")
+    intents: List[str] = Field(description="List of intents: ['extract', 'render', 'reference'] - multiple possible")
+    extractionPrompt: Optional[str] = Field(default=None, description="Specific prompt for extraction (e.g. 'Extract text from images for legends')")
+    reasoning: str = Field(description="Explanation for debugging/transparency: why was this intent chosen?")
+
+
 class ExtractionOptions(BaseModel):
     """Options for document extraction and processing with clear data structures."""
 
diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py
index 648e922c..30e7cc88 100644
--- a/modules/services/serviceAi/mainServiceAi.py
+++ b/modules/services/serviceAi/mainServiceAi.py
@@ -4,11 +4,12 @@ import json
 import logging
 import re
 import time
+import base64
 from typing import Dict, Any, List, Optional, Tuple
-from modules.datamodels.datamodelChat import PromptPlaceholder
+from modules.datamodels.datamodelChat import PromptPlaceholder, ChatDocument
 from modules.services.serviceExtraction.mainServiceExtraction import ExtractionService
 from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
-from modules.datamodels.datamodelExtraction import ContentPart
+from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
 from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
 from modules.interfaces.interfaceAiObjects import AiObjects
 from modules.shared.jsonUtils import (
@@ -183,7 +184,8 @@ Respond with ONLY a JSON object in this exact format:
         promptBuilder: Optional[callable] = None,
         promptArgs: Optional[Dict[str, Any]] = None,
         operationId: Optional[str] = None,
-        userPrompt: Optional[str] = None
+        userPrompt: Optional[str] = None,
+        contentParts: Optional[List[ContentPart]] = None  # ARCHITECTURE: Support ContentParts for large content
     ) -> str:
         """
         Shared core function for AI calls with repair-based looping system.
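A minimal usage sketch of the new contentParts parameter (illustrative only, not part of the patch: the ContentPart field values, base64Image, generationPrompt, promptArgs and aiOperationId are assumed placeholders; per the hunk below, the helper attaches contentParts to the AiCallRequest only on its first iteration):

    # Illustrative sketch - hypothetical values, not code from this patch.
    imagePart = ContentPart(
        id="obj_doc_1",
        label="Object: chart.png",
        typeGroup="image",
        mimeType="image/png",
        data=base64Image,  # base64-encoded image bytes prepared by the caller (assumed)
        metadata={"contentFormat": "object", "documentId": "doc_1", "intent": "render"},
    )
    generatedJson = await self._callAiWithLooping(
        generationPrompt,
        options,
        "document_generation",
        buildGenerationPrompt,
        promptArgs,
        aiOperationId,
        userPrompt=userPrompt,
        contentParts=[imagePart],  # forwarded to the AiCallRequest only in iteration 1
    )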
@@ -254,10 +256,14 @@ Respond with ONLY a JSON object in this exact format: try: if iterationOperationId: self.services.chat.progressLogUpdate(iterationOperationId, 0.3, "Calling AI model") + # ARCHITECTURE: Pass ContentParts directly to AiCallRequest + # This allows model-aware chunking to handle large content properly + # ContentParts are only passed in first iteration (continuations don't need them) request = AiCallRequest( prompt=iterationPrompt, context="", - options=options + options=options, + contentParts=contentParts if iteration == 1 else None # Only pass ContentParts in first iteration ) # Write the ACTUAL prompt sent to AI @@ -971,22 +977,1164 @@ If no trackable items can be identified, return: {{"kpis": []}} self.services.utils.writeDebugFile(result, f"{debugPrefix}_response") return result + # Helper methods for callAiContent refactoring + + async def _handleImageGeneration( + self, + prompt: str, + options: AiCallOptions, + title: Optional[str], + aiOperationId: str + ) -> AiResponse: + """Handle IMAGE_GENERATE operation type.""" + self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") + + request = AiCallRequest( + prompt=prompt, + context="", + options=options + ) + + response = await self.callAi(request) + + if not response.content: + errorMsg = f"No image data returned: {response.content}" + logger.error(f"Error in AI image generation: {errorMsg}") + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(errorMsg) + + imageDoc = DocumentData( + documentName="generated_image.png", + documentData=response.content, + mimeType="image/png" + ) + + metadata = AiResponseMetadata( + title=title or "Generated Image", + operationType=options.operationType.value + ) + + self.services.chat.storeWorkflowStat( + self.services.workflow, + response, + "ai.generate.image" + ) + + self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated") + self.services.chat.progressLogFinish(aiOperationId, True) + + return AiResponse( + content=response.content, + metadata=metadata, + documents=[imageDoc] + ) + + async def _handleWebOperation( + self, + prompt: str, + options: AiCallOptions, + opType: OperationTypeEnum, + aiOperationId: str + ) -> AiResponse: + """Handle WEB_SEARCH and WEB_CRAWL operation types.""" + self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}") + + request = AiCallRequest( + prompt=prompt, # Raw JSON prompt - connector will parse it + context="", + options=options + ) + + response = await self.callAi(request) + + if not response.content: + errorMsg = f"No content returned from {opType.name}: {response.content}" + logger.error(f"Error in {opType.name}: {errorMsg}") + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(errorMsg) + + metadata = AiResponseMetadata( + operationType=opType.value + ) + + self.services.chat.storeWorkflowStat( + self.services.workflow, + response, + f"ai.{opType.name.lower()}" + ) + + self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed") + self.services.chat.progressLogFinish(aiOperationId, True) + + return AiResponse( + content=response.content, + metadata=metadata + ) + + def _getIntentForDocument( + self, + docId: str, + intents: Optional[List[DocumentIntent]] + ) -> Optional[DocumentIntent]: + """Find DocumentIntent for given documentId.""" + if not intents: + return None + for intent in intents: + if intent.documentId == docId: + return intent + return None + + async def 
_clarifyDocumentIntents( + self, + documents: List[ChatDocument], + userPrompt: str, + actionParameters: Dict[str, Any], + parentOperationId: str + ) -> List[DocumentIntent]: + """ + Phase 5A: Analysiert, welche Dokumente Extraktion vs Referenz benötigen. + Gibt DocumentIntent für jedes Dokument zurück. + + Args: + documents: Liste der zu verarbeitenden Dokumente + userPrompt: User-Anfrage + actionParameters: Action-spezifische Parameter (z.B. resultType, outputFormat) + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Liste von DocumentIntent-Objekten + """ + from modules.datamodels.datamodelChat import ChatDocument + + # Erstelle Operation-ID für Intent-Analyse + intentOperationId = f"{parentOperationId}_intent_analysis" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + intentOperationId, + "Document Intent Analysis", + "Intent Analysis", + f"Analyzing {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + # Mappe pre-extracted JSONs zu ursprünglichen Dokument-IDs für Intent-Analyse + documentMapping = {} # Maps original doc ID -> JSON doc ID + resolvedDocuments = [] + + for doc in documents: + preExtracted = self._resolvePreExtractedDocument(doc) + if preExtracted: + originalDocId = preExtracted["originalDocument"]["id"] + documentMapping[originalDocId] = doc.id + # Erstelle temporäres ChatDocument für ursprüngliches Dokument + from modules.datamodels.datamodelChat import ChatDocument + originalDoc = ChatDocument( + id=originalDocId, + fileName=preExtracted["originalDocument"]["fileName"], + mimeType=preExtracted["originalDocument"]["mimeType"], + fileSize=preExtracted["originalDocument"].get("fileSize", doc.fileSize), + fileId=doc.fileId # Behalte fileId vom JSON + ) + resolvedDocuments.append(originalDoc) + else: + resolvedDocuments.append(doc) + + # Baue Intent-Analyse-Prompt mit ursprünglichen Dokumenten + intentPrompt = self._buildIntentAnalysisPrompt(userPrompt, resolvedDocuments, actionParameters) + + # AI-Call (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.callAiPlanning( + prompt=intentPrompt, + debugType="document_intent_analysis" + ) + + # Parse Result und mappe zurück zu JSON-Dokument-IDs falls nötig + intentsData = json.loads(self.services.utils.jsonExtractString(aiResponse)) + documentIntents = [] + for intent in intentsData.get("intents", []): + docId = intent.get("documentId") + # Wenn Intent für ursprüngliches Dokument, mappe zurück zu JSON-Dokument-ID + if docId in documentMapping: + intent["documentId"] = documentMapping[docId] + documentIntents.append(DocumentIntent(**intent)) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps([intent.dict() for intent in documentIntents], indent=2), + "document_intent_analysis_result" + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(intentOperationId, True) + + return documentIntents + + except Exception as e: + self.services.chat.progressLogFinish(intentOperationId, False) + logger.error(f"Error in _clarifyDocumentIntents: {str(e)}") + raise + + def _resolvePreExtractedDocument(self, document: ChatDocument) -> Optional[Dict[str, Any]]: + """ + Prüft ob ein JSON-Dokument bereits extrahierte ContentParts enthält. 
+ Gibt Dict zurück mit: + - originalDocument: ChatDocument-Info des ursprünglichen Dokuments + - contentExtracted: ContentExtracted-Objekt mit Parts + - parts: Liste der ContentParts + + Returns None wenn kein pre-extracted Format erkannt wird. + """ + if document.mimeType != "application/json": + return None + + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if not docBytes: + return None + + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if not isinstance(jsonData, dict): + return None + + # Check for ContentExtracted format + documentData = None + if "parts" in jsonData and isinstance(jsonData.get("parts"), list): + # Direct ContentExtracted format: {"id": "...", "parts": [...], ...} + documentData = jsonData + else: + validationMetadata = jsonData.get("validationMetadata", {}) + actionType = validationMetadata.get("actionType") + if actionType == "context.extractContent": + # Format: {"validationMetadata": {"actionType": "context.extractContent"}, "documentData": {...}} + documentData = jsonData.get("documentData") + + if documentData: + from modules.datamodels.datamodelExtraction import ContentExtracted + + try: + contentExtracted = ContentExtracted(**documentData) + + if contentExtracted.parts: + # Extrahiere ursprüngliche Dokument-Info aus den Parts + originalDocId = None + originalFileName = None + originalMimeType = None + + for part in contentExtracted.parts: + if part.metadata: + # Versuche ursprüngliche Dokument-Info zu finden + if not originalDocId and part.metadata.get("documentId"): + originalDocId = part.metadata.get("documentId") + if not originalFileName and part.metadata.get("originalFileName"): + originalFileName = part.metadata.get("originalFileName") + if not originalMimeType and part.metadata.get("documentMimeType"): + originalMimeType = part.metadata.get("documentMimeType") + + # Falls nicht gefunden, verwende documentName aus ContentExtracted + if not originalFileName and hasattr(contentExtracted, 'id'): + # Versuche aus documentName zu extrahieren (z.B. 
"B2025-02c_28_extracted_...json" -> "B2025-02c_28.pdf") + if document.fileName and "_extracted_" in document.fileName: + originalFileName = document.fileName.split("_extracted_")[0] + ".pdf" + + return { + "originalDocument": { + "id": originalDocId or document.id, + "fileName": originalFileName or document.fileName, + "mimeType": originalMimeType or "application/pdf", + "fileSize": document.fileSize + }, + "contentExtracted": contentExtracted, + "parts": contentExtracted.parts + } + except Exception as parseError: + logger.debug(f"Could not parse ContentExtracted format: {str(parseError)}") + return None + + return None + except Exception as e: + logger.debug(f"Error resolving pre-extracted document {document.fileName}: {str(e)}") + return None + + def _buildIntentAnalysisPrompt( + self, + userPrompt: str, + documents: List[ChatDocument], + actionParameters: Dict[str, Any] + ) -> str: + """Baue Prompt für Intent-Analyse.""" + # Baue Dokument-Liste - zeige ursprüngliche Dokumente für pre-extracted JSONs + docListText = "" + for i, doc in enumerate(documents, 1): + # Prüfe ob es ein pre-extracted JSON ist + preExtracted = self._resolvePreExtractedDocument(doc) + + if preExtracted: + # Zeige ursprüngliches Dokument statt JSON + originalDoc = preExtracted["originalDocument"] + partsInfo = f" (contains {len(preExtracted['parts'])} pre-extracted parts: {', '.join([p.typeGroup for p in preExtracted['parts'] if p.data and len(str(p.data)) > 0])})" + docListText += f"\n{i}. Document ID: {originalDoc['id']}\n" + docListText += f" File Name: {originalDoc['fileName']}{partsInfo}\n" + docListText += f" MIME Type: {originalDoc['mimeType']}\n" + docListText += f" File Size: {originalDoc.get('fileSize', doc.fileSize)} bytes\n" + else: + # Normales Dokument + docListText += f"\n{i}. Document ID: {doc.id}\n" + docListText += f" File Name: {doc.fileName}\n" + docListText += f" MIME Type: {doc.mimeType}\n" + docListText += f" File Size: {doc.fileSize} bytes\n" + + outputFormat = actionParameters.get("outputFormat", "txt") + + prompt = f"""USER REQUEST: +{userPrompt} + +DOCUMENTS TO ANALYZE: +{docListText} + +TASK: For each document, determine its intents (can be multiple): +- "extract": Content extraction needed (text, structure, OCR, etc.) +- "render": Image/binary should be rendered as-is (visual element) +- "reference": Document reference/attachment (no extraction, just reference) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "intents": [ + {{ + "documentId": "doc_1", + "intents": ["extract"], # Array - can contain multiple! + "extractionPrompt": "Extract all text content, preserving structure", + "reasoning": "User needs text content for document generation" + }}, + {{ + "documentId": "doc_2", + "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering + "extractionPrompt": "Extract text content from image using vision AI", + "reasoning": "Image contains text that needs extraction, but also should be rendered visually" + }}, + {{ + "documentId": "doc_3", + "intents": ["reference"], + "extractionPrompt": null, + "reasoning": "Document is only used as reference, no extraction needed" + }} + ] +}} + +CRITICAL RULES: +1. For images (mimeType starts with "image/"): + - If user wants to "include" or "show" images → add "render" + - If user wants to "analyze", "read text", or "extract text" from images → add "extract" + - Can have BOTH "extract" and "render" if image needs both text extraction and visual rendering + +2. 
For text documents: + - If user mentions "template" or "structure" → "reference" or "extract" based on context + - If user mentions "reference" or "context" → "reference" + - Default → "extract" + +3. Consider output format: + - For formats like PDF, DOCX, PPTX: images usually need "render" + - For formats like CSV, JSON: usually "extract" only + - For HTML: can have both "extract" and "render" + +Return ONLY valid JSON following the structure above. +""" + return prompt + + async def _extractAndPrepareContent( + self, + documents: List[ChatDocument], + documentIntents: List[DocumentIntent], + parentOperationId: str + ) -> List[ContentPart]: + """ + Phase 5B: Extrahiert Content basierend auf Intents und bereitet ContentParts mit Metadaten vor. + Gibt Liste von ContentParts im passenden Format zurück. + + WICHTIG: Ein Dokument kann mehrere ContentParts erzeugen, wenn mehrere Intents vorhanden sind. + Beispiel: Bild mit intents=["extract", "render"] erzeugt: + - ContentPart(contentFormat="object", ...) für Rendering + - ContentPart(contentFormat="extracted", ...) für Text-Analyse + + Args: + documents: Liste der zu verarbeitenden Dokumente + documentIntents: Liste von DocumentIntent-Objekten + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Liste von ContentParts mit vollständigen Metadaten + """ + # Erstelle Operation-ID für Extraktion + extractionOperationId = f"{parentOperationId}_content_extraction" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + extractionOperationId, + "Content Extraction", + "Extraction", + f"Extracting from {len(documents)} documents", + parentOperationId=parentOperationId + ) + + try: + allContentParts = [] + + for document in documents: + # Check if document is already a ContentExtracted document (pre-extracted JSON) + preExtracted = self._resolvePreExtractedDocument(document) + + if preExtracted: + # Verwende bereits extrahierte ContentParts direkt + contentExtracted = preExtracted["contentExtracted"] + intent = self._getIntentForDocument(document.id, documentIntents) + + if contentExtracted.parts: + for part in contentExtracted.parts: + # Überspringe leere Parts (Container ohne Daten) + if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + if part.typeGroup == "container": + continue # Überspringe leere Container + + if not part.metadata: + part.metadata = {} + + # Ensure metadata is complete + if "documentId" not in part.metadata: + part.metadata["documentId"] = document.id + if "contentFormat" not in part.metadata: + part.metadata["contentFormat"] = "extracted" + + # WICHTIG: Prüfe Intent für dieses Part + partIntent = intent.intents if intent else ["extract"] + + # Wenn Intent "render" für Images hat, erstelle auch object Part + if "render" in partIntent and part.typeGroup == "image" and part.data: + # Image-Part mit render Intent: Erstelle sowohl extracted als auch object Part + # 1. Extracted Part (bereits vorhanden) + part.metadata["intent"] = "extract" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = True + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) + + # 2. 
Object Part für Rendering (base64 data ist bereits im extracted Part) + objectPart = ContentPart( + id=f"obj_{document.id}_{part.id}", + label=f"Object: {part.label or 'Image'}", + typeGroup="image", + mimeType=part.mimeType or "image/jpeg", + data=part.data, # Base64 data ist bereits vorhanden + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {preExtracted['originalDocument']['fileName']}", + "originalFileName": preExtracted["originalDocument"]["fileName"], + "relatedExtractedPartId": part.id + } + ) + allContentParts.append(objectPart) + else: + # Normales extracted Part + part.metadata["intent"] = partIntent[0] if partIntent else "extract" + part.metadata["fromExtractContent"] = True + part.metadata["skipExtraction"] = True + part.metadata["originalFileName"] = preExtracted["originalDocument"]["fileName"] + allContentParts.append(part) + + logger.info(f"✅ Using {len([p for p in contentExtracted.parts if p.data and len(str(p.data)) > 0])} pre-extracted ContentParts from ContentExtracted document {document.fileName}") + logger.info(f" Original document: {preExtracted['originalDocument']['fileName']}") + continue # Skip normal extraction for this document + + # Check if it's standardized JSON format (has "documents" or "sections") + if document.mimeType == "application/json": + try: + docBytes = self.services.interfaceDbComponent.getFileData(document.fileId) + if docBytes: + docData = docBytes.decode('utf-8') + jsonData = json.loads(docData) + + if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): + logger.info(f"Document is already in standardized JSON format, using as reference") + # Create reference ContentPart for structured JSON + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="structure", + mimeType="application/json", + data=docData, + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "skipExtraction": True, + "intent": "reference" + } + ) + allContentParts.append(contentPart) + logger.info(f"✅ Using JSON document directly without extraction") + continue # Skip normal extraction for this document + except Exception as e: + logger.warning(f"Could not parse JSON document {document.fileName}, will extract normally: {str(e)}") + # Continue with normal extraction + + # Normal extraction path + intent = self._getIntentForDocument(document.id, documentIntents) + + if not intent: + # Default: extract für alle Dokumente ohne Intent + logger.warning(f"No intent found for document {document.id}, using default 'extract'") + intent = DocumentIntent( + documentId=document.id, + intents=["extract"], + extractionPrompt="Extract all content from the document", + reasoning="Default intent: no specific intent found" + ) + + # WICHTIG: Prüfe alle Intents - ein Dokument kann mehrere ContentParts erzeugen + + if "reference" in intent.intents: + # Erstelle Reference ContentPart + contentPart = ContentPart( + id=f"ref_{document.id}", + label=f"Reference: {document.fileName}", + typeGroup="reference", + mimeType=document.mimeType, + data="", + metadata={ + "contentFormat": "reference", + "documentId": document.id, + "documentReference": f"docItem:{document.id}:{document.fileName}", + "intent": "reference", + "usageHint": f"Reference document: {document.fileName}" + } + ) + allContentParts.append(contentPart) + + # WICHTIG: "render" und 
"extract" können beide vorhanden sein! + # In diesem Fall erzeugen wir BEIDE ContentParts + + if "render" in intent.intents: + # Für Images/Binary: extrahiere als Object + if document.mimeType.startswith("image/") or self._isBinary(document.mimeType): + try: + # Lade Binary-Daten (getFileData ist nicht async - keine await nötig) + binaryData = self.services.interfaceDbComponent.getFileData(document.fileId) + if not binaryData: + logger.warning(f"No binary data found for document {document.id}") + continue + base64Data = base64.b64encode(binaryData).decode('utf-8') + + contentPart = ContentPart( + id=f"obj_{document.id}", + label=f"Object: {document.fileName}", + typeGroup="image" if document.mimeType.startswith("image/") else "binary", + mimeType=document.mimeType, + data=base64Data, + metadata={ + "contentFormat": "object", + "documentId": document.id, + "intent": "render", + "usageHint": f"Render as visual element: {document.fileName}", + "originalFileName": document.fileName, + # Verknüpfung zu extracted Part (falls vorhanden) + "relatedExtractedPartId": f"ext_{document.id}" if "extract" in intent.intents else None + } + ) + allContentParts.append(contentPart) + except Exception as e: + logger.error(f"Failed to load binary data for document {document.id}: {str(e)}") + + if "extract" in intent.intents: + # Extrahiere Content mit Extraction Service + extractionPrompt = intent.extractionPrompt or "Extract all content from the document" + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + extractionPrompt, + f"content_extraction_prompt_{document.id}" + ) + + # Führe Extraktion aus + from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy + + extractionOptions = ExtractionOptions( + prompt=extractionPrompt, + mergeStrategy=MergeStrategy() + ) + + # extractContent ist nicht async - keine await nötig + extractedResults = self.services.extraction.extractContent( + [document], + extractionOptions, + operationId=extractionOperationId, + parentOperationId=extractionOperationId + ) + + # Konvertiere extrahierte Ergebnisse zu ContentParts mit Metadaten + for extracted in extractedResults: + for part in extracted.parts: + # Markiere als extracted Format + part.metadata.update({ + "contentFormat": "extracted", + "documentId": document.id, + "extractionPrompt": extractionPrompt, + "intent": "extract", + "usageHint": f"Use extracted content from {document.fileName}", + # Verknüpfung zu object Part (falls vorhanden) + "relatedObjectPartId": f"obj_{document.id}" if "render" in intent.intents else None + }) + # Stelle sicher, dass ID eindeutig ist (falls object Part existiert) + if "render" in intent.intents: + part.id = f"ext_{document.id}_{part.id}" + allContentParts.append(part) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps([part.dict() for part in allContentParts], indent=2, default=str), + "content_extraction_result" + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(extractionOperationId, True) + + return allContentParts + + except Exception as e: + self.services.chat.progressLogFinish(extractionOperationId, False) + logger.error(f"Error in _extractAndPrepareContent: {str(e)}") + raise + + def _isBinary(self, mimeType: str) -> bool: + """Prüfe ob MIME-Type binary ist.""" + binaryTypes = [ + "application/octet-stream", + "application/pdf", + "application/zip", + "application/x-zip-compressed" + ] + return mimeType in binaryTypes or mimeType.startswith("image/") or mimeType.startswith("video/") or 
mimeType.startswith("audio/") + + async def _generateStructure( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5C: Generiert Dokument-Struktur mit Sections. + Jede Section spezifiziert: + - Welcher Content sollte in dieser Section sein + - Welche ContentParts zu verwenden sind + - Format für jeden ContentPart + + Args: + userPrompt: User-Anfrage + contentParts: Alle vorbereiteten ContentParts mit Metadaten + outputFormat: Ziel-Format (html, docx, pdf, etc.) + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Struktur-Dict mit documents und sections + """ + # Erstelle Operation-ID für Struktur-Generierung + structureOperationId = f"{parentOperationId}_structure_generation" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + structureOperationId, + "Structure Generation", + "Structure", + f"Generating structure for {outputFormat}", + parentOperationId=parentOperationId + ) + + try: + # Baue Struktur-Prompt mit Content-Index + structurePrompt = self._buildStructurePrompt( + userPrompt=userPrompt, + contentParts=contentParts, + outputFormat=outputFormat + ) + + # AI-Call für Struktur-Generierung (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.callAiPlanning( + prompt=structurePrompt, + debugType="document_generation_structure" + ) + + # Parse Struktur + structure = json.loads(self.services.utils.jsonExtractString(aiResponse)) + + # ChatLog abschließen + self.services.chat.progressLogFinish(structureOperationId, True) + + return structure + + except Exception as e: + self.services.chat.progressLogFinish(structureOperationId, False) + logger.error(f"Error in _generateStructure: {str(e)}") + raise + + def _buildStructurePrompt( + self, + userPrompt: str, + contentParts: List[ContentPart], + outputFormat: str + ) -> str: + """Baue Prompt für Struktur-Generierung.""" + # Baue ContentParts-Index - filtere leere Parts heraus + contentPartsIndex = "" + validParts = [] + for part in contentParts: + # Überspringe leere Parts (keine Daten oder nur Container ohne Inhalt) + if not part.data or (isinstance(part.data, str) and len(part.data.strip()) == 0): + # Überspringe Container-Parts ohne Daten + if part.typeGroup == "container" and not part.data: + continue + # Überspringe andere leere Parts + if not part.data: + continue + + validParts.append(part) + + # Baue Index nur für gültige Parts + for i, part in enumerate(validParts, 1): + contentFormat = part.metadata.get("contentFormat", "unknown") + dataPreview = "" + + if contentFormat == "extracted": + # Für Image-Parts: Zeige dass es ein Image ist + if part.typeGroup == "image": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "image" + dataPreview = f"Image data ({mimeType}, {dataLength} chars) - base64 encoded image content" + elif part.typeGroup == "container": + # Container ohne Daten überspringen wir bereits oben + dataPreview = "Container structure (no text content)" + else: + # Zeige Preview von extrahiertem Text + if part.data: + preview = part.data[:200] + "..." 
if len(part.data) > 200 else part.data + dataPreview = preview + else: + dataPreview = "(empty)" + elif contentFormat == "object": + dataLength = len(part.data) if part.data else 0 + mimeType = part.mimeType or "binary" + if part.typeGroup == "image": + dataPreview = f"Base64 encoded image ({mimeType}, {dataLength} chars)" + else: + dataPreview = f"Base64 encoded binary ({mimeType}, {dataLength} chars)" + elif contentFormat == "reference": + dataPreview = part.metadata.get("documentReference", "reference") + + contentPartsIndex += f"\n{i}. ContentPart ID: {part.id}\n" + contentPartsIndex += f" Format: {contentFormat}\n" + contentPartsIndex += f" Type: {part.typeGroup}\n" + contentPartsIndex += f" MIME Type: {part.mimeType or 'N/A'}\n" + contentPartsIndex += f" Source: {part.metadata.get('documentId', 'unknown')}\n" + contentPartsIndex += f" Usage hint: {part.metadata.get('usageHint', 'N/A')}\n" + contentPartsIndex += f" Data preview: {dataPreview}\n" + + if not contentPartsIndex: + contentPartsIndex = "\n(No content parts available)" + + prompt = f"""USER REQUEST: +{userPrompt} + +AVAILABLE CONTENT PARTS: +{contentPartsIndex} + +TASK: Generiere Dokument-Struktur mit Sections. +Für jede Section, spezifiziere: +- section id +- content_type (heading, paragraph, image, table, etc.) +- contentPartIds: [Liste von ContentPart-IDs zu verwenden] +- contentFormats: {{"partId": "reference|object|extracted"}} - Wie jeder ContentPart zu verwenden ist +- generation_hint: Was AI für diese Section generieren soll +- elements: [] (leer, wird in nächster Phase gefüllt) + +OUTPUT FORMAT: {outputFormat} + +RETURN JSON: +{{ + "metadata": {{ + "title": "Document Title", + "language": "de" + }}, + "documents": [{{ + "id": "doc_1", + "title": "Document Title", + "filename": "document.{outputFormat}", + "sections": [ + {{ + "id": "section_1", + "content_type": "heading", + "generation_hint": "Main title", + "contentPartIds": [], + "contentFormats": {{}}, + "elements": [] + }}, + {{ + "id": "section_2", + "content_type": "paragraph", + "generation_hint": "Introduction paragraph", + "contentPartIds": ["part_ext_1"], + "contentFormats": {{ + "part_ext_1": "extracted" + }}, + "elements": [] + }} + ] + }}] +}} + +Return ONLY valid JSON following the structure above. +""" + return prompt + + async def _fillStructure( + self, + structure: Dict[str, Any], + contentParts: List[ContentPart], + userPrompt: str, + parentOperationId: str + ) -> Dict[str, Any]: + """ + Phase 5D: Füllt Struktur mit tatsächlichem Content. 
+ Für jede Section: + - Wenn contentPartIds spezifiziert: Verwende ContentParts im spezifizierten Format + - Wenn generation_hint spezifiziert: Generiere AI-Content + + **Implementierungsdetails:** + - Sections werden **parallel generiert**, wenn möglich (Performance-Optimierung) + - Fehlerhafte Sections werden mit Fehlermeldung gerendert (kein Abbruch des gesamten Prozesses) + + Args: + structure: Struktur-Dict mit documents und sections + contentParts: Alle vorbereiteten ContentParts + userPrompt: User-Anfrage + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Gefüllte Struktur mit elements in jeder Section + """ + import copy + + # Erstelle Operation-ID für Struktur-Abfüllen + fillOperationId = f"{parentOperationId}_structure_filling" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + fillOperationId, + "Structure Filling", + "Filling", + f"Filling {len(structure.get('documents', [{}])[0].get('sections', []))} sections", + parentOperationId=parentOperationId + ) + + try: + filledStructure = copy.deepcopy(structure) + + # Sammle alle Sections für sequenzielle Verarbeitung (parallel kann später optimiert werden) + sections_to_process = [] + for doc in filledStructure.get("documents", []): + for section in doc.get("sections", []): + sections_to_process.append((doc, section)) + + # Sequenzielle Section-Generierung (parallel kann später hinzugefügt werden) + for doc, section in sections_to_process: + sectionId = section.get("id") + contentPartIds = section.get("contentPartIds", []) + contentFormats = section.get("contentFormats", {}) + generationHint = section.get("generation_hint") + + elements = [] + + # Verarbeite ContentParts + for partId in contentPartIds: + part = self._findContentPartById(partId, contentParts) + if not part: + continue + + contentFormat = contentFormats.get(partId, part.metadata.get("contentFormat")) + + if contentFormat == "reference": + # Füge Dokument-Referenz hinzu + elements.append({ + "type": "reference", + "documentReference": part.metadata.get("documentReference"), + "label": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "object": + # Füge base64 Object hinzu + elements.append({ + "type": part.typeGroup, # "image", "binary", etc. 
+ "base64Data": part.data, + "mimeType": part.mimeType, + "altText": part.metadata.get("usageHint", part.label) + }) + + elif contentFormat == "extracted": + # Füge extrahierten Text hinzu (kann in AI-Generierungs-Prompt verwendet werden) + elements.append({ + "type": "extracted_text", + "content": part.data, + "source": part.metadata.get("documentId"), + "extractionPrompt": part.metadata.get("extractionPrompt") + }) + + # Generiere AI-Content wenn nötig + if generationHint: + generationPrompt = self._buildSectionGenerationPrompt( + section=section, + contentParts=[self._findContentPartById(pid, contentParts) for pid in contentPartIds], + userPrompt=userPrompt, + generationHint=generationHint + ) + + # Erstelle Operation-ID für Section-Generierung + # Debug-Logs werden bereits von callAiPlanning geschrieben + sectionOperationId = f"{fillOperationId}_section_{sectionId}" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + sectionOperationId, + "Section Generation", + "Section", + f"Generating section {sectionId}", + parentOperationId=fillOperationId + ) + + try: + # Generiere Content (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.callAiPlanning( + prompt=generationPrompt, + debugType=f"section_generation_{sectionId}" + ) + + # Parse und füge zu elements hinzu + generatedElements = json.loads( + self.services.utils.jsonExtractString(aiResponse) + ) + if isinstance(generatedElements, list): + elements.extend(generatedElements) + elif isinstance(generatedElements, dict) and "elements" in generatedElements: + elements.extend(generatedElements["elements"]) + + # ChatLog abschließen + self.services.chat.progressLogFinish(sectionOperationId, True) + + except Exception as e: + # Fehlerhafte Section mit Fehlermeldung rendern (kein Abbruch!) 
+ self.services.chat.progressLogFinish(sectionOperationId, False) + elements.append({ + "type": "error", + "message": f"Error generating section {sectionId}: {str(e)}", + "sectionId": sectionId + }) + logger.error(f"Error generating section {sectionId}: {str(e)}") + # NICHT raise - Section wird mit Fehlermeldung gerendert + + section["elements"] = elements + + # ChatLog abschließen + self.services.chat.progressLogFinish(fillOperationId, True) + + return filledStructure + + except Exception as e: + self.services.chat.progressLogFinish(fillOperationId, False) + logger.error(f"Error in _fillStructure: {str(e)}") + raise + + def _buildSectionGenerationPrompt( + self, + section: Dict[str, Any], + contentParts: List[Optional[ContentPart]], + userPrompt: str, + generationHint: str + ) -> str: + """Baue Prompt für Section-Generierung.""" + # Filtere None-Werte + validParts = [p for p in contentParts if p is not None] + + contentPartsText = "" + for part in validParts: + contentFormat = part.metadata.get("contentFormat", "unknown") + contentPartsText += f"\n- ContentPart {part.id}:\n" + contentPartsText += f" Format: {contentFormat}\n" + if contentFormat == "extracted": + contentPartsText += f" Content: {part.data[:500]}...\n" if len(part.data) > 500 else f" Content: {part.data}\n" + elif contentFormat == "reference": + contentPartsText += f" Reference: {part.metadata.get('documentReference')}\n" + elif contentFormat == "object": + contentPartsText += f" Object: {part.typeGroup} ({part.mimeType})\n" + + prompt = f"""USER REQUEST: +{userPrompt} + +SECTION TO GENERATE: +{generationHint} + +AVAILABLE CONTENT FOR THIS SECTION: +{contentPartsText} + +CRITICAL: Return ONLY a JSON object with an "elements" array. +Jedes Element sollte dem content_type der Section entsprechen. +""" + return prompt + + def _findContentPartById(self, partId: str, contentParts: List[ContentPart]) -> Optional[ContentPart]: + """Finde ContentPart nach ID.""" + for part in contentParts: + if part.id == partId: + return part + return None + + async def _renderResult( + self, + filledStructure: Dict[str, Any], + outputFormat: str, + title: str, + userPrompt: str, + parentOperationId: str + ) -> Tuple[bytes, str]: + """ + Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. + Unterstützt Multi-Dokument-Rendering: Alle Dokumente werden gerendert. + + Args: + filledStructure: Gefüllte Struktur mit elements + outputFormat: Ziel-Format (pdf, docx, html, etc.) 
+ title: Dokument-Titel + userPrompt: User-Anfrage + parentOperationId: Parent Operation-ID für ChatLog-Hierarchie + + Returns: + Tuple von (renderedContent, mimeType) + """ + # Erstelle Operation-ID für Rendering + renderOperationId = f"{parentOperationId}_rendering" + + # Starte ChatLog mit Parent-Referenz + self.services.chat.progressLogStart( + renderOperationId, + "Content Rendering", + "Rendering", + f"Rendering to {outputFormat} format", + parentOperationId=parentOperationId + ) + + try: + from modules.services.serviceGeneration.mainServiceGeneration import GenerationService + + generationService = GenerationService(self.services) + + # Multi-Dokument-Rendering + documents = filledStructure.get("documents", []) + + if len(documents) == 1: + # Einzelnes Dokument - wie bisher + renderedContent, mimeType, images = await generationService.renderReport( + filledStructure, + outputFormat, + title, + userPrompt, + self, + parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie + ) + else: + # Mehrere Dokumente - rendere alle + # Option: Alle Sections zusammenführen und als ein Dokument rendern + all_sections = [] + for doc in documents: + if "sections" in doc: + all_sections.extend(doc.get("sections", [])) + + # Erstelle temporäres Dokument mit allen Sections + merged_document = { + "metadata": filledStructure["metadata"], + "documents": [{ + "id": "merged", + "title": title, + "filename": f"{title}.{outputFormat}", + "sections": all_sections + }] + } + + renderedContent, mimeType, images = await generationService.renderReport( + merged_document, + outputFormat, + title, + userPrompt, + self, + parentOperationId=renderOperationId # Parent-Referenz für ChatLog-Hierarchie + ) + + # ChatLog abschließen + self.services.chat.progressLogFinish(renderOperationId, True) + + return renderedContent, mimeType + + except Exception as e: + self.services.chat.progressLogFinish(renderOperationId, False) + logger.error(f"Error in _renderResult: {str(e)}") + raise + + def _shouldSkipContentPart( + self, + part: ContentPart + ) -> bool: + """Check if ContentPart should be skipped (already structured JSON).""" + if part.typeGroup == "structure" and part.mimeType == "application/json": + if part.metadata.get("skipExtraction", False): + logger.debug(f"Skipping already-structured JSON ContentPart {part.id} (skipExtraction=True)") + return True + try: + if isinstance(part.data, str): + jsonData = json.loads(part.data) + if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData): + logger.debug(f"Skipping already-structured JSON ContentPart {part.id} (contains documents/sections)") + return True + except Exception: + pass # Not JSON, continue processing + return False + async def callAiContent( self, prompt: str, options: AiCallOptions, contentParts: Optional[List[ContentPart]] = None, + documentList: Optional[Any] = None, # DocumentReferenceList + documentIntents: Optional[List[DocumentIntent]] = None, outputFormat: Optional[str] = None, title: Optional[str] = None, - parentOperationId: Optional[str] = None # Parent operation ID for hierarchical logging + parentOperationId: Optional[str] = None ) -> AiResponse: """ - Unified AI content processing method (replaces callAiDocuments and callAiText). + Einheitliche AI-Content-Verarbeitung - Single Entry Point für alle AI-Actions. + + Alle AI-Actions (ai.process, ai.generateDocument, etc.) routen hier durch. + Sie unterscheiden sich nur in Parametern, nicht in Logik. 
Args: prompt: The main prompt for the AI call - contentParts: Optional list of already-extracted content parts (preferred) options: AI call configuration options (REQUIRED - operationType must be set) + contentParts: Optional list of already-extracted content parts (preferred) + documentList: Optional DocumentReferenceList (wird zu ChatDocuments konvertiert) + documentIntents: Optional list of DocumentIntent objects (wird erstellt wenn nicht vorhanden) outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx') title: Optional title for generated documents parentOperationId: Optional parent operation ID for hierarchical logging @@ -996,14 +2144,11 @@ If no trackable items can be identified, return: {{"kpis": []}} """ await self.ensureAiObjectsInitialized() - # Create separate operationId for detailed progress tracking + # Erstelle Operation-ID workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" aiOperationId = f"ai_content_{workflowId}_{int(time.time())}" - # Use parent operation ID directly (parentId should be operationId, not log entry ID) - # parentOperationId is already the operationId of the parent - - # Start progress tracking with parent reference + # Starte Progress-Tracking mit Parent-Referenz self.services.chat.progressLogStart( aiOperationId, "AI content processing", @@ -1013,376 +2158,141 @@ If no trackable items can be identified, return: {{"kpis": []}} ) try: - # Default outputFormat to "txt" if not specified (unified path - all formats handled the same way) + # Initialisiere Defaults if not outputFormat: outputFormat = "txt" - # Extraction is now separate - contentParts must be extracted before calling - # Require operationType to be set before calling opType = getattr(options, "operationType", None) if not opType: - # outputFormat is always set now (defaults to "txt"), so default to DATA_GENERATE options.operationType = OperationTypeEnum.DATA_GENERATE opType = OperationTypeEnum.DATA_GENERATE - # Handle IMAGE_GENERATE operations + # Route zu Operation-spezifischen Handlern if opType == OperationTypeEnum.IMAGE_GENERATE: - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for image generation") - - request = AiCallRequest( - prompt=prompt, - context="", - options=options - ) - - response = await self.callAi(request) - - if response.content: - # Build document data for image - imageDoc = DocumentData( - documentName="generated_image.png", - documentData=response.content, - mimeType="image/png" - ) - - metadata = AiResponseMetadata( - title=title or "Generated Image", - operationType=opType.value - ) - - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - "ai.generate.image" - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Image generated") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=response.content, - metadata=metadata, - documents=[imageDoc] - ) - else: - errorMsg = f"No image data returned: {response.content}" - logger.error(f"Error in AI image generation: {errorMsg}") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(errorMsg) + return await self._handleImageGeneration(prompt, options, title, aiOperationId) - # Handle WEB_SEARCH and WEB_CRAWL operations if opType == OperationTypeEnum.WEB_SEARCH or opType == OperationTypeEnum.WEB_CRAWL: - self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}") - - request = AiCallRequest( 
- prompt=prompt, # Raw JSON prompt - connector will parse it - context="", - options=options - ) - - response = await self.callAi(request) - - if response.content: - metadata = AiResponseMetadata( - operationType=opType.value - ) - - self.services.chat.storeWorkflowStat( - self.services.workflow, - response, - f"ai.{opType.name.lower()}" - ) - - self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=response.content, - metadata=metadata - ) - else: - errorMsg = f"No content returned from {opType.name}: {response.content}" - logger.error(f"Error in {opType.name}: {errorMsg}") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(errorMsg) + return await self._handleWebOperation(prompt, options, opType, aiOperationId) - # Handle document generation (outputFormat always set, defaults to "txt") - # Unified path: all formats (txt, docx, xlsx, pdf, etc.) handled the same way - # outputFormat is always set now (defaults to "txt" if not specified) - - # CRITICAL: For document generation with JSON templates, NEVER compress the prompt + # Dokument-Generierungs-Pfad options.compressPrompt = False options.compressContext = False - # Process contentParts for generation prompt (if provided) - # Use generic callWithContentParts() which handles all content types (images, text, etc.) - # This automatically processes images with vision models and merges all results - if contentParts: - # Filter out binary/other parts that shouldn't be processed - processableParts = [] - skippedParts = [] - for p in contentParts: - if p.typeGroup in ["image", "text", "table", "structure"] or (p.mimeType and (p.mimeType.startswith("image/") or p.mimeType.startswith("text/"))): - processableParts.append(p) - else: - skippedParts.append(p) - - if skippedParts: - logger.debug(f"Skipping {len(skippedParts)} binary/other parts from document generation") - - if processableParts: - # Count images for progress update - imageCount = len([p for p in processableParts if p.typeGroup == "image" or (p.mimeType and p.mimeType.startswith("image/"))]) - if imageCount > 0: - self.services.chat.progressLogUpdate(aiOperationId, 0.25, f"Extracting data from {imageCount} images using vision models") - - # Build proper extraction prompt using buildExtractionPrompt - # This creates a focused extraction prompt, not the user's generation prompt - from modules.services.serviceExtraction.subPromptBuilderExtraction import buildExtractionPrompt - - # Determine renderer for format-specific guidelines - renderer = None - if outputFormat: - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - renderer = generationService.getRendererForFormat(outputFormat) - except Exception as e: - logger.debug(f"Could not get renderer for format {outputFormat}: {e}") - - extractionPrompt = await buildExtractionPrompt( - outputFormat=outputFormat or "txt", - userPrompt=prompt, # User's prompt as context for what to extract - title=title or "Document", - aiService=self if hasattr(self, 'aiObjects') and self.aiObjects else None, - services=self.services, - renderer=renderer - ) - - logger.info(f"Processing {len(processableParts)} content parts ({imageCount} images) with extraction prompt") - - # Use DATA_EXTRACT operation type for extraction - extractionOptions = AiCallOptions( - operationType=OperationTypeEnum.DATA_EXTRACT, # Use 
DATA_EXTRACT for extraction - compressPrompt=options.compressPrompt, - compressContext=options.compressContext - ) - - extractionRequest = AiCallRequest( - prompt=extractionPrompt, # Use proper extraction prompt, not user's generation prompt - context="", - options=extractionOptions, - contentParts=processableParts - ) - - # Write debug file for extraction prompt (all parts) - self.services.utils.writeDebugFile(extractionPrompt, "content_extraction_prompt") - - # Call generic content parts processor - handles images, text, chunking, merging - extractionResponse = await self.callAi(extractionRequest) - - # Write debug file for extraction response - if extractionResponse.content: - self.services.utils.writeDebugFile(extractionResponse.content, "content_extraction_response") - else: - self.services.utils.writeDebugFile(f"Error: No content returned (errorCount={extractionResponse.errorCount})", "content_extraction_response") - logger.warning(f"Content extraction returned no content (errorCount={extractionResponse.errorCount})") - - # Use extracted content directly for generation prompt - if extractionResponse.errorCount == 0 and extractionResponse.content: - # The extracted content is already merged and ready to use - content_for_generation = extractionResponse.content - logger.info(f"Successfully extracted content from {len(processableParts)} parts ({len(extractionResponse.content)} chars) for document generation") - else: - # Extraction failed - use placeholders - logger.warning(f"Content extraction failed, using placeholders") - placeholderParts = [] - for p in processableParts: - placeholderParts.append(f"[{p.typeGroup}: {p.label} - Extraction failed]") - content_for_generation = "\n\n".join(placeholderParts) if placeholderParts else None - else: - content_for_generation = None - logger.debug("No processable parts found in contentParts") - else: - content_for_generation = None + # Schritt 5A: Kläre Dokument-Intents + documents = [] + if documentList: + documents = self.services.chat.getChatDocumentsFromDocumentList(documentList) - # Detect if this is a section generation prompt (not full document generation) - # Section prompts contain "SECTION TO GENERATE" marker - isSectionGeneration = "SECTION TO GENERATE" in prompt or "CRITICAL: Return ONLY a JSON object with an \"elements\" array" in prompt - - if isSectionGeneration: - # For section generation, use the prompt directly without wrapping - # Section prompts are already complete and should not be wrapped in document generation template - logger.debug("Detected section generation prompt - skipping document generation wrapper") - generation_prompt = prompt - - # Call AI directly without looping (sections are simple, single-call) - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for section generation") - request = AiCallRequest( - prompt=generation_prompt, - context="", - options=options - ) - response = await self.callAi(request) - generated_json = response.content if response and response.content else "" - - # For section generation, return the raw JSON content directly - # No rendering needed - sections are just JSON elements - self.services.chat.progressLogUpdate(aiOperationId, 0.9, "Section content generated") - self.services.chat.progressLogFinish(aiOperationId, True) - - metadata = AiResponseMetadata( - title=title or "Section Content", - operationType=opType.value if opType else None - ) - - return AiResponse( - content=generated_json, - metadata=metadata, - documents=[] - ) - else: - # Full document generation 
- use the wrapper - self.services.chat.progressLogUpdate(aiOperationId, 0.3, "Building generation prompt") - from modules.services.serviceGeneration.subPromptBuilderGeneration import buildGenerationPrompt - - generation_prompt = await buildGenerationPrompt( - outputFormat, prompt, title, content_for_generation, None, self.services - ) - - promptArgs = { - "outputFormat": outputFormat, - "userPrompt": prompt, - "title": title, - "extracted_content": content_for_generation, - "services": self.services - } - - self.services.chat.progressLogUpdate(aiOperationId, 0.4, "Calling AI for content generation") - # Extract user prompt from promptArgs for task completion analysis - userPrompt = None - if promptArgs: - userPrompt = promptArgs.get("userPrompt") or promptArgs.get("user_prompt") - - # Track generation progress - the looping function will update with byte progress - generated_json = await self._callAiWithLooping( - generation_prompt, - options, - "document_generation", - buildGenerationPrompt, - promptArgs, - aiOperationId, - userPrompt=userPrompt + if not documentIntents and documents: + documentIntents = await self._clarifyDocumentIntents( + documents, + prompt, + {"outputFormat": outputFormat}, + aiOperationId ) - # Calculate final size for completion message - finalSize = len(generated_json.encode('utf-8')) if generated_json else 0 - if finalSize < 1024: - finalSizeDisplay = f"{finalSize}B" - elif finalSize < 1024 * 1024: - finalSizeDisplay = f"{finalSize / 1024:.1f}kB" - else: - finalSizeDisplay = f"{finalSize / (1024 * 1024):.1f}MB" + # Schritt 5B: Extrahiere und bereite Content vor + if documents: + preparedContentParts = await self._extractAndPrepareContent( + documents, + documentIntents or [], + aiOperationId + ) + + # Merge mit bereitgestellten contentParts (falls vorhanden) + if contentParts: + # Prüfe auf pre-extracted Content + for part in contentParts: + if part.metadata.get("skipExtraction", False): + # Bereits extrahiert - verwende as-is, stelle sicher dass Metadaten vollständig + part.metadata.setdefault("contentFormat", "extracted") + part.metadata.setdefault("isPreExtracted", True) + preparedContentParts.extend(contentParts) + + contentParts = preparedContentParts - self.services.chat.progressLogUpdate(aiOperationId, 0.7, f"Parsing generated JSON ({finalSizeDisplay})") - try: - extracted_json = self.services.utils.jsonExtractString(generated_json) - generated_data = json.loads(extracted_json) - except json.JSONDecodeError as e: - logger.error(f"Failed to parse generated JSON: {str(e)}") - self.services.utils.writeDebugFile(generated_json, "failed_json_parsing") - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(f"Generated content is not valid JSON: {str(e)}") - - # Extract title and filename from generated document structure - extractedTitle = title - extractedFilename = None - if isinstance(generated_data, dict) and "documents" in generated_data: - docs = generated_data["documents"] - if isinstance(docs, list) and len(docs) > 0: - firstDoc = docs[0] - if isinstance(firstDoc, dict): - if firstDoc.get("title"): - extractedTitle = firstDoc["title"] - if firstDoc.get("filename"): - extractedFilename = firstDoc["filename"] - - # Ensure metadata contains the extracted title - if "metadata" not in generated_data: - generated_data["metadata"] = {} - if extractedTitle: - generated_data["metadata"]["title"] = extractedTitle - - # Create separate operation for content rendering - renderOperationId = f"{aiOperationId}_render" - # Use aiOperationId 
directly as parentOperationId (operationId, not log entry ID) - self.services.chat.progressLogStart( - renderOperationId, - "Content Rendering", - "Rendering", - f"Format: {outputFormat}", - parentOperationId=aiOperationId + # Schritt 5C: Generiere Struktur + structure = await self._generateStructure( + prompt, + contentParts or [], + outputFormat, + aiOperationId ) - try: - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - self.services.chat.progressLogUpdate(renderOperationId, 0.5, f"Rendering to {outputFormat} format") - rendered_content, mime_type, _images = await generationService.renderReport( - generated_data, outputFormat, extractedTitle or "Generated Document", prompt, self - ) - self.services.chat.progressLogFinish(renderOperationId, True) - - # Determine document name - if extractedFilename: - documentName = extractedFilename - elif extractedTitle and extractedTitle != "Generated Document": - sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", extractedTitle) - sanitized = re.sub(r"_+", "_", sanitized).strip("_") - if sanitized: - if not sanitized.lower().endswith(f".{outputFormat}"): - documentName = f"{sanitized}.{outputFormat}" - else: - documentName = sanitized - else: - documentName = f"generated.{outputFormat}" - else: - documentName = f"generated.{outputFormat}" - - # Build document data - docData = DocumentData( - documentName=documentName, - documentData=rendered_content, - mimeType=mime_type, - sourceJson=generated_data # Preserve source JSON for structure validation - ) - - metadata = AiResponseMetadata( - title=extractedTitle or title or "Generated Document", - filename=extractedFilename, - operationType=opType.value if opType else None - ) - - # Write JSON with proper formatting (not str() which can truncate) - jsonStr = json.dumps(generated_data, indent=2, ensure_ascii=False) - self.services.utils.writeDebugFile(jsonStr, "document_generation_response") - self.services.chat.progressLogFinish(aiOperationId, True) - - return AiResponse( - content=json.dumps(generated_data), - metadata=metadata, - documents=[docData] - ) - - except Exception as e: - logger.error(f"Error rendering document: {str(e)}") - if renderOperationId: - self.services.chat.progressLogFinish(renderOperationId, False) - self.services.chat.progressLogFinish(aiOperationId, False) - raise ValueError(f"Rendering failed: {str(e)}") + # Schritt 5D: Fülle Struktur + filledStructure = await self._fillStructure( + structure, + contentParts or [], + prompt, + aiOperationId + ) + + # Schritt 5E: Rendere Resultat + renderedContent, mimeType = await self._renderResult( + filledStructure, + outputFormat, + title or "Generated Document", + prompt, + aiOperationId + ) + + # Baue Response + documentName = self._determineDocumentName(filledStructure, outputFormat, title) + + docData = DocumentData( + documentName=documentName, + documentData=renderedContent, + mimeType=mimeType, + sourceJson=filledStructure + ) + + metadata = AiResponseMetadata( + title=title or filledStructure.get("metadata", {}).get("title", "Generated Document"), + operationType=opType.value + ) + + # Debug-Log (harmonisiert) + self.services.utils.writeDebugFile( + json.dumps(filledStructure, indent=2, ensure_ascii=False, default=str), + "document_generation_response" + ) + + self.services.chat.progressLogFinish(aiOperationId, True) + + return AiResponse( + content=json.dumps(filledStructure), + metadata=metadata, + documents=[docData] + ) except Exception as e: 
logger.error(f"Error in callAiContent: {str(e)}") self.services.chat.progressLogFinish(aiOperationId, False) raise + + def _determineDocumentName( + self, + filledStructure: Dict[str, Any], + outputFormat: str, + title: Optional[str] + ) -> str: + """Bestimme Dokument-Namen aus Struktur oder Titel.""" + # Versuche aus Struktur zu extrahieren + if isinstance(filledStructure, dict) and "documents" in filledStructure: + docs = filledStructure["documents"] + if isinstance(docs, list) and len(docs) > 0: + firstDoc = docs[0] + if isinstance(firstDoc, dict) and firstDoc.get("filename"): + return firstDoc["filename"] + + # Fallback zu Titel + if title: + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if sanitized: + if not sanitized.lower().endswith(f".{outputFormat}"): + return f"{sanitized}.{outputFormat}" + return sanitized + + return f"generated.{outputFormat}" diff --git a/modules/services/serviceExtraction/chunking/chunkerStructure.py b/modules/services/serviceExtraction/chunking/chunkerStructure.py index bdf1bcdb..f4d23a72 100644 --- a/modules/services/serviceExtraction/chunking/chunkerStructure.py +++ b/modules/services/serviceExtraction/chunking/chunkerStructure.py @@ -34,12 +34,42 @@ class StructureChunker(Chunker): if bucket: emit(bucket) else: + # JSON object (dict) - check if it fits text = json.dumps(obj, ensure_ascii=False) - if len(text.encode('utf-8')) <= maxBytes: + textSize = len(text.encode('utf-8')) + if textSize <= maxBytes: emit(obj) else: - # fallback to line chunking - raise ValueError("too large") + # Object too large - try to split by keys if possible + # For large objects, we need to chunk by character boundaries + # since we can't split JSON objects arbitrarily + if isinstance(obj, dict) and len(obj) > 1: + # Try to split object into multiple chunks by keys + # This preserves JSON structure better than line-based chunking + currentChunk: Dict[str, Any] = {} + currentSize = 2 # Start with "{}" overhead + for key, value in obj.items(): + itemText = json.dumps({key: value}, ensure_ascii=False) + itemSize = len(itemText.encode('utf-8')) + # Account for comma and spacing between items + if currentChunk: + itemSize += 2 # ", " separator + + if currentSize + itemSize > maxBytes and currentChunk: + # Current chunk is full, emit it + emit(currentChunk) + currentChunk = {key: value} + currentSize = len(itemText.encode('utf-8')) + else: + currentChunk[key] = value + currentSize += itemSize + + # Emit remaining chunk + if currentChunk: + emit(currentChunk) + else: + # Single large value or can't split - fallback to line chunking + raise ValueError("too large") except Exception: current: List[str] = [] size = 0 diff --git a/modules/services/serviceExtraction/mainServiceExtraction.py b/modules/services/serviceExtraction/mainServiceExtraction.py index 663753cd..a2972453 100644 --- a/modules/services/serviceExtraction/mainServiceExtraction.py +++ b/modules/services/serviceExtraction/mainServiceExtraction.py @@ -6,10 +6,11 @@ import logging import time import asyncio import base64 +import json from .subRegistry import ExtractorRegistry, ChunkerRegistry from .subPipeline import runExtraction -from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult +from modules.datamodels.datamodelExtraction import ContentExtracted, ContentPart, MergeStrategy, ExtractionOptions, PartResult, DocumentIntent from modules.datamodels.datamodelChat import ChatDocument from 
modules.datamodels.datamodelAi import AiCallResponse, AiCallRequest, AiCallOptions, OperationTypeEnum, AiModelCall from modules.aicore.aicoreModelRegistry import modelRegistry @@ -73,12 +74,14 @@ class ExtractionService: if operationId: workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" docOperationId = f"{operationId}_doc_{i}" + # Use parentOperationId if provided, otherwise use operationId as parent + parentId = parentOperationId if parentOperationId else operationId self.services.chat.progressLogStart( docOperationId, "Extracting Document", f"Document {i + 1}/{totalDocs}", doc.fileName[:50] + "..." if len(doc.fileName) > 50 else doc.fileName, - parentOperationId=operationId # Use operationId as parent (not parentOperationId) + parentOperationId=parentId # Correct parent reference for ChatLog hierarchy ) # Start timing for this document @@ -125,12 +128,41 @@ class ExtractionService: if part.metadata: logger.debug(f" Metadata: {part.metadata}") - # Attach document id and MIME type to parts if missing + # Attach complete metadata to parts according to ContentPart Metadaten-Schema for p in ec.parts: + # Ensure metadata dict exists + if not p.metadata: + p.metadata = {} + + # Required metadata fields (from concept) if "documentId" not in p.metadata: p.metadata["documentId"] = documentData["id"] or str(uuid.uuid4()) if "documentMimeType" not in p.metadata: p.metadata["documentMimeType"] = documentData["mimeType"] + if "originalFileName" not in p.metadata: + p.metadata["originalFileName"] = documentData["fileName"] + + # ContentFormat: Set based on typeGroup and mimeType + # Default to "extracted" for text content, but can be overridden by caller + if "contentFormat" not in p.metadata: + # Default: extracted text content + p.metadata["contentFormat"] = "extracted" + + # Intent: Default to "extract" for extracted content + if "intent" not in p.metadata: + p.metadata["intent"] = "extract" + + # ExtractionPrompt: Use from options if available + if "extractionPrompt" not in p.metadata and options and options.prompt: + p.metadata["extractionPrompt"] = options.prompt + + # UsageHint: Provide default hint + if "usageHint" not in p.metadata: + p.metadata["usageHint"] = f"Use extracted content from {documentData['fileName']}" + + # SourceAction: Mark as from extraction service + if "sourceAction" not in p.metadata: + p.metadata["sourceAction"] = "extraction.extractContent" # Log chunking information chunkedParts = [p for p in ec.parts if p.metadata.get("chunk", False)] @@ -185,7 +217,7 @@ class ExtractionService: # Write extraction results to debug file try: from modules.shared.debugLogger import writeDebugFile - import json + # json is already imported at module level # Create summary of extraction results for debug extractionSummary = { "documentName": doc.fileName, @@ -487,7 +519,8 @@ class ExtractionService: prompt: str, aiObjects: Any, options: Optional[AiCallOptions] = None, - operationId: Optional[str] = None + operationId: Optional[str] = None, + parentOperationId: Optional[str] = None ) -> str: """ Process documents with model-aware chunking and merge results. 
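As a rough illustration of the ContentPart metadata schema that extractContent fills in above, a text part could end up carrying a dict along these lines (a sketch only; the identifiers, file name and prompt are placeholder values, not taken from this patch):

part_metadata_example = {
    "documentId": "doc-7f3a",                                      # id of the source ChatDocument
    "documentMimeType": "application/pdf",                         # MIME type of the source document
    "originalFileName": "quarterly_report.pdf",
    "contentFormat": "extracted",                                   # default for extracted text content
    "intent": "extract",                                            # default intent for extracted parts
    "extractionPrompt": "Extract all tables with their captions",   # set only when options.prompt is present
    "usageHint": "Use extracted content from quarterly_report.pdf",
    "sourceAction": "extraction.extractContent",
}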
@@ -499,6 +532,7 @@ class ExtractionService: aiObjects: AiObjects instance for making AI calls options: AI call options operationId: Optional operation ID for progress tracking + parentOperationId: Optional parent operation ID for hierarchical logging Returns: Merged AI results as string with preserved document structure @@ -514,7 +548,8 @@ class ExtractionService: operationId, "AI Text Extract", "Document Processing", - f"Processing {len(documents)} documents" + f"Processing {len(documents)} documents", + parentOperationId=parentOperationId # Use parentOperationId if provided ) try: @@ -539,7 +574,8 @@ class ExtractionService: if operationId: self.services.chat.progressLogUpdate(operationId, 0.1, f"Extracting content from {len(documents)} documents") # Pass operationId as parentOperationId for hierarchical logging - extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=parentOperationId) + # Correct hierarchy: parentOperationId -> operationId -> docOperationId + extractionResult = self.extractContent(documents, extractionOptions, operationId=operationId, parentOperationId=operationId) if not isinstance(extractionResult, list): if operationId: @@ -549,9 +585,10 @@ class ExtractionService: # Process parts (not chunks) with model-aware AI calls if operationId: self.services.chat.progressLogUpdate(operationId, 0.3, f"Processing {len(extractionResult)} extracted content parts") - # Use parent operation ID directly (parentId should be operationId, not log entry ID) - parentOperationId = operationId # Use the parent's operationId directly - partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, parentOperationId) + # Use operationId as parentOperationId for child operations + # Correct hierarchy: parentOperationId -> operationId -> partOperationId + processParentOperationId = operationId + partResults = await self._processPartsWithMapping(extractionResult, prompt, aiObjects, options, operationId, processParentOperationId) # Merge results using existing merging system if operationId: @@ -733,7 +770,8 @@ class ExtractionService: # Detect input type and convert accordingly if isinstance(partResults[0], PartResult): # Existing logic for PartResult (from processDocumentsPerChunk) - for part_result in partResults: + # Phase 7: Add originalIndex for explicit ordering + for i, part_result in enumerate(partResults): content_part = ContentPart( id=part_result.originalPart.id, parentId=part_result.originalPart.parentId, @@ -744,7 +782,9 @@ class ExtractionService: metadata={ **part_result.originalPart.metadata, "aiResult": True, + "originalIndex": i, # Phase 7: Explicit order index "partIndex": part_result.partIndex, + "processingOrder": i, # Phase 7: Processing order "documentId": part_result.documentId, "processingTime": part_result.processingTime, "success": part_result.metadata.get("success", False) @@ -753,6 +793,7 @@ class ExtractionService: content_parts.append(content_part) elif isinstance(partResults[0], AiCallResponse): # Logic from interfaceAiObjects (from content parts processing) + # Phase 7: Add originalIndex for explicit ordering for i, result in enumerate(partResults): if result.content: content_part = ContentPart( @@ -764,6 +805,8 @@ class ExtractionService: data=result.content, metadata={ "aiResult": True, + "originalIndex": i, # Phase 7: Explicit order index + "processingOrder": i, # Phase 7: Processing order "modelName": result.modelName, "priceUsd": result.priceUsd, 
"processingTime": result.processingTime, @@ -792,11 +835,12 @@ class ExtractionService: # Determine merge strategy based on input type if isinstance(partResults[0], PartResult): - # Use strategy for extraction workflow (group by document, order by part index) + # Phase 7: Use originalIndex for explicit ordering + # Use strategy for extraction workflow (group by document, order by originalIndex) merge_strategy = MergeStrategy( useIntelligentMerging=True, groupBy="documentId", # Group by document - orderBy="partIndex", # Order by part index + orderBy="originalIndex", # Phase 7: Order by originalIndex instead of partIndex mergeType="concatenate" ) else: @@ -811,10 +855,52 @@ class ExtractionService: # Apply merging merged_parts = applyMerging(content_parts, merge_strategy) - # Convert back to string - final_content = "\n\n".join([part.data for part in merged_parts]) + # Phase 6: Enhanced format with metadata preservation + # CRITICAL: For generation responses (JSON), don't add SOURCE markers - they interfere with JSON parsing + # Check if this is a generation response by looking at operationType or content structure + isGenerationResponse = False + if options and hasattr(options, 'operationType'): + # Generation responses use DATA_GENERATE operation type + from modules.datamodels.datamodelAi import OperationTypeEnum + isGenerationResponse = options.operationType == OperationTypeEnum.DATA_GENERATE - logger.info(f"Merged {len(partResults)} parts using unified merging system") + # Also check if content looks like JSON (starts with { or [) + if not isGenerationResponse and merged_parts: + firstPartData = merged_parts[0].data if merged_parts[0].data else "" + if isinstance(firstPartData, str) and firstPartData.strip().startswith(('{', '[')): + # Check if it's a complete JSON structure (not extracted content) + # Generation responses are complete JSON, extraction responses are text content + try: + # json is already imported at module level + json.loads(firstPartData.strip()) + # If it parses as JSON and has "documents" key, it's likely a generation response + parsed = json.loads(firstPartData.strip()) + if isinstance(parsed, dict) and "documents" in parsed: + isGenerationResponse = True + except: + pass + + content_sections = [] + for part in merged_parts: + if isGenerationResponse: + # For generation responses, return JSON directly without SOURCE markers + content_sections.append(part.data) + else: + # For extraction responses, include metadata in section header for traceability + doc_id = part.metadata.get("documentId", "unknown") + doc_mime = part.metadata.get("documentMimeType", "unknown") + label = part.label or "content" + + section = f""" +[SOURCE: documentId={doc_id}, mimeType={doc_mime}, label={label}] +{part.data} +[END SOURCE] +""" + content_sections.append(section) + + final_content = "\n\n".join(content_sections) + + logger.info(f"Merged {len(partResults)} parts using unified merging system with metadata preservation (generationResponse={isGenerationResponse})") return final_content.strip() async def chunkContentPartForAi(self, contentPart, model, options, prompt: str = "") -> List[Dict[str, Any]]: @@ -827,9 +913,14 @@ class ExtractionService: modelContextTokens = model.contextLength # Total context in tokens modelMaxOutputTokens = model.maxTokens # Maximum output tokens + # CRITICAL: Use same conservative token factor as in processContentPartWithFallback + # Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!) 
+ TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead + # Reserve tokens for: - # 1. Prompt (user message) - promptTokens = len(prompt.encode('utf-8')) / 4 if prompt else 0 + # 1. Prompt (user message) - use conservative factor + promptSize = len(prompt.encode('utf-8')) if prompt else 0 + promptTokens = promptSize / TOKEN_SAFETY_FACTOR # 2. System message wrapper ("Context from documents:\n") systemMessageTokens = 10 # ~40 bytes = 10 tokens @@ -844,31 +935,38 @@ class ExtractionService: totalReservedTokens = promptTokens + systemMessageTokens + messageOverheadTokens + outputTokens # Available tokens for content = context length - reserved tokens - # Use 80% of available for safety margin - availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.8) + # Use 60% of available (same conservative margin as in processContentPartWithFallback) + availableContentTokens = int((modelContextTokens - totalReservedTokens) * 0.60) # Ensure we have at least some space if availableContentTokens < 100: logger.warning(f"Very limited space for content: {availableContentTokens} tokens available. Model: {model.name}, contextLength: {modelContextTokens}, maxTokens: {modelMaxOutputTokens}, prompt: {promptTokens:.0f} tokens") availableContentTokens = max(100, int(modelContextTokens * 0.1)) # Fallback to 10% of context - # Convert tokens to bytes (1 token ≈ 4 bytes) - availableContentBytes = availableContentTokens * 4 + # Convert tokens to bytes using conservative factor (reverse: bytes = tokens * factor) + availableContentBytes = int(availableContentTokens * TOKEN_SAFETY_FACTOR) - logger.debug(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens, reserved={totalReservedTokens:.0f} tokens, available={availableContentTokens} tokens ({availableContentBytes} bytes)") + logger.info(f"Chunking calculation for {model.name}: contextLength={modelContextTokens} tokens, maxTokens={modelMaxOutputTokens} tokens, prompt={promptTokens:.0f} tokens est., reserved={totalReservedTokens:.0f} tokens est., available={availableContentTokens} tokens est. 
({availableContentBytes} bytes), factor={TOKEN_SAFETY_FACTOR}") - # Use 70% of available content bytes for text chunks (conservative) - textChunkSize = int(availableContentBytes * 0.7) - imageChunkSize = int(availableContentBytes * 0.8) # 80% for image chunks + # Use 50% of available content bytes for text chunks (very conservative to ensure chunks fit) + # This ensures that even with token counting inaccuracies, chunks will fit + textChunkSize = int(availableContentBytes * 0.5) + structureChunkSize = int(availableContentBytes * 0.5) # CRITICAL: Also set for StructureChunker (JSON content) + tableChunkSize = int(availableContentBytes * 0.5) # Also set for TableChunker + imageChunkSize = int(availableContentBytes * 0.6) # 60% for image chunks - # Build chunking options + # Build chunking options - include ALL chunk size options for different chunkers chunkingOptions = { "textChunkSize": textChunkSize, + "structureChunkSize": structureChunkSize, # CRITICAL: Required for StructureChunker (JSON) + "tableChunkSize": tableChunkSize, # Required for TableChunker "imageChunkSize": imageChunkSize, "maxSize": availableContentBytes, "chunkAllowed": True } + logger.info(f"Chunking options: textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes, tableChunkSize={tableChunkSize} bytes, imageChunkSize={imageChunkSize} bytes, contentPartSize={len(contentPart.data.encode('utf-8')) if contentPart.data else 0} bytes") + # Get appropriate chunker (uses existing ChunkerRegistry ✅) chunker = self._chunkerRegistry.resolve(contentPart.typeGroup) @@ -878,8 +976,14 @@ class ExtractionService: # Chunk the content part try: + contentSize = len(contentPart.data.encode('utf-8')) if contentPart.data else 0 + logger.info(f"Chunking {contentPart.typeGroup} part: contentSize={contentSize} bytes, textChunkSize={textChunkSize} bytes, structureChunkSize={structureChunkSize} bytes") chunks = chunker.chunk(contentPart, chunkingOptions) - logger.debug(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part") + logger.info(f"Created {len(chunks)} chunks for {contentPart.typeGroup} part (contentSize={contentSize} bytes)") + if chunks: + for i, chunk in enumerate(chunks): + chunkSize = len(chunk.get('data', '').encode('utf-8')) if chunk.get('data') else 0 + logger.info(f" Chunk {i+1}/{len(chunks)}: {chunkSize} bytes") return chunks except Exception as e: logger.error(f"Chunking failed for {contentPart.typeGroup}: {str(e)}") @@ -999,15 +1103,86 @@ class ExtractionService: availableContentBytes = availableContentTokens * 4 - logger.debug(f"Size check for {model.name}: partSize={partSize} bytes, availableContentBytes={availableContentBytes} bytes") + # Also check prompt size - prompt + content together must fit + promptSize = len(prompt.encode('utf-8')) if prompt else 0 - if partSize <= availableContentBytes: + # CRITICAL: Token counting approximation is VERY inaccurate for JSON/content + # Real-world observation: Our calculation says 94k tokens, but API says 217k tokens (2.3x difference!) + # This happens because: + # 1. JSON/structured content tokenizes differently (more tokens per byte) + # 2. API has message structure overhead (system prompts, message wrappers) + # 3. 
Tokenizer differences between our approximation and actual API tokenizer + # Use conservative factor: 1 token ≈ 2.2 bytes (instead of 4) to account for these differences + TOKEN_SAFETY_FACTOR = 2.2 # Conservative: accounts for JSON tokenization and API overhead + promptTokens = promptSize / TOKEN_SAFETY_FACTOR + contentTokens = partSize / TOKEN_SAFETY_FACTOR + totalTokens = promptTokens + contentTokens + + # CRITICAL: Use very conservative margin (60%) because: + # 1. Token counting approximation is inaccurate - real tokens can be 2-3x more + # 2. API has additional overhead (message structure, system prompts, etc.) + # 3. Anthropic API is strict about the 200k limit + # 4. We've seen cases where our calculation says "fits" but API says "too long" + maxTotalTokens = int(modelContextTokens * 0.60) + + logger.info(f"Size check for {model.name}: partSize={partSize} bytes ({contentTokens:.0f} tokens est.), promptSize={promptSize} bytes ({promptTokens:.0f} tokens est.), total={totalTokens:.0f} tokens est., modelContext={modelContextTokens} tokens, maxTotal={maxTotalTokens} tokens (60% margin, conservative factor={TOKEN_SAFETY_FACTOR})") + + # CRITICAL: Always check totalTokens first - if prompt + content exceeds limit, MUST chunk + # Token counting approximation may differ significantly from API, so use very conservative margin + if totalTokens > maxTotalTokens: + logger.warning(f"⚠️ Total tokens ({totalTokens:.0f} est.) exceed model limit ({maxTotalTokens}), chunking required. Prompt: {promptTokens:.0f} tokens est., Content: {contentTokens:.0f} tokens est.") + elif partSize > availableContentBytes: + logger.warning(f"⚠️ Content part ({contentTokens:.0f} tokens est.) exceeds available space ({availableContentBytes/TOKEN_SAFETY_FACTOR:.0f} tokens est.), chunking required") + + # If either condition fails, chunk the content + if totalTokens > maxTotalTokens or partSize > availableContentBytes: + # Part too large or total exceeds limit - chunk it + chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt) + if not chunks: + raise ValueError(f"Failed to chunk content part for model {model.name}") + + logger.info(f"Starting to process {len(chunks)} chunks with model {model.name}") + + if progressCallback: + progressCallback(0.0, f"Starting to process {len(chunks)} chunks") + + chunkResults = [] + for idx, chunk in enumerate(chunks): + chunkNum = idx + 1 + chunkData = chunk.get('data', '') + logger.info(f"Processing chunk {chunkNum}/{len(chunks)} with model {model.name}") + + if progressCallback: + progressCallback(chunkNum / len(chunks), f"Processing chunk {chunkNum}/{len(chunks)}") + + try: + chunkResponse = await aiObjects._callWithModel(model, prompt, chunkData, options) + chunkResults.append(chunkResponse) + except Exception as chunkError: + logger.error(f"Error processing chunk {chunkNum}/{len(chunks)}: {str(chunkError)}") + # Continue with other chunks even if one fails + continue + + # Merge chunk results + if not chunkResults: + raise ValueError(f"All chunks failed for content part") + + mergedContent = self.mergePartResults(chunkResults, options) + return AiCallResponse( + content=mergedContent, + modelName=model.name, + priceUsd=sum(r.priceUsd for r in chunkResults), + processingTime=sum(r.processingTime for r in chunkResults), + bytesSent=sum(r.bytesSent for r in chunkResults), + bytesReceived=sum(r.bytesReceived for r in chunkResults), + errorCount=sum(r.errorCount for r in chunkResults) + ) + else: # Part fits - call AI directly via aiObjects interface + 
logger.info(f"✅ Content part fits within model limits, processing directly") response = await aiObjects._callWithModel(model, prompt, contentPart.data, options) logger.info(f"✅ Content part processed successfully with model: {model.name}") return response - else: - # Part too large - chunk it chunks = await self.chunkContentPartForAi(contentPart, model, options, prompt) if not chunks: raise ValueError(f"Failed to chunk content part for model {model.name}") @@ -1037,8 +1212,8 @@ class ExtractionService: logger.error(f"❌ Error processing chunk {chunkNum}/{len(chunks)}: {str(e)}") raise - # Merge chunk results - mergedContent = self.mergeChunkResults(chunkResults) + # Merge chunk results using unified mergePartResults + mergedContent = self.mergePartResults(chunkResults, options) logger.info(f"✅ Content part chunked and processed with model: {model.name} ({len(chunks)} chunks)") return AiCallResponse( diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 5b518afa..cababbeb 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -2,7 +2,9 @@ # All rights reserved. import logging import uuid -from typing import Any, Dict, List, Optional +import base64 +import traceback +from typing import Any, Dict, List, Optional, Callable from modules.datamodels.datamodelChat import ChatDocument from modules.services.serviceGeneration.subDocumentUtility import ( getFileExtension, @@ -100,12 +102,12 @@ class GenerationService: # For binary data, handle bytes vs base64 string vs regular string if isinstance(documentData, bytes): # Already bytes - encode to base64 string for storage - import base64 + # base64 is already imported at module level content = base64.b64encode(documentData).decode('utf-8') base64encoded = True elif isinstance(documentData, str): # Check if it's already valid base64 - import base64 + # base64 is already imported at module level try: # Try to decode to verify it's base64 base64.b64decode(documentData, validate=True) @@ -122,7 +124,7 @@ class GenerationService: continue else: # Other types - convert to string then base64 - import base64 + # base64 is already imported at module level try: content = base64.b64encode(str(documentData).encode('utf-8')).decode('utf-8') base64encoded = True @@ -231,7 +233,7 @@ class GenerationService: return None # Convert content to bytes if base64encoded: - import base64 + # base64 is already imported at module level content_bytes = base64.b64decode(content) else: content_bytes = content.encode('utf-8') @@ -319,10 +321,12 @@ class GenerationService: 'workflowId': 'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str, List[Dict[str, Any]]]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> tuple[str, str, List[Dict[str, Any]]]: """ Render extracted JSON content to the specified output format. + Supports multiple documents in documents array (Phase 5: Multi-Dokument-Rendering). Always uses unified "documents" array format. + Supports three content formats: reference, object (base64), extracted_text. 
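The standardized payload this method consumes can be pictured roughly as follows (a minimal sketch; ids, titles and section contents are placeholders and the element bodies are abbreviated):

example_extracted_content = {
    "metadata": {"title": "Quarterly Review"},
    "documents": [
        {
            "id": "doc-1",
            "title": "Quarterly Review",
            "filename": "quarterly_review.pdf",
            "sections": [
                {"content_type": "heading", "elements": [{"type": "heading", "text": "Overview"}]},
                {"content_type": "paragraph", "elements": [{"type": "paragraph", "text": "..."}]},
            ],
        },
        # Further documents are allowed; with more than one, all sections are merged before rendering.
    ],
}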
Args: extractedContent: Structured JSON document from AI extraction @@ -330,6 +334,7 @@ class GenerationService: title: Report title userPrompt: User's original prompt for report generation aiService: AI service instance for generation prompt creation + parentOperationId: Optional parent operation ID for hierarchical logging Returns: tuple: (rendered_content, mime_type, images_list) @@ -348,15 +353,40 @@ class GenerationService: if len(documents) == 0: raise ValueError("No documents found in 'documents' array") - # Use first document for rendering - single_doc = documents[0] - if "sections" not in single_doc: - raise ValueError("Document must contain 'sections' field") - - # Pass standardized schema to renderer (maintains architecture) - # Renderer should extract sections from documents array according to standardized schema - # Standardized schema: {metadata: {...}, documents: [{sections: [...]}]} - contentToRender = extractedContent # Pass full standardized schema + # Phase 5: Multi-Dokument-Rendering + if len(documents) == 1: + # Single document - use existing logic + single_doc = documents[0] + if "sections" not in single_doc: + raise ValueError("Document must contain 'sections' field") + + # Pass standardized schema to renderer (maintains architecture) + contentToRender = extractedContent # Pass full standardized schema + else: + # Multiple documents - merge all sections into one document for rendering + # Option: Merge all sections from all documents into a single document + all_sections = [] + for doc in documents: + if isinstance(doc, dict) and "sections" in doc: + sections = doc.get("sections", []) + if isinstance(sections, list): + all_sections.extend(sections) + + if not all_sections: + raise ValueError("No sections found in any document") + + # Create merged document with all sections + merged_document = { + "metadata": extractedContent.get("metadata", {}), + "documents": [{ + "id": "merged", + "title": title, + "filename": f"{title}.{outputFormat}", + "sections": all_sections + }] + } + contentToRender = merged_document + logger.info(f"Rendering {len(documents)} documents with {len(all_sections)} total sections") # Get the appropriate renderer for the format renderer = self._getFormatRenderer(outputFormat) @@ -378,6 +408,92 @@ class GenerationService: logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}") raise + async def generateDocumentWithTwoPhases( + self, + userPrompt: str, + cachedContent: Optional[Dict[str, Any]] = None, + contentParts: Optional[List[Any]] = None, + maxSectionLength: int = 500, + parallelGeneration: bool = True, + progressCallback: Optional[Callable] = None + ) -> Dict[str, Any]: + """ + Generate document using two-phase approach: + 1. Generate structure skeleton with empty sections + 2. Generate content for each section iteratively + + This is the core logic for document generation in AI calls. 
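A minimal usage sketch of this two-phase flow (the caller name and the print-based progress handler are made up for illustration; only generateDocumentWithTwoPhases and its parameters come from this patch):

async def buildReport(generationService, userPrompt, contentParts):
    # Hypothetical caller that forwards progress to stdout instead of the chat progress log.
    def onProgress(progress, total, message):
        print(f"[{progress}/{total}] {message}")

    completeStructure = await generationService.generateDocumentWithTwoPhases(
        userPrompt=userPrompt,
        contentParts=contentParts,         # optional ContentParts reused for structure and sections
        maxSectionLength=500,              # word budget for simple sections
        parallelGeneration=True,
        progressCallback=onProgress,
    )
    return completeStructure               # ready to pass to renderReport(...)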
+ + Args: + userPrompt: User's original prompt + cachedContent: Optional extracted content cache (from extraction phase) + contentParts: Optional list of ContentParts to use for structure generation + maxSectionLength: Maximum words for simple sections + parallelGeneration: Enable parallel section generation + progressCallback: Optional callback function(progress, total, message) for progress updates + + Returns: + Complete document structure with populated elements ready for rendering + """ + try: + from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator + from modules.services.serviceGeneration.subContentGenerator import ContentGenerator + + # Phase 1: Generate structure skeleton + if progressCallback: + progressCallback(0, 100, "Generating document structure...") + + structureGenerator = StructureGenerator(self.services) + + # Extract imageDocuments from cachedContent if available + existingImages = None + if cachedContent and cachedContent.get("imageDocuments"): + existingImages = cachedContent.get("imageDocuments") + + structure = await structureGenerator.generateStructure( + userPrompt=userPrompt, + documentList=None, # Not used in current implementation + cachedContent=cachedContent, + contentParts=contentParts, # Pass ContentParts for structure generation + maxSectionLength=maxSectionLength, + existingImages=existingImages + ) + + if progressCallback: + progressCallback(30, 100, "Structure generated, starting content generation...") + + # Phase 2: Generate content for each section + contentGenerator = ContentGenerator(self.services) + + # Create progress callback wrapper for content generation phase (30-90%) + def contentProgressCallback(sectionIndex: int, totalSections: int, message: str): + if progressCallback: + # Map section progress to overall progress (30% to 90%) + if totalSections > 0: + overallProgress = 30 + int(60 * (sectionIndex / totalSections)) + else: + overallProgress = 30 + progressCallback(overallProgress, 100, f"Section {sectionIndex}/{totalSections}: {message}") + + completeStructure = await contentGenerator.generateContent( + structure=structure, + cachedContent=cachedContent, + userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for content generation + progressCallback=contentProgressCallback, + parallelGeneration=parallelGeneration + ) + + if progressCallback: + progressCallback(100, 100, "Document generation complete") + + return completeStructure + + except Exception as e: + logger.error(f"Error in two-phase document generation: {str(e)}") + logger.debug(traceback.format_exc()) + raise + async def getAdaptiveExtractionPrompt( self, outputFormat: str, @@ -423,6 +539,6 @@ class GenerationService: except Exception as e: logger.error(f"Error getting renderer for {output_format}: {str(e)}") - import traceback + # traceback is already imported at module level logger.debug(traceback.format_exc()) return None \ No newline at end of file diff --git a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py index 491c1d06..e9693680 100644 --- a/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py +++ b/modules/services/serviceGeneration/renderers/rendererBaseTemplate.py @@ -68,6 +68,7 @@ class BaseRenderer(ABC): def _extractSections(self, reportData: Dict[str, Any]) -> List[Dict[str, Any]]: """ Extract sections from standardized schema: {metadata: {...}, documents: [{sections: [...]}]} + Phase 5: Supports multiple 
documents - extracts all sections from all documents. """ if "documents" not in reportData: raise ValueError("Report data must follow standardized schema with 'documents' array") @@ -76,11 +77,18 @@ class BaseRenderer(ABC): if not isinstance(documents, list) or len(documents) == 0: raise ValueError("Standardized schema must contain at least one document in 'documents' array") - firstDoc = documents[0] - if not isinstance(firstDoc, dict) or "sections" not in firstDoc: - raise ValueError("Document in standardized schema must contain 'sections' field") + # Phase 5: Extract sections from ALL documents + all_sections = [] + for doc in documents: + if isinstance(doc, dict) and "sections" in doc: + sections = doc.get("sections", []) + if isinstance(sections, list): + all_sections.extend(sections) - return firstDoc.get("sections", []) + if not all_sections: + raise ValueError("No sections found in any document") + + return all_sections def _extractMetadata(self, reportData: Dict[str, Any]) -> Dict[str, Any]: """ diff --git a/modules/services/serviceGeneration/renderers/rendererDocx.py b/modules/services/serviceGeneration/renderers/rendererDocx.py index 48fb94f1..f62935d8 100644 --- a/modules/services/serviceGeneration/renderers/rendererDocx.py +++ b/modules/services/serviceGeneration/renderers/rendererDocx.py @@ -9,6 +9,7 @@ from typing import Dict, Any, Tuple, List import io import base64 import re +import csv try: from docx import Document @@ -225,13 +226,36 @@ class RendererDocx(BaseRenderer): self.logger.warning(f"Could not clear template content: {str(e)}") def _renderJsonSection(self, doc: Document, section: Dict[str, Any], styles: Dict[str, Any]) -> None: - """Render a single JSON section to DOCX using AI-generated styles.""" + """Render a single JSON section to DOCX using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. + """ try: section_type = section.get("content_type", "paragraph") elements = section.get("elements", []) # Process each element in the section for element in elements: + element_type = element.get("type", "") + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + para = doc.add_paragraph(f"[Reference: {label}]") + para.runs[0].italic = True + continue + elif element_type == "extracted_text": + # Extracted text format - render as paragraph + content = element.get("content", "") + source = element.get("source", "") + if content: + para = doc.add_paragraph(content) + if source: + para.add_run(f" (Source: {source})").italic = True + continue + + # Standard section types if section_type == "table": self._renderJsonTable(doc, element, styles) elif section_type == "bullet_list": @@ -848,7 +872,7 @@ class RendererDocx(BaseRenderer): Process tables in the content (both CSV and pipe-separated) and convert them to Word tables. Returns the content with tables replaced by placeholders. 
""" - import csv + # csv is already imported at module level lines = content.split('\n') processed_lines = [] diff --git a/modules/services/serviceGeneration/renderers/rendererHtml.py b/modules/services/serviceGeneration/renderers/rendererHtml.py index 163690d3..54c7e64b 100644 --- a/modules/services/serviceGeneration/renderers/rendererHtml.py +++ b/modules/services/serviceGeneration/renderers/rendererHtml.py @@ -297,11 +297,39 @@ class RendererHtml(BaseRenderer): return '\n'.join(css_parts) def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> str: - """Render a single JSON section to HTML using AI-generated styles.""" + """Render a single JSON section to HTML using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. + """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + htmlParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + htmlParts.append(f'

<p><em>[Reference: {label}]</em></p>
') + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f' (Source: {source})' if source else '' + htmlParts.append(f'

<p>{content}{source_text}</p>
') + continue + + # If we processed reference/extracted_text elements, return them + if htmlParts: + return '\n'.join(htmlParts) + if sectionType == "table": # Process the section data to extract table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/renderers/rendererMarkdown.py b/modules/services/serviceGeneration/renderers/rendererMarkdown.py index 3c9569e9..dfe2bda2 100644 --- a/modules/services/serviceGeneration/renderers/rendererMarkdown.py +++ b/modules/services/serviceGeneration/renderers/rendererMarkdown.py @@ -77,11 +77,39 @@ class RendererMarkdown(BaseRenderer): raise Exception(f"Markdown generation failed: {str(e)}") def _renderJsonSection(self, section: Dict[str, Any]) -> str: - """Render a single JSON section to markdown.""" + """Render a single JSON section to markdown. + Supports three content formats: reference, object (base64), extracted_text. + """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + markdownParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + markdownParts.append(f"*[Reference: {label}]*") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" *(Source: {source})*" if source else "" + markdownParts.append(f"{content}{source_text}") + continue + + # If we processed reference/extracted_text elements, return them + if markdownParts: + return '\n\n'.join(markdownParts) + if sectionType == "table": # Process the section data to extract table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/renderers/rendererPdf.py b/modules/services/serviceGeneration/renderers/rendererPdf.py index 1cfcfad7..128e84d3 100644 --- a/modules/services/serviceGeneration/renderers/rendererPdf.py +++ b/modules/services/serviceGeneration/renderers/rendererPdf.py @@ -477,7 +477,9 @@ class RendererPdf(BaseRenderer): return colors.black def _renderJsonSection(self, section: Dict[str, Any], styles: Dict[str, Any]) -> List[Any]: - """Render a single JSON section to PDF elements using AI-generated styles.""" + """Render a single JSON section to PDF elements using AI-generated styles. + Supports three content formats: reference, object (base64), extracted_text. 
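The two non-standard element shapes handled below can be sketched like this (the keys are the ones the renderers read in this patch; the values are placeholders):

reference_element = {
    "type": "reference",
    "documentReference": "doc-7f3a",           # id of the referenced source document
    "label": "Original contract (PDF)",
}

extracted_text_element = {
    "type": "extracted_text",
    "content": "The notice period is three months.",
    "source": "contract.pdf, section 4.2",
}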
+ """ try: section_type = self._getSectionType(section) elements = self._getSectionData(section) @@ -485,6 +487,33 @@ class RendererPdf(BaseRenderer): # Process each element in the section all_elements = [] for element in elements: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + ref_style = ParagraphStyle( + 'Reference', + parent=self._createNormalStyle(styles), + fontStyle='italic', + textColor=colors.grey + ) + all_elements.append(Paragraph(f"[Reference: {label}]", ref_style)) + all_elements.append(Spacer(1, 6)) + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + all_elements.append(Paragraph(f"{content}{source_text}", self._createNormalStyle(styles))) + all_elements.append(Spacer(1, 6)) + continue + + # Standard section types if section_type == "table": all_elements.extend(self._renderJsonTable(element, styles)) elif section_type == "bullet_list": diff --git a/modules/services/serviceGeneration/renderers/rendererPptx.py b/modules/services/serviceGeneration/renderers/rendererPptx.py index 6b1b9e18..e9ad334c 100644 --- a/modules/services/serviceGeneration/renderers/rendererPptx.py +++ b/modules/services/serviceGeneration/renderers/rendererPptx.py @@ -3,6 +3,9 @@ import logging import base64 import io +import json +import re +from datetime import datetime, UTC from typing import Dict, Any, Optional, Tuple, List from .rendererBaseTemplate import BaseRenderer @@ -261,7 +264,7 @@ class RendererPptx(BaseRenderer): Returns: List of slide content strings """ - import re + # re is already imported at module level # First, try to split by major headers (# or ##) # This is the most common case for AI-generated content @@ -399,7 +402,7 @@ class RendererPptx(BaseRenderer): def _createProfessionalPptxTemplate(self, userPrompt: str, style_schema: Dict[str, Any]) -> str: """Create a professional PowerPoint-specific AI style template for corporate-quality slides.""" - import json + # json is already imported at module level schema_json = json.dumps(style_schema, indent=4) return f"""Customize the JSON below for professional PowerPoint slides. @@ -443,8 +446,7 @@ JSON ONLY. NO OTHER TEXT.""" self.logger.warning("AI service returned no response, using defaults") return default_styles - import json - import re + # json and re are already imported at module level # Clean and parse JSON result = response.content.strip() if response and response.content else "" @@ -634,6 +636,27 @@ JSON ONLY. 
NO OTHER TEXT.""" content_type = section.get("content_type", "paragraph") elements = section.get("elements", []) + # Check for three content formats from Phase 5D in elements + content_parts = [] + for element in elements: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + content_parts.append(f"[Reference: {label}]") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + content_parts.append(f"{content}{source_text}") + continue + # Handle image sections specially if content_type == "image": # Extract image data @@ -647,26 +670,25 @@ JSON ONLY. NO OTHER TEXT.""" }) return { - "title": section_title or element.get("altText", "Image"), - "content": "", # No text content for image slides + "title": section_title or (elements[0].get("altText", "Image") if elements else "Image"), + "content": "\n\n".join(content_parts) if content_parts else "", # Include reference/extracted_text if present "images": images } # Build slide content based on section type - content_parts = [] - - if content_type == "table": - content_parts.append(self._formatTableForSlide(elements)) - elif content_type == "list": - content_parts.append(self._formatListForSlide(elements)) - elif content_type == "heading": - content_parts.append(self._formatHeadingForSlide(elements)) - elif content_type == "paragraph": - content_parts.append(self._formatParagraphForSlide(elements)) - elif content_type == "code": - content_parts.append(self._formatCodeForSlide(elements)) - else: - content_parts.append(self._format_paragraph_for_slide(elements)) + if not content_parts: # Only if we didn't process reference/extracted_text above + if content_type == "table": + content_parts.append(self._formatTableForSlide(elements)) + elif content_type == "list": + content_parts.append(self._formatListForSlide(elements)) + elif content_type == "heading": + content_parts.append(self._formatHeadingForSlide(elements)) + elif content_type == "paragraph": + content_parts.append(self._formatParagraphForSlide(elements)) + elif content_type == "code": + content_parts.append(self._formatCodeForSlide(elements)) + else: + content_parts.append(self._format_paragraph_for_slide(elements)) # Combine content parts slide_content = "\n\n".join(filter(None, content_parts)) @@ -1057,5 +1079,5 @@ JSON ONLY. NO OTHER TEXT.""" def _formatTimestamp(self) -> str: """Format current timestamp for presentation generation.""" - from datetime import datetime, UTC + # datetime and UTC are already imported at module level return datetime.now(UTC).strftime("%Y-%m-%d %H:%M:%S UTC") diff --git a/modules/services/serviceGeneration/renderers/rendererText.py b/modules/services/serviceGeneration/renderers/rendererText.py index 56d4af61..acbeaaf9 100644 --- a/modules/services/serviceGeneration/renderers/rendererText.py +++ b/modules/services/serviceGeneration/renderers/rendererText.py @@ -100,11 +100,39 @@ class RendererText(BaseRenderer): raise Exception(f"Text generation failed: {str(e)}") def _renderJsonSection(self, section: Dict[str, Any]) -> str: - """Render a single JSON section to text.""" + """Render a single JSON section to text. 
+ Supports three content formats: reference, object (base64), extracted_text. + """ try: sectionType = self._getSectionType(section) sectionData = self._getSectionData(section) + # Check for three content formats from Phase 5D in elements + if isinstance(sectionData, list): + textParts = [] + for element in sectionData: + element_type = element.get("type", "") if isinstance(element, dict) else "" + + # Support three content formats from Phase 5D + if element_type == "reference": + # Document reference format + doc_ref = element.get("documentReference", "") + label = element.get("label", "Reference") + textParts.append(f"[Reference: {label}]") + continue + elif element_type == "extracted_text": + # Extracted text format + content = element.get("content", "") + source = element.get("source", "") + if content: + source_text = f" (Source: {source})" if source else "" + textParts.append(f"{content}{source_text}") + continue + + # If we processed reference/extracted_text elements, return them + if textParts: + return '\n\n'.join(textParts) + if sectionType == "table": # Process the section data to extract table structure processedData = self._processSectionByType(section) diff --git a/modules/services/serviceGeneration/subContentGenerator.py b/modules/services/serviceGeneration/subContentGenerator.py index 0f75f595..681a5923 100644 --- a/modules/services/serviceGeneration/subContentGenerator.py +++ b/modules/services/serviceGeneration/subContentGenerator.py @@ -7,6 +7,10 @@ Generates content for each section in the document structure. import logging import asyncio +import json +import base64 +import re +import traceback from typing import Dict, Any, Optional, List, Callable from modules.services.serviceGeneration.subContentIntegrator import ContentIntegrator @@ -25,6 +29,7 @@ class ContentGenerator: structure: Dict[str, Any], cachedContent: Optional[Dict[str, Any]] = None, userPrompt: str = "", + contentParts: Optional[List[Any]] = None, progressCallback: Optional[Callable] = None, parallelGeneration: bool = True, batchSize: int = 10 @@ -33,9 +38,10 @@ class ContentGenerator: Generate content for all sections in structure. 
Args: - structure: Document structure from Phase 1 + structure: Document structure from Phase 1 (with contentPartIds per section) cachedContent: Extracted content cache userPrompt: Original user prompt + contentParts: List of all available ContentParts (for mapping by contentPartIds) progressCallback: Function to call for progress updates parallelGeneration: Enable parallel section generation batchSize: Number of sections to process in parallel @@ -89,6 +95,7 @@ class ContentGenerator: sections=sections, cachedContent=cachedContent, userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for section generation documentMetadata=structure.get("metadata", {}), progressCallback=lambda idx, total, msg: progressCallback( currentSectionIndex + idx, @@ -103,6 +110,7 @@ class ContentGenerator: sections=sections, cachedContent=cachedContent, userPrompt=userPrompt, + contentParts=contentParts, # Pass ContentParts for section generation documentMetadata=structure.get("metadata", {}), progressCallback=lambda idx, total, msg: progressCallback( currentSectionIndex + idx, @@ -138,7 +146,8 @@ class ContentGenerator: sections: List[Dict[str, Any]], cachedContent: Optional[Dict[str, Any]], userPrompt: str, - documentMetadata: Dict[str, Any], + contentParts: Optional[List[Any]] = None, + documentMetadata: Dict[str, Any] = {}, progressCallback: Optional[Callable] = None ) -> List[Dict[str, Any]]: """ @@ -149,6 +158,14 @@ class ContentGenerator: previousSections = [] totalSections = len(sections) + # Create ContentParts lookup map by ID + contentPartsMap = {} + if contentParts: + for part in contentParts: + partId = part.id if hasattr(part, 'id') else part.get('id', '') + if partId: + contentPartsMap[partId] = part + for idx, section in enumerate(sections): try: contentType = section.get("content_type", "content") @@ -171,11 +188,20 @@ class ContentGenerator: message ) + # Get ContentParts for this section + sectionContentPartIds = section.get("contentPartIds", []) + sectionContentParts = [] + if sectionContentPartIds and contentPartsMap: + for partId in sectionContentPartIds: + if partId in contentPartsMap: + sectionContentParts.append(contentPartsMap[partId]) + context = { "userPrompt": userPrompt, "cachedContent": cachedContent, "previousSections": previousSections.copy(), "targetSection": section, + "sectionContentParts": sectionContentParts, # ContentParts for this section "documentMetadata": documentMetadata, "operationId": None } @@ -272,11 +298,20 @@ class ContentGenerator: message ) + # Get ContentParts for this section + sectionContentPartIds = section.get("contentPartIds", []) + sectionContentParts = [] + if sectionContentPartIds and contentPartsMap: + for partId in sectionContentPartIds: + if partId in contentPartsMap: + sectionContentParts.append(contentPartsMap[partId]) + context = { "userPrompt": userPrompt, "cachedContent": cachedContent, "previousSections": batchPreviousSections.copy(), # Include sections from previous batches "targetSection": section, + "sectionContentParts": sectionContentParts, # ContentParts for this section "documentMetadata": documentMetadata, "operationId": None # Can be set if needed for nested progress } @@ -371,17 +406,13 @@ class ContentGenerator: # Create section-specific prompt sectionPrompt = self._createSectionPrompt(section, context) - # Debug: Log section generation prompt - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - sectionId = section.get('id', 'unknown') - contentType = 
section.get('content_type', 'unknown') - try: - self.services.utils.writeDebugFile( - sectionPrompt, - f"document_generation_section_{sectionId}_{contentType}_prompt" - ) - except Exception as e: - logger.debug(f"Could not write debug file for section prompt: {e}") + # Debug: Log section generation prompt (harmonisiert - keine Checks nötig) + sectionId = section.get('id', 'unknown') + contentType = section.get('content_type', 'unknown') + self.services.utils.writeDebugFile( + sectionPrompt, + f"document_generation_section_{sectionId}_{contentType}_prompt" + ) # Call AI to generate content from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum @@ -397,32 +428,27 @@ class ContentGenerator: outputFormat="json" ) - # Debug: Log section generation response (always log, even if empty) + # Debug: Log section generation response (harmonisiert - keine Checks nötig) sectionId = section.get('id', 'unknown') contentType = section.get('content_type', 'unknown') - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - responseContent = '' - if aiResponse: - if hasattr(aiResponse, 'content') and aiResponse.content: - responseContent = aiResponse.content - elif hasattr(aiResponse, 'documents') and aiResponse.documents: - responseContent = f"[Response has {len(aiResponse.documents)} documents]" - else: - responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]" - else: - responseContent = '[No response object]' - - self.services.utils.writeDebugFile( - responseContent, - f"document_generation_section_{sectionId}_{contentType}_response" - ) - logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)") - except Exception as e: - logger.warning(f"Could not write debug file for section response: {e}") - import traceback - logger.debug(traceback.format_exc()) + responseContent = '' + if aiResponse: + if hasattr(aiResponse, 'content') and aiResponse.content: + responseContent = aiResponse.content + elif hasattr(aiResponse, 'documents') and aiResponse.documents: + responseContent = f"[Response has {len(aiResponse.documents)} documents]" + else: + responseContent = f"[Response object: {type(aiResponse).__name__}, attributes: {dir(aiResponse)}]" + else: + responseContent = '[No response object]' + + # Debug: Log section generation response (harmonisiert - keine Checks nötig) + self.services.utils.writeDebugFile( + responseContent, + f"document_generation_section_{sectionId}_{contentType}_response" + ) + logger.debug(f"Logged section response for {sectionId} ({len(responseContent)} chars)") if not aiResponse or not aiResponse.content: logger.error(f"AI section generation returned empty response for section {sectionId}") @@ -443,7 +469,7 @@ class ContentGenerator: logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500] if extractedJson else 'None'}") raise ValueError("No JSON found in AI section response") - import json + # json is already imported at module level try: elementsData = json.loads(extractedJson) logger.debug(f"Parsed JSON for section {section.get('id')}: type={type(elementsData)}, keys={list(elementsData.keys()) if isinstance(elementsData, dict) else 'N/A'}") @@ -480,7 +506,7 @@ class ContentGenerator: # Last resort: try to extract partial content and create minimal valid JSON try: # Try to extract text content before the truncation point - import re + # re is already imported at module level # Look for text field that might be partially 
complete textMatch = re.search(r'"text"\s*:\s*"([^"]*)', extractedJson) if textMatch: @@ -577,14 +603,14 @@ class ContentGenerator: ) -> Dict[str, Any]: """Generate image for image section or include existing image""" try: - # Check if this is an existing image to include + # Check if this is an existing image to include or render imageSource = section.get("image_source", "generate") - if imageSource == "existing": - # Include existing image from cachedContent + if imageSource == "existing" or imageSource == "render": + # Phase 4: Include existing image or render image from cachedContent imageRefId = section.get("image_reference_id") if not imageRefId: - raise ValueError(f"Image section {section.get('id')} has image_source='existing' but no image_reference_id") + raise ValueError(f"Image section {section.get('id')} has image_source='{imageSource}' but no image_reference_id") cachedContent = context.get("cachedContent", {}) imageDocuments = cachedContent.get("imageDocuments", []) @@ -594,7 +620,7 @@ class ContentGenerator: if not imageDoc: raise ValueError(f"Image document {imageRefId} not found in cachedContent.imageDocuments") - # Create image element from existing image + # Create image element from existing/render image altText = imageDoc.get("altText", section.get("generation_hint", "Image")) mimeType = imageDoc.get("mimeType", "image/png") @@ -605,7 +631,7 @@ class ContentGenerator: "caption": section.get("metadata", {}).get("caption") }] - logger.info(f"Successfully included existing image {imageRefId} for section {section.get('id')}") + logger.info(f"Successfully integrated image {imageRefId} for section {section.get('id')} (source={imageSource})") return section # Generate new image (existing logic) @@ -620,7 +646,7 @@ class ContentGenerator: # Call AI service for image generation from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, AiCallPromptImage - import json + # json is already imported at module level # Create image generation prompt promptModel = AiCallPromptImage( @@ -664,7 +690,7 @@ class ContentGenerator: # Validate base64 data try: - import base64 + # base64 is already imported at module level base64.b64decode(base64Data[:100], validate=True) # Validate first 100 chars except Exception as e: logger.warning(f"Image data may not be valid base64: {str(e)}") @@ -710,9 +736,11 @@ class ContentGenerator: """Create sub-prompt for section content generation""" contentType = section.get("content_type", "") generationHint = section.get("generation_hint", "") + extractionPrompt = section.get("extractionPrompt") # Optional extraction prompt for ContentParts userPrompt = context.get("userPrompt", "") cachedContent = context.get("cachedContent") previousSections = context.get("previousSections", []) + sectionContentParts = context.get("sectionContentParts", []) # ContentParts for this section documentMetadata = context.get("documentMetadata", {}) # Get user language @@ -723,6 +751,51 @@ class ContentGenerator: if cachedContent and cachedContent.get("extractedContent"): cachedContentText = self._formatCachedContent(cachedContent) + # Format ContentParts for this section + contentPartsText = "" + imagePartReferences = [] # Track image parts for text reference + + if sectionContentParts: + try: + partsList = [] + imageIndex = 1 + for part in sectionContentParts: + partTypeGroup = part.typeGroup if hasattr(part, 'typeGroup') else part.get('typeGroup', '') + partMimeType = part.mimeType if hasattr(part, 'mimeType') else part.get('mimeType', '') + partId = part.id 
if hasattr(part, 'id') else part.get('id', '')
+                    partData = part.data if hasattr(part, 'data') else part.get('data', '')
+
+                    # Check if this is an image part
+                    isImage = partTypeGroup == "image" or (partMimeType and partMimeType.startswith("image/"))
+
+                    if contentType == "image" and isImage:
+                        # For image sections: include image data for integration
+                        partsList.append(f"- ContentPart {partId} (image): [Image data available for integration]")
+                    elif isImage:
+                        # For non-image sections: track for text reference
+                        imagePartReferences.append({
+                            "id": partId,
+                            "index": imageIndex
+                        })
+                        imageIndex += 1
+                        # Don't include image data in prompt for non-image sections
+                    else:
+                        # For text/table/etc parts: include data preview
+                        dataPreview = str(partData)[:200] if partData else "[No data]"
+                        partsList.append(f"- ContentPart {partId} ({partTypeGroup}): {dataPreview}{'...' if partData and len(str(partData)) > 200 else ''}")
+
+                if partsList:
+                    contentPartsText = "\n".join(partsList)
+
+                # Add image reference instructions for non-image sections
+                if imagePartReferences and contentType != "image":
+                    refText = ", ".join([f"Bild {ref['index']}" if userLanguage == "de" else f"Image {ref['index']}" for ref in imagePartReferences])
+                    contentPartsText += f"\n\nNOTE: Reference images as text in the document language: {refText}"
+
+            except Exception as e:
+                logger.warning(f"Could not format ContentParts for section prompt: {str(e)}")
+                contentPartsText = ""
+
         # Format previous sections for context
         previousSectionsText = ""
         if previousSections:
@@ -787,14 +860,22 @@ EXTRACTED CONTENT (if available):
 {cachedContentText if cachedContentText else "None"}
 {'='*80}
 
+{'='*80}
+CONTENT PARTS FOR THIS SECTION:
+{'='*80}
+{contentPartsText if contentPartsText else "No ContentParts assigned to this section."}
+{'='*80}
+
 TASK: Generate content for this section ONLY.
 
 INSTRUCTIONS:
 1. Generate content appropriate for section type: {contentType}
 2. Use the generation hint: {generationHint}
-3. Consider previous sections for continuity
-4. Use extracted content if relevant
-5. All content must be in the language '{userLanguage}'
-6. CRITICAL: Return ONLY a JSON object with an "elements" array. DO NOT return a full document structure.
+{f"3. Use extractionPrompt for ContentParts: {extractionPrompt}" if extractionPrompt else "3. Use ContentParts data if provided"}
+4. Consider previous sections for continuity
+5. Use extracted content if relevant
+6. All content must be in the language '{userLanguage}'
+7. {'For image sections: Integrate image ContentParts as visual elements' if contentType == "image" else 'For non-image sections: Reference image ContentParts as text (e.g., "siehe Bild 1" in German, "see Image 1" in English)'}
+8. CRITICAL: Return ONLY a JSON object with an "elements" array. DO NOT return a full document structure.
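The contract introduced above can be summarized as follows: each Phase-1 structure section lists the ContentParts it needs via contentPartIds, and the content generator resolves those IDs through an id-keyed lookup map before building the section prompt. A minimal, self-contained sketch of that mapping is shown below; the field names follow the code in this patch, while the concrete IDs and data values are hypothetical.

from typing import Any, Dict, List

# A section as produced by the structure generator (Phase 1)
section: Dict[str, Any] = {
    "id": "section_intro_1",
    "content_type": "paragraph",
    "complexity": "simple",
    "generation_hint": "Introduce the report topic",
    "contentPartIds": ["part_1", "part_3"],  # assigned during structure generation
    "extractionPrompt": None,                # optional per-section processing hint
    "order": 1,
    "elements": [],
}

# ContentParts available to the generation run (sample data only)
contentParts: List[Dict[str, Any]] = [
    {"id": "part_1", "typeGroup": "text", "mimeType": "text/plain", "data": "Quarterly figures ..."},
    {"id": "part_2", "typeGroup": "image", "mimeType": "image/png", "data": "<base64>"},
    {"id": "part_3", "typeGroup": "table", "mimeType": "text/csv", "data": "region,revenue\nEU,1.2"},
]

# Same lookup the ContentGenerator builds: map parts by id, then pick the ones
# listed in the section's contentPartIds.
contentPartsMap = {part["id"]: part for part in contentParts}
sectionContentParts = [contentPartsMap[pid] for pid in section["contentPartIds"] if pid in contentPartsMap]
print([p["id"] for p in sectionContentParts])  # ['part_1', 'part_3']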
diff --git a/modules/services/serviceGeneration/subContentIntegrator.py b/modules/services/serviceGeneration/subContentIntegrator.py index 7bee437e..1a83eb6e 100644 --- a/modules/services/serviceGeneration/subContentIntegrator.py +++ b/modules/services/serviceGeneration/subContentIntegrator.py @@ -65,18 +65,14 @@ class ContentIntegrator: ) sections[idx] = section - # Debug: Write final merged structure to debug file - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - import json - structureJson = json.dumps(structure, indent=2, ensure_ascii=False) - self.services.utils.writeDebugFile( - structureJson, - "document_generation_final_merged_json" - ) - logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)") - except Exception as e: - logger.debug(f"Could not write debug file for final merged JSON: {e}") + # Debug: Write final merged structure to debug file (harmonisiert - keine Checks nötig) + import json + structureJson = json.dumps(structure, indent=2, ensure_ascii=False) + self.services.utils.writeDebugFile( + structureJson, + "document_generation_final_merged_json" + ) + logger.debug(f"Logged final merged JSON structure ({len(structureJson)} chars)") return structure diff --git a/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py b/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py deleted file mode 100644 index d6620d3d..00000000 --- a/modules/services/serviceGeneration/subDocumentPurposeAnalyzer.py +++ /dev/null @@ -1,316 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. -""" -Document Purpose Analyzer for hierarchical document generation. -Uses AI to analyze user prompt and determine purpose for each document. -""" - -import logging -import json -from typing import Dict, Any, List, Optional -from modules.datamodels.datamodelChat import ChatDocument -from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum - -logger = logging.getLogger(__name__) - - -class DocumentPurposeAnalyzer: - """Analyzes user prompt and documents to determine document purposes""" - - def __init__(self, services: Any): - self.services = services - - async def analyzeDocumentPurposes( - self, - userPrompt: str, - chatDocuments: List[ChatDocument], - actionContext: str = "generateDocument" - ) -> Dict[str, Any]: - """ - Use AI to analyze user prompt and determine purpose for each document. - - Args: - userPrompt: User's original prompt - chatDocuments: List of ChatDocument objects to analyze - actionContext: Action name (e.g., "generateDocument", "extractData") - - Returns: - { - "document_purposes": [ - { - "document_id": "...", - "purpose": "extract_text_content" | "include_image" | ..., - "reasoning": "...", - "extractionPrompt": "..." (if purpose requires extraction), - "processingNotes": "..." - } - ], - "overall_intent": "..." 
- } - """ - try: - if not chatDocuments: - return { - "document_purposes": [], - "overall_intent": "No documents provided" - } - - # Create document metadata list for AI analysis - documentMetadata = [] - for doc in chatDocuments: - docInfo = { - "document_id": doc.id, - "fileName": doc.fileName, - "mimeType": doc.mimeType, - "fileSize": doc.fileSize - } - documentMetadata.append(docInfo) - - # Create analysis prompt - analysisPrompt = self._createAnalysisPrompt( - userPrompt=userPrompt, - actionContext=actionContext, - documentMetadata=documentMetadata - ) - - # Debug: Log purpose analysis prompt - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - self.services.utils.writeDebugFile( - analysisPrompt, - "document_purpose_analysis_prompt" - ) - except Exception as e: - logger.debug(f"Could not write debug file for purpose analysis prompt: {e}") - - # Call AI for analysis - options = AiCallOptions( - operationType=OperationTypeEnum.DATA_GENERATE, - resultFormat="json" - ) - - aiResponse = await self.services.ai.callAiContent( - prompt=analysisPrompt, - options=options, - outputFormat="json" - ) - - # Debug: Log purpose analysis response - if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'): - try: - responseContent = aiResponse.content if aiResponse and aiResponse.content else '' - responseMetadata = { - "status": aiResponse.status if aiResponse else "N/A", - "error": aiResponse.error if aiResponse else "N/A", - "documents_count": len(aiResponse.documents) if aiResponse and aiResponse.documents else 0 - } - self.services.utils.writeDebugFile( - f"Response Content:\n{responseContent}\n\nResponse Metadata:\n{json.dumps(responseMetadata, indent=2)}", - "document_purpose_analysis_response" - ) - except Exception as e: - logger.debug(f"Could not write debug file for purpose analysis response: {e}") - - if not aiResponse or not aiResponse.content: - logger.warning("AI purpose analysis returned empty response, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - # Extract and parse JSON - extractedJson = self.services.utils.jsonExtractString(aiResponse.content) - if not extractedJson: - logger.warning("No JSON found in purpose analysis response, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - try: - analysisResult = json.loads(extractedJson) - - # Validate structure - if "document_purposes" not in analysisResult: - logger.warning("Invalid analysis result structure, using defaults") - return self._createDefaultPurposes(chatDocuments, actionContext) - - # Ensure all documents have purposes - analyzedIds = {dp.get("document_id") for dp in analysisResult.get("document_purposes", [])} - for doc in chatDocuments: - if doc.id not in analyzedIds: - logger.warning(f"Document {doc.id} not in analysis result, adding default purpose") - defaultPurpose = self._determineDefaultPurpose(doc, actionContext) - analysisResult["document_purposes"].append({ - "document_id": doc.id, - "purpose": defaultPurpose, - "reasoning": f"Default purpose based on document type and action context", - "extractionPrompt": None, - "processingNotes": None - }) - - return analysisResult - - except json.JSONDecodeError as e: - logger.error(f"Failed to parse purpose analysis JSON: {str(e)}") - logger.error(f"Extracted JSON (first 500 chars): {extractedJson[:500]}") - return self._createDefaultPurposes(chatDocuments, actionContext) - - except Exception 
as e: - logger.error(f"Error analyzing document purposes: {str(e)}") - return self._createDefaultPurposes(chatDocuments, actionContext) - - def _createAnalysisPrompt( - self, - userPrompt: str, - actionContext: str, - documentMetadata: List[Dict[str, Any]] - ) -> str: - """Create AI prompt for document purpose analysis""" - - # Format document list - docListText = "" - for i, docInfo in enumerate(documentMetadata, 1): - docListText += f"\n{i}. Document ID: {docInfo['document_id']}\n" - docListText += f" File Name: {docInfo['fileName']}\n" - docListText += f" MIME Type: {docInfo['mimeType']}\n" - docListText += f" File Size: {docInfo['fileSize']} bytes\n" - - # Get user language - userLanguage = self._getUserLanguage() - - prompt = f"""{'='*80} -DOCUMENT PURPOSE ANALYSIS -{'='*80} - -USER PROMPT: -{userPrompt} - -ACTION CONTEXT: {actionContext} - -DOCUMENTS PROVIDED: -{docListText} -{'='*80} - -TASK: For each document, determine its purpose based on: -1. User prompt intent (what the user wants to do) -2. Action context (what action is being performed) -3. Document type (mimeType - is it text, image, etc.) -4. Document metadata (fileName, size) - -AVAILABLE PURPOSES: -- "extract_text_content": Extract text content for use in document generation -- "include_image": Include the image directly in the generated document (for images) -- "analyze_image_vision": Analyze image with vision AI to extract text/information (for images with text/charts) -- "use_as_template": Use document structure/layout as template for generation -- "use_as_reference": Use as background context/reference without detailed extraction -- "extract_data": Extract structured data (key-value pairs, entities, fields) -- "attach": Document is an attachment - don't process, just attach to output -- "convert_format": Convert document format (for convert actions) -- "translate": Translate document content (for translate actions) -- "summarize": Create summary of document (for summarize actions) -- "compare": Compare documents (for comparison actions) -- "merge": Merge documents (for merge actions) -- "extract_tables_charts": Extract tables and charts specifically -- "use_for_styling": Use document for styling/formatting reference only -- "extract_metadata": Extract only document metadata - -CRITICAL RULES: -1. For images (mimeType starts with "image/"): - - If user wants to "include" or "show" images → "include_image" - - If user wants to "analyze", "read text", or "extract text" from images → "analyze_image_vision" - - Default for images in generateDocument → "include_image" - -2. For text documents in generateDocument: - - If user mentions "template" or "structure" → "use_as_template" - - If user mentions "reference" or "context" → "use_as_reference" - - Default → "extract_text_content" - -3. Consider action context: - - generateDocument: Usually "extract_text_content" or "include_image" - - extractData: Usually "extract_data" - - translateDocument: Usually "translate" - - summarizeDocument: Usually "summarize" - -4. Return ONLY valid JSON following this structure: -{{ - "document_purposes": [ - {{ - "document_id": "document_id_here", - "purpose": "extract_text_content", - "reasoning": "Brief explanation in language '{userLanguage}'", - "extractionPrompt": "Specific extraction prompt if purpose requires extraction, otherwise null", - "processingNotes": "Any special processing requirements or null" - }} - ], - "overall_intent": "Summary of how documents should be used together in language '{userLanguage}'" -}} - -5. 
All content must be in the language '{userLanguage}' -6. Return ONLY the JSON structure. No explanations before or after. - -Return ONLY the JSON structure. -""" - return prompt - - def _createDefaultPurposes( - self, - chatDocuments: List[ChatDocument], - actionContext: str - ) -> Dict[str, Any]: - """Create default purposes when AI analysis fails""" - purposes = [] - - for doc in chatDocuments: - purpose = self._determineDefaultPurpose(doc, actionContext) - purposes.append({ - "document_id": doc.id, - "purpose": purpose, - "reasoning": f"Default purpose based on document type ({doc.mimeType}) and action context ({actionContext})", - "extractionPrompt": None, - "processingNotes": None - }) - - return { - "document_purposes": purposes, - "overall_intent": f"Default processing for {len(chatDocuments)} document(s) in {actionContext} action" - } - - def _determineDefaultPurpose( - self, - doc: ChatDocument, - actionContext: str - ) -> str: - """Determine default purpose based on document type and action context""" - mimeType = doc.mimeType or "" - - # Image documents - if mimeType.startswith("image/"): - if actionContext == "generateDocument": - return "include_image" - elif actionContext in ["extractData", "process"]: - return "analyze_image_vision" - else: - return "include_image" # Default for images - - # Action-specific defaults - if actionContext == "extractData": - return "extract_data" - elif actionContext == "translateDocument": - return "translate" - elif actionContext == "summarizeDocument": - return "summarize" - elif actionContext == "convertDocument" or actionContext == "convert": - return "convert_format" - elif actionContext == "generateDocument": - return "extract_text_content" - else: - # Default for other actions - return "extract_text_content" - - def _getUserLanguage(self) -> str: - """Get user language for document generation""" - try: - if self.services: - if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage: - return self.services.currentUserLanguage - elif hasattr(self.services, 'user') and self.services.user and hasattr(self.services.user, 'language'): - return self.services.user.language - except Exception: - pass - return 'en' # Default fallback - diff --git a/modules/services/serviceGeneration/subPromptBuilderGeneration.py b/modules/services/serviceGeneration/subPromptBuilderGeneration.py index 9a78b9f4..0ee6fa5e 100644 --- a/modules/services/serviceGeneration/subPromptBuilderGeneration.py +++ b/modules/services/serviceGeneration/subPromptBuilderGeneration.py @@ -19,7 +19,8 @@ async def buildGenerationPrompt( title: str, extracted_content: str = None, continuationContext: Dict[str, Any] = None, - services: Any = None + services: Any = None, + useContentParts: bool = False # ARCHITECTURE: If True, don't include full content in prompt (ContentParts will be used directly) ) -> str: """ Build the unified generation prompt using a single JSON template. @@ -120,7 +121,9 @@ Continue generating the remaining content now. 
# PROMPT FOR FIRST CALL
     # Structure: User request + Extracted content FIRST (if available), then JSON template, then instructions
-    if extracted_content:
+    # ARCHITECTURE: If useContentParts=True, don't include full content in prompt
+    # ContentParts will be passed directly to callAi for model-aware chunking
+    if extracted_content and not useContentParts:
         # If we have extracted content, put it FIRST and make it very clear it's the source data
         generationPrompt = f"""{'='*80}
 USER REQUEST / USER PROMPT:
diff --git a/modules/services/serviceGeneration/subStructureGenerator.py b/modules/services/serviceGeneration/subStructureGenerator.py
index d2ef1aeb..62e72c69 100644
--- a/modules/services/serviceGeneration/subStructureGenerator.py
+++ b/modules/services/serviceGeneration/subStructureGenerator.py
@@ -24,6 +24,7 @@ class StructureGenerator:
         userPrompt: str,
         documentList: Optional[Any] = None,
         cachedContent: Optional[Dict[str, Any]] = None,
+        contentParts: Optional[List[Any]] = None,
         maxSectionLength: int = 500,
         existingImages: Optional[List[Dict[str, Any]]] = None
     ) -> Dict[str, Any]:
@@ -34,30 +35,28 @@ class StructureGenerator:
             userPrompt: User's original prompt
             documentList: Optional document references
             cachedContent: Optional extracted content cache
+            contentParts: Optional list of ContentParts to analyze for structure generation
             maxSectionLength: Maximum words for simple sections
             existingImages: Optional list of existing images to include
 
         Returns:
-            Document structure with empty elements arrays
+            Document structure with empty elements arrays and contentPartIds per section
         """
         try:
             # Create structure generation prompt
             structurePrompt = self._createStructurePrompt(
                 userPrompt=userPrompt,
                 cachedContent=cachedContent,
+                contentParts=contentParts,
                 maxSectionLength=maxSectionLength,
                 existingImages=existingImages or []
             )
 
-            # Debug: Log structure generation prompt
-            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
-                try:
-                    self.services.utils.writeDebugFile(
-                        structurePrompt,
-                        "document_generation_structure_prompt"
-                    )
-                except Exception as e:
-                    logger.debug(f"Could not write debug file for structure prompt: {e}")
+            # Debug: Log structure generation prompt (harmonized - no guard checks needed)
+            self.services.utils.writeDebugFile(
+                structurePrompt,
+                "document_generation_structure_prompt"
+            )
 
             # Call AI to generate structure
             from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum
@@ -73,15 +72,11 @@ class StructureGenerator:
                 outputFormat="json"
             )
 
-            # Debug: Log structure generation response
-            if self.services and hasattr(self.services, 'utils') and hasattr(self.services.utils, 'writeDebugFile'):
-                try:
-                    self.services.utils.writeDebugFile(
-                        aiResponse.content if aiResponse and aiResponse.content else '',
-                        "document_generation_structure_response"
-                    )
-                except Exception as e:
-                    logger.debug(f"Could not write debug file for structure response: {e}")
+            # Debug: Log structure generation response (harmonized - no guard checks needed)
+            self.services.utils.writeDebugFile(
+                aiResponse.content if aiResponse and aiResponse.content else '',
+                "document_generation_structure_response"
+            )
 
             if not aiResponse or not aiResponse.content:
                 raise ValueError("AI structure generation returned empty response")
@@ -106,6 +101,7 @@ class StructureGenerator:
         self,
         userPrompt: str,
         cachedContent: Optional[Dict[str, Any]] = None,
+        contentParts: Optional[List[Any]] = None,
         maxSectionLength: int = 500,
         existingImages: Optional[List[Dict[str, Any]]]
= None ) -> str: @@ -126,6 +122,41 @@ class StructureGenerator: if cachedContent and cachedContent.get("imageDocuments"): existingImages = cachedContent.get("imageDocuments", []) + # Format ContentParts as JSON for structure generation + contentPartsJson = "" + if contentParts: + try: + import json + # Convert ContentParts to dict format for JSON serialization + contentPartsList = [] + for part in contentParts: + if hasattr(part, 'dict'): + partDict = part.dict() + elif isinstance(part, dict): + partDict = part + else: + # Try to convert to dict + partDict = { + "id": getattr(part, 'id', ''), + "typeGroup": getattr(part, 'typeGroup', ''), + "mimeType": getattr(part, 'mimeType', ''), + "label": getattr(part, 'label', ''), + "metadata": getattr(part, 'metadata', {}) + } + # Only include essential fields for structure generation (not full data) + contentPartsList.append({ + "id": partDict.get("id", ""), + "typeGroup": partDict.get("typeGroup", ""), + "mimeType": partDict.get("mimeType", ""), + "label": partDict.get("label", ""), + "metadata": partDict.get("metadata", {}) + }) + + contentPartsJson = json.dumps(contentPartsList, indent=2, ensure_ascii=False) + except Exception as e: + logger.warning(f"Could not format ContentParts as JSON: {str(e)}") + contentPartsJson = "" + # Create structure template structureTemplate = jsonTemplateDocument.replace("{{DOCUMENT_TITLE}}", "Document Title") @@ -145,13 +176,15 @@ EXTRACTED CONTENT (if available): {'='*80} INSTRUCTIONS: -1. Analyze the user request and extracted content +1. Analyze the user request, extracted content, and available ContentParts 2. Create a document structure with CONTENT sections only 3. For each section, specify: - id: Unique identifier (e.g., "section_title_1", "section_image_1") - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) - generation_hint: Brief description of what content should be generated + - contentPartIds: Array of ContentPart IDs that should be used for this section (e.g., ["part_1", "part_2"]) - can be empty [] + - extractionPrompt: (optional) Specific prompt for extracting/processing ContentParts for this section - image_prompt: (only for image sections) Detailed prompt for image generation - order: Section order number (starting from 1) - elements: [] (empty array - will be populated later) @@ -160,10 +193,12 @@ INSTRUCTIONS: - If user requests illustrations/images, create image sections - If existing images are provided in documentList (check EXISTING IMAGES section below), create image sections that reference them - Add image_prompt field with detailed description for image generation (only for new images) - - Set complexity to "complex" + - Set complexity to "complex" for new images, "simple" for existing/render images - For existing images: Set image_source to "existing" and image_reference_id to the image document ID + - For images to render (from input documents): Set image_source to "render" and image_reference_id to the image document ID - Example for new image: {{"id": "section_image_1", "content_type": "image", "complexity": "complex", "generation_hint": "Illustration for chapter 1", "image_prompt": "A detailed description for image generation", "order": 2, "elements": []}} - Example for existing image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Include provided image", "image_source": "existing", "image_reference_id": 
"doc_id_here", "order": 2, "elements": []}} + - Example for render image: {{"id": "section_image_1", "content_type": "image", "complexity": "simple", "generation_hint": "Render input image", "image_source": "render", "image_reference_id": "doc_id_here", "order": 2, "elements": []}} {'='*80} EXISTING IMAGES (to include in document): @@ -178,12 +213,21 @@ EXISTING IMAGES (to include in document): 7. Return ONLY valid JSON following this structure: {structureTemplate} -5. CRITICAL RULES: +5. CRITICAL RULES FOR CONTENT PARTS: + - Analyze available ContentParts and determine which ones are needed for each section + - For image sections (content_type == "image"): Include image ContentParts in contentPartIds - images will be integrated as visual elements + - For other sections (heading, paragraph, etc.): If image ContentParts are referenced, they will be referenced as text in the document language (not integrated as images) + - Each section can reference multiple ContentParts via contentPartIds array + - If specific extraction/processing is needed for ContentParts, provide extractionPrompt + - Image references in non-image sections should be automatically derived in the document language (e.g., "siehe Bild 1" in German, "see Image 1" in English) + +6. CRITICAL RULES: - Return ONLY valid JSON (no comments, no trailing commas, double quotes only) - Follow the exact JSON schema structure provided - IMPORTANT: All sections MUST have empty elements arrays: "elements": [] (the template shows examples with content, but you must use empty arrays) - ALL sections MUST include "generation_hint" field with a brief description of what content should be generated - ALL sections MUST include "complexity" field: "simple" for short content, "complex" for long chapters/images + - ALL sections MUST include "contentPartIds" field (can be empty array [] if no ContentParts needed) - Image sections MUST include "image_prompt" field with detailed description for image generation - Order numbers MUST start from 1 (not 0) - All content must be in the language '{userLanguage}' @@ -235,6 +279,14 @@ Return ONLY the JSON structure. No explanations. if "elements" not in section: section["elements"] = [] + # Ensure contentPartIds field exists (can be empty array) + if "contentPartIds" not in section: + section["contentPartIds"] = [] + + # Ensure extractionPrompt field exists (optional) + if "extractionPrompt" not in section: + section["extractionPrompt"] = None + # Identify complexity if not set if "complexity" not in section: section["complexity"] = self._identifySectionComplexity( @@ -255,11 +307,11 @@ Return ONLY the JSON structure. No explanations. 
if section.get("content_type") == "image": imageSource = section.get("image_source", "generate") - if imageSource == "existing": - # Existing image - ensure image_reference_id is set + if imageSource == "existing" or imageSource == "render": + # Existing or render image - ensure image_reference_id is set if "image_reference_id" not in section: - logger.warning(f"Image section {sectionId} has image_source='existing' but no image_reference_id") - # Existing images are simple (no generation needed) + logger.warning(f"Image section {sectionId} has image_source='{imageSource}' but no image_reference_id") + # Existing/render images are simple (no generation needed, code integration) section["complexity"] = "simple" else: # New image generation - ensure image_prompt diff --git a/modules/shared/jsonUtils.py b/modules/shared/jsonUtils.py index f2678b63..9a7cffab 100644 --- a/modules/shared/jsonUtils.py +++ b/modules/shared/jsonUtils.py @@ -2,6 +2,7 @@ # All rights reserved. import json import logging +import re from typing import Any, Dict, List, Optional, Tuple, Union, Type, TypeVar from pydantic import BaseModel, ValidationError @@ -11,10 +12,32 @@ T = TypeVar('T', bound=BaseModel) def stripCodeFences(text: str) -> str: - """Remove ```json / ``` fences and surrounding whitespace if present.""" + """Remove ```json / ``` fences and surrounding whitespace if present. + Also removes [SOURCE: ...] and [END SOURCE] tags that may wrap the JSON.""" if not text: return text s = text.strip() + + # Remove [SOURCE: ...] tags at the beginning + if s.startswith("[SOURCE:"): + # Find the end of the SOURCE tag (newline or end of string) + end_pos = s.find("\n") + if end_pos != -1: + s = s[end_pos+1:] + else: + # No newline, entire string is SOURCE tag + return "" + + # Remove [END SOURCE] tags at the end + if s.endswith("[END SOURCE]"): + # Find the start of END SOURCE tag (newline before it) + start_pos = s.rfind("\n[END SOURCE]") + if start_pos != -1: + s = s[:start_pos] + else: + # No newline, entire string is END SOURCE tag + return "" + # Handle opening fence (may or may not have closing fence) if s.startswith("```"): # Remove first triple backticks @@ -201,7 +224,7 @@ def closeJsonStructures(text: str) -> str: # Look for patterns like: "value" or "value\n (unterminated) # Check if we're in the middle of a string value when text ends if result.strip(): - import re + # re is already imported at module level # Count quotes - if odd number, we have an unterminated string quoteCount = result.count('"') if quoteCount % 2 == 1: @@ -367,7 +390,7 @@ def _removeLastIncompleteItem(items: List[str], original_text: str) -> List[str] Remove the last item if it appears to be incomplete/corrupted. This prevents corrupted data from being included in the final result. """ - import re + # re is already imported at module level if not items: return items @@ -418,7 +441,7 @@ def _extractGenericContent(text: str) -> List[Dict[str, Any]]: CRITICAL: Must preserve original content_type and id from the JSON structure! 
""" - import re + # re is already imported at module level sections = [] @@ -1025,7 +1048,7 @@ def _extractCutOffElements(incomplete_section: Dict[str, Any], raw_json: str) -> if not cut_off_element: # Extract the last incomplete part from raw JSON # Find the last incomplete string/number/array - import re + # re is already imported at module level # Look for incomplete string at the end incomplete_match = re.search(r'"([^"]*?)(?:"|$)', raw_json[-500:], re.DOTALL) if incomplete_match: @@ -1045,7 +1068,7 @@ def _extractCutOffFromElement(element: Dict[str, Any], raw_json: str) -> Optiona This helps identify where exactly to continue within nested structures. """ - import re + # re is already imported at module level # Check for code_block with nested JSON if "code" in element: diff --git a/modules/workflows/methods/methodAi/actions/__init__.py b/modules/workflows/methods/methodAi/actions/__init__.py index f0ba9d4d..8ebe6679 100644 --- a/modules/workflows/methods/methodAi/actions/__init__.py +++ b/modules/workflows/methods/methodAi/actions/__init__.py @@ -8,9 +8,7 @@ from .process import process from .webResearch import webResearch from .summarizeDocument import summarizeDocument from .translateDocument import translateDocument -from .convert import convert from .convertDocument import convertDocument -from .extractData import extractData from .generateDocument import generateDocument __all__ = [ @@ -18,9 +16,7 @@ __all__ = [ 'webResearch', 'summarizeDocument', 'translateDocument', - 'convert', 'convertDocument', - 'extractData', 'generateDocument', ] diff --git a/modules/workflows/methods/methodAi/actions/convert.py b/modules/workflows/methods/methodAi/actions/convert.py deleted file mode 100644 index 788fadea..00000000 --- a/modules/workflows/methods/methodAi/actions/convert.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. - -""" -Convert action for AI operations. -Converts documents/data between different formats with specific formatting options. -""" - -import logging -import json -from typing import Dict, Any -from modules.workflows.methods.methodBase import action -from modules.datamodels.datamodelChat import ActionResult, ActionDocument -from modules.datamodels.datamodelDocref import DocumentReferenceList - -logger = logging.getLogger(__name__) - -@action -async def convert(self, parameters: Dict[str, Any]) -> ActionResult: - """ - GENERAL: - - Purpose: Convert documents/data between different formats with specific formatting options (e.g., JSON→CSV with custom columns, delimiters). - - Input requirements: documentList (required); inputFormat and outputFormat (required). - - Output format: Document in target format with specified formatting options. - - CRITICAL: If input is already in standardized JSON format, uses automatic rendering system (no AI call needed). - - Parameters: - - documentList (list, required): Document reference(s) to convert. - - inputFormat (str, required): Source format (json, csv, xlsx, txt, etc.). - - outputFormat (str, required): Target format (csv, json, xlsx, txt, etc.). - - columnsPerRow (int, optional): For CSV output, number of columns per row. Default: auto-detect. - - delimiter (str, optional): For CSV output, delimiter character. Default: comma (,). - - includeHeader (bool, optional): For CSV output, whether to include header row. Default: True. - - language (str, optional): Language for output (e.g., 'de', 'en', 'fr'). Default: 'en'. 
- """ - documentList = parameters.get("documentList", []) - if not documentList: - return ActionResult.isFailure(error="documentList is required") - - inputFormat = parameters.get("inputFormat") - outputFormat = parameters.get("outputFormat") - if not inputFormat or not outputFormat: - return ActionResult.isFailure(error="inputFormat and outputFormat are required") - - # Normalize formats (remove leading dot if present) - normalizedInputFormat = inputFormat.strip().lstrip('.').lower() - normalizedOutputFormat = outputFormat.strip().lstrip('.').lower() - - # Get documents - if isinstance(documentList, DocumentReferenceList): - docRefList = documentList - elif isinstance(documentList, list): - docRefList = DocumentReferenceList.from_string_list(documentList) - else: - docRefList = DocumentReferenceList.from_string_list([documentList]) - - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) - if not chatDocuments: - return ActionResult.isFailure(error="No documents found in documentList") - - # Check if input is standardized JSON format - if so, use direct rendering - if normalizedInputFormat == "json" and len(chatDocuments) == 1: - try: - doc = chatDocuments[0] - # ChatDocument doesn't have documentData - need to load file content using fileId - docBytes = self.services.chat.getFileData(doc.fileId) - if not docBytes: - raise ValueError(f"No file data found for fileId={doc.fileId}") - - # Decode bytes to string - docData = docBytes.decode('utf-8') - - # Try to parse as JSON - if isinstance(docData, str): - jsonData = json.loads(docData) - elif isinstance(docData, dict): - jsonData = docData - else: - jsonData = None - - # Check if it's standardized JSON format (has "documents" or "sections") - if jsonData and (isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData)): - # Use direct rendering - no AI call needed! - from modules.services.serviceGeneration.mainServiceGeneration import GenerationService - generationService = GenerationService(self.services) - - # Ensure format is "documents" array - if "documents" not in jsonData: - jsonData = {"documents": [{"sections": jsonData.get("sections", []), "metadata": jsonData.get("metadata", {})}]} - - # Get title - title = jsonData.get("metadata", {}).get("title", doc.documentName or "Converted Document") - - # Render with options - renderOptions = {} - if normalizedOutputFormat == "csv": - renderOptions["delimiter"] = parameters.get("delimiter", ",") - renderOptions["columnsPerRow"] = parameters.get("columnsPerRow") - renderOptions["includeHeader"] = parameters.get("includeHeader", True) - - rendered_content, mime_type, _images = await generationService.renderReport( - jsonData, normalizedOutputFormat, title, None, None - ) - - # Apply CSV options if needed (renderer will handle them) - if normalizedOutputFormat == "csv" and renderOptions: - rendered_content = self.csvProcessing.applyCsvOptions(rendered_content, renderOptions) - - validationMetadata = { - "actionType": "ai.convert", - "inputFormat": normalizedInputFormat, - "outputFormat": normalizedOutputFormat, - "hasSourceJson": True, - "conversionType": "direct_rendering" - } - actionDoc = ActionDocument( - documentName=f"{doc.documentName.rsplit('.', 1)[0] if '.' 
in doc.documentName else doc.documentName}.{normalizedOutputFormat}", - documentData=rendered_content, - mimeType=mime_type, - sourceJson=jsonData, # Preserve source JSON for structure validation - validationMetadata=validationMetadata - ) - - return ActionResult.isSuccess(documents=[actionDoc]) - - except Exception as e: - logger.warning(f"Direct rendering failed, falling back to AI conversion: {str(e)}") - # Fall through to AI-based conversion - - # Fallback: Use AI for conversion (for non-JSON inputs or complex conversions) - columnsPerRow = parameters.get("columnsPerRow") - delimiter = parameters.get("delimiter", ",") - includeHeader = parameters.get("includeHeader", True) - language = parameters.get("language", "en") - - aiPrompt = f"Convert the provided document(s) from {normalizedInputFormat.upper()} format to {normalizedOutputFormat.upper()} format." - - if normalizedOutputFormat == "csv": - aiPrompt += f" Use '{delimiter}' as the delimiter character." - if columnsPerRow: - aiPrompt += f" Format the output with {columnsPerRow} columns per row." - if not includeHeader: - aiPrompt += " Do not include a header row." - else: - aiPrompt += " Include a header row with column names." - - if language and language != "en": - aiPrompt += f" Use language: {language}." - - aiPrompt += " Preserve all data and ensure accurate conversion. Maintain data integrity and structure." - - return await self.process({ - "aiPrompt": aiPrompt, - "documentList": documentList, - "resultType": normalizedOutputFormat - }) - diff --git a/modules/workflows/methods/methodAi/actions/extractData.py b/modules/workflows/methods/methodAi/actions/extractData.py deleted file mode 100644 index 723914bd..00000000 --- a/modules/workflows/methods/methodAi/actions/extractData.py +++ /dev/null @@ -1,59 +0,0 @@ -# Copyright (c) 2025 Patrick Motsch -# All rights reserved. - -""" -Extract Data action for AI operations. -Extracts structured data from documents (key-value pairs, entities, facts, etc.). -""" - -import logging -from typing import Dict, Any -from modules.workflows.methods.methodBase import action -from modules.datamodels.datamodelChat import ActionResult - -logger = logging.getLogger(__name__) - -@action -async def extractData(self, parameters: Dict[str, Any]) -> ActionResult: - """ - GENERAL: - - Purpose: Extract structured data from documents (key-value pairs, entities, facts, etc.). - - Input requirements: documentList (required); optional dataStructure, fields. - - Output format: JSON by default, or specified resultType. - - Parameters: - - documentList (list, required): Document reference(s) to extract data from. - - dataStructure (str, optional): Desired data structure - flat, nested, or list. Default: nested. - - fields (list, optional): Specific fields/properties to extract (e.g., ["name", "date", "amount"]). - - resultType (str, optional): Output format (json, csv, xlsx, etc.). Default: json. - """ - documentList = parameters.get("documentList", []) - if not documentList: - return ActionResult.isFailure(error="documentList is required") - - dataStructure = parameters.get("dataStructure", "nested") - fields = parameters.get("fields", []) - resultType = parameters.get("resultType", "json") - - aiPrompt = "Extract structured data from the provided document(s)." - if fields: - fieldsStr = ", ".join(fields) - aiPrompt += f" Extract the following specific fields: {fieldsStr}." - else: - aiPrompt += " Extract all relevant data including names, dates, amounts, entities, and key information." 
- - structureInstructions = { - "flat": "Use a flat key-value structure with simple properties.", - "nested": "Use a nested JSON structure with logical grouping of related data.", - "list": "Structure the data as a list/array of objects, one per entity or record." - } - aiPrompt += f" {structureInstructions.get(dataStructure.lower(), structureInstructions['nested'])}" - - aiPrompt += " Ensure all extracted data is accurate and complete." - - return await self.process({ - "aiPrompt": aiPrompt, - "documentList": documentList, - "resultType": resultType - }) - diff --git a/modules/workflows/methods/methodAi/actions/generateDocument.py b/modules/workflows/methods/methodAi/actions/generateDocument.py index 5b5db12f..6569ddab 100644 --- a/modules/workflows/methods/methodAi/actions/generateDocument.py +++ b/modules/workflows/methods/methodAi/actions/generateDocument.py @@ -3,18 +3,17 @@ """ Generate Document action for AI operations. -Generates documents from scratch or based on templates/inputs using hierarchical approach. +Wrapper around AI service callAiContent method. """ import logging import time -from typing import Dict, Any, Optional +from typing import Dict, Any, Optional, List from modules.workflows.methods.methodBase import action from modules.datamodels.datamodelChat import ActionResult, ActionDocument -from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy -from modules.services.serviceGeneration.subStructureGenerator import StructureGenerator -from modules.services.serviceGeneration.subContentGenerator import ContentGenerator -from modules.services.serviceGeneration.subDocumentPurposeAnalyzer import DocumentPurposeAnalyzer +from modules.datamodels.datamodelExtraction import ContentPart +from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum +from modules.datamodels.datamodelWorkflow import AiResponse, DocumentData logger = logging.getLogger(__name__) @@ -59,38 +58,15 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: resultType = "txt" logger.info(f"Auto-detected Text format from prompt") - maxSectionLength = parameters.get("maxSectionLength", 500) - parallelGeneration = parameters.get("parallelGeneration", True) - progressLogging = parameters.get("progressLogging", True) - # Create operation ID for progress tracking workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}" operationId = f"doc_gen_{workflowId}_{int(time.time())}" parentOperationId = parameters.get('parentOperationId') try: - # Phase 1: Structure Generation - if progressLogging: - self.services.chat.progressLogStart( - operationId, - "Document", - "Structure Generation", - "Generating document structure...", - parentOperationId=parentOperationId - ) - - structureGenerator = StructureGenerator(self.services) - - # Analyze document purposes and process documents accordingly - cachedContent = None - imageDocuments = [] - documentPurposes = {} - + # Convert documentList to DocumentReferenceList if needed + docRefList = None if documentList: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.1, "Analyzing document purposes...") - - # Convert documentList to DocumentReferenceList from modules.datamodels.datamodelDocref import DocumentReferenceList if isinstance(documentList, DocumentReferenceList): @@ -101,301 +77,78 @@ async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: docRefList = 
DocumentReferenceList.from_string_list(documentList) else: docRefList = DocumentReferenceList(references=[]) - - # Get ChatDocuments - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(docRefList) - if chatDocuments: - logger.info(f"Analyzing purposes for {len(chatDocuments)} documents") - - # Analyze document purposes using AI - purposeAnalyzer = DocumentPurposeAnalyzer(self.services) - purposeAnalysis = await purposeAnalyzer.analyzeDocumentPurposes( - userPrompt=prompt, - chatDocuments=chatDocuments, - actionContext="generateDocument" - ) - - documentPurposes = {dp["document_id"]: dp for dp in purposeAnalysis.get("document_purposes", [])} - logger.info(f"Purpose analysis complete: {purposeAnalysis.get('overall_intent', 'N/A')}") - - # Separate documents by purpose - textDocs = [] - imageDocsToInclude = [] - imageDocsToAnalyze = [] - - for doc in chatDocuments: - docPurpose = documentPurposes.get(doc.id, {}) - purpose = docPurpose.get("purpose", "extract_text_content") - - if purpose == "include_image": - imageDocsToInclude.append(doc) - elif purpose == "analyze_image_vision": - imageDocsToAnalyze.append(doc) - elif purpose in ["extract_text_content", "use_as_template", "use_as_reference", "extract_data"]: - textDocs.append(doc) - # Skip "attach" purpose - don't process - - # Process text documents (extract content) - extractedResults = [] - if textDocs: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.15, f"Extracting content from {len(textDocs)} text document(s)...") - - # Prepare extraction options with purpose-specific prompts - extractionOptionsList = [] - for doc in textDocs: - docPurpose = documentPurposes.get(doc.id, {}) - extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all content from the document" - - extractionOptions = ExtractionOptions( - prompt=extractionPrompt, - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - extractionOptionsList.append((doc, extractionOptions)) - - # Extract content from text documents - for doc, extractionOptions in extractionOptionsList: - try: - docResults = self.services.extraction.extractContent( - [doc], - extractionOptions, - parentOperationId=operationId - ) - extractedResults.extend(docResults) - except Exception as e: - logger.error(f"Error extracting content from {doc.fileName}: {str(e)}") - - logger.info(f"Extracted content from {len(extractedResults)} text document(s)") - - # Process images to analyze (vision call) - if imageDocsToAnalyze: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.2, f"Analyzing {len(imageDocsToAnalyze)} image(s) with vision AI...") - - # Extract content from images using vision analysis - for doc in imageDocsToAnalyze: - try: - docPurpose = documentPurposes.get(doc.id, {}) - extractionPrompt = docPurpose.get("extractionPrompt") or "Extract all text and information from this image" - - extractionOptions = ExtractionOptions( - prompt=extractionPrompt, - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - - docResults = self.services.extraction.extractContent( - [doc], - extractionOptions, - parentOperationId=operationId - ) - extractedResults.extend(docResults) - except Exception as e: - logger.error(f"Error analyzing image {doc.fileName}: {str(e)}") - - logger.info(f"Analyzed {len(imageDocsToAnalyze)} image(s) with vision AI") - - # Process images to 
include (store image data) - if imageDocsToInclude: - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.25, f"Preparing {len(imageDocsToInclude)} image(s) for inclusion...") - - # Get image data for inclusion - from modules.interfaces.interfaceDbComponentObjects import getInterface - dbInterface = getInterface() - - for doc in imageDocsToInclude: - try: - # Get image bytes - imageBytes = dbInterface.getFileData(doc.fileId) - if imageBytes: - # Encode to base64 - import base64 - base64Data = base64.b64encode(imageBytes).decode('utf-8') - - # Create image document entry - imageDoc = { - "id": doc.id, - "fileName": doc.fileName, - "mimeType": doc.mimeType, - "base64Data": base64Data, - "altText": doc.fileName or "Image", - "fileSize": doc.fileSize - } - imageDocuments.append(imageDoc) - logger.debug(f"Prepared image {doc.fileName} for inclusion ({len(base64Data)} chars base64)") - else: - logger.warning(f"Could not retrieve image data for {doc.fileName}") - except Exception as e: - logger.error(f"Error preparing image {doc.fileName} for inclusion: {str(e)}") - - logger.info(f"Prepared {len(imageDocuments)} image(s) for inclusion") - - # Build cachedContent with all information - cachedContent = { - "extractedContent": extractedResults, - "imageDocuments": imageDocuments, - "documentPurposes": documentPurposes, - "extractionTimestamp": time.time(), - "sourceDocuments": [doc.id for doc in chatDocuments] - } - - logger.info(f"Document processing complete: {len(extractedResults)} extracted, {len(imageDocuments)} images to include") - # Generate structure - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.2, "Generating document structure...") + # Prepare title + title = parameters.get("documentType") or "Generated Document" - structure = await structureGenerator.generateStructure( - userPrompt=prompt, - documentList=documentList if documentList else None, - cachedContent=cachedContent, - maxSectionLength=maxSectionLength, - existingImages=imageDocuments # Pass existing images for structure generation + # Call AI service for document generation + # callAiContent handles documentList internally via Phases 5A-5E + options = AiCallOptions( + operationType=OperationTypeEnum.DATA_GENERATE, + priority=PriorityEnum.BALANCED, + processingMode=ProcessingModeEnum.DETAILED, + compressPrompt=False, + compressContext=False ) - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") - - # Phase 2: Content Generation - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.34, - "Starting content generation..." 
- ) - - contentGenerator = ContentGenerator(self.services) - - # Create enhanced progress callback - def progressCallback(sectionIndex: int, totalSections: int, message: str): - if progressLogging: - # Calculate progress: 34% to 90% for content generation phase - if totalSections > 0: - progress = 0.34 + (0.56 * (sectionIndex / totalSections)) - else: - progress = 0.34 - - # Format message - if sectionIndex > 0 and totalSections > 0: - progressMessage = f"Section {sectionIndex}/{totalSections}: {message}" - else: - progressMessage = message - - self.services.chat.progressLogUpdate( - operationId, - progress, - progressMessage - ) - - completeStructure = await contentGenerator.generateContent( - structure=structure, - cachedContent=cachedContent, - userPrompt=prompt, - progressCallback=progressCallback, - parallelGeneration=parallelGeneration - ) - - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") - - # Phase 3: Integration & Rendering - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.91, - "Rendering final document..." - ) - - # Use existing renderReport method - title = structure.get("metadata", {}).get("title", "Generated Document") - if documentType: - title = f"{title} ({documentType})" - - renderedContent, mimeType, images = await self.services.generation.renderReport( - extractedContent=completeStructure, + aiResponse: AiResponse = await self.services.ai.callAiContent( + prompt=prompt, + options=options, + documentList=docRefList, # Übergebe documentList direkt - callAiContent macht Phasen 5A-5E outputFormat=resultType, title=title, - userPrompt=prompt, - aiService=self.services.ai + parentOperationId=parentOperationId ) - # Build list of documents to return - documents = [ - ActionDocument( - documentName=f"document.{resultType}", - documentData=renderedContent, - mimeType=mimeType - ) - ] + # Convert AiResponse to ActionResult + documents = [] - # Add images as separate documents - if images: - logger.info(f"Processing {len(images)} image(s) from renderer") - import base64 - for idx, imageData in enumerate(images): - try: - base64Data = imageData.get("base64Data", "") - altText = imageData.get("altText", f"image_{idx + 1}") - caption = imageData.get("caption", "") - sectionId = imageData.get("sectionId", f"section_{idx + 1}") - - if base64Data: - # Decode base64 to bytes - imageBytes = base64.b64decode(base64Data) - - # Determine filename and mime type - filename = imageData.get("filename", f"image_{idx + 1}.png") - if not filename.lower().endswith(('.png', '.jpg', '.jpeg', '.gif', '.webp')): - filename = f"image_{idx + 1}.png" - - # Determine mime type from filename - if filename.lower().endswith('.png'): - imageMimeType = "image/png" - elif filename.lower().endswith(('.jpg', '.jpeg')): - imageMimeType = "image/jpeg" - elif filename.lower().endswith('.gif'): - imageMimeType = "image/gif" - elif filename.lower().endswith('.webp'): - imageMimeType = "image/webp" - else: - imageMimeType = "image/png" # Default - - # Add image document - documents.append(ActionDocument( - documentName=filename, - documentData=imageBytes, - mimeType=imageMimeType - )) - logger.info(f"Added image document: {filename} (section: {sectionId}, {len(imageBytes)} bytes, alt: {altText})") + # Convert DocumentData to ActionDocument + if aiResponse.documents: + for docData in aiResponse.documents: + documents.append(ActionDocument( + documentName=docData.documentName, + documentData=docData.documentData, + 
mimeType=docData.mimeType, + sourceJson=docData.sourceJson if hasattr(docData, 'sourceJson') else None + )) + + # If no documents but content exists, create a document from content + if not documents and aiResponse.content: + # Determine document name from metadata + docName = f"document.{resultType}" + if aiResponse.metadata and aiResponse.metadata.filename: + docName = aiResponse.metadata.filename + elif aiResponse.metadata and aiResponse.metadata.title: + import re + sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", aiResponse.metadata.title) + sanitized = re.sub(r"_+", "_", sanitized).strip("_") + if sanitized: + if not sanitized.lower().endswith(f".{resultType}"): + docName = f"{sanitized}.{resultType}" else: - logger.warning(f"Image {idx + 1} (section: {sectionId}) has no base64Data, skipping") - except Exception as e: - logger.error(f"Error adding image document {idx + 1}: {str(e)}", exc_info=True) - continue - else: - logger.debug("No images returned from renderer") - - # Note: Document creation is handled by the workflow system - # We just return the rendered content and images in ActionResult - - if progressLogging: - self.services.chat.progressLogFinish(operationId, True) + docName = sanitized + + # Determine mime type + mimeType = "text/plain" + if resultType == "html": + mimeType = "text/html" + elif resultType == "json": + mimeType = "application/json" + elif resultType == "pdf": + mimeType = "application/pdf" + elif resultType == "md": + mimeType = "text/markdown" + + documents.append(ActionDocument( + documentName=docName, + documentData=aiResponse.content.encode('utf-8') if isinstance(aiResponse.content, str) else aiResponse.content, + mimeType=mimeType + )) return ActionResult.isSuccess(documents=documents) except Exception as e: - logger.error(f"Error in hierarchical document generation: {str(e)}") - if progressLogging: - self.services.chat.progressLogFinish(operationId, False) + logger.error(f"Error in document generation: {str(e)}") return ActionResult.isFailure(error=str(e)) diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index 2468d949..5abc57cd 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -8,11 +8,12 @@ Universal AI document processing action. 
import logging import time +import json from typing import Dict, Any, List, Optional from modules.workflows.methods.methodBase import action from modules.datamodels.datamodelChat import ActionResult, ActionDocument from modules.datamodels.datamodelAi import AiCallOptions -from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy, ContentPart +from modules.datamodels.datamodelExtraction import ContentPart logger = logging.getLogger(__name__) @@ -82,8 +83,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available logger.info(f"Using result type: {resultType} -> {output_extension}") - # Phase 7.3: Extract content first if documents provided, then use contentParts - # Check if contentParts are already provided (preferred path) + # Check if contentParts are already provided (from context.extractContent or other sources) contentParts: Optional[List[ContentPart]] = None if "contentParts" in parameters: contentParts = parameters.get("contentParts") @@ -95,63 +95,42 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty") contentParts = None - # If contentParts not provided but documentList is, extract content first - if not contentParts and documentList.references: - self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents") - - # Get ChatDocuments - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - if not chatDocuments: - logger.warning("No documents found in documentList") - else: - logger.info(f"Extracting content from {len(chatDocuments)} documents") - - # Prepare extraction options (use defaults if not provided) - extractionOptions = parameters.get("extractionOptions") - if not extractionOptions: - extractionOptions = ExtractionOptions( - prompt="Extract all content from the document", - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - - # Extract content using extraction service with hierarchical progress logging - # Pass operationId for per-document progress tracking - extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions, operationId=operationId) - - # Combine all ContentParts from all extracted results - contentParts = [] - for extracted in extractedResults: - if extracted.parts: - contentParts.extend(extracted.parts) - - logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents") - # Update progress - preparing AI call self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call") - # Build options with only resultFormat - let service layer handle all other parameters + # Build options output_format = output_extension.replace('.', '') or 'txt' options = AiCallOptions( resultFormat=output_format - # Removed all model parameters - service layer will analyze prompt and determine optimal parameters ) # Update progress - calling AI self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI") - # Use unified callAiContent method with contentParts (extraction is now separate) - aiResponse = await self.services.ai.callAiContent( - prompt=aiPrompt, - options=options, - contentParts=contentParts, # Already extracted (or None if no documents) - outputFormat=output_format, - parentOperationId=operationId - ) + # 
Use unified callAiContent method
+        # If contentParts provided (pre-extracted), use them directly
+        # Otherwise, pass documentList and let callAiContent handle Phases 5A-5E internally
+        # Note: ContentExtracted documents (from context.extractContent) are now handled
+        # automatically in _extractAndPrepareContent() (Phase 5B)
+        if contentParts:
+            # Pre-extracted ContentParts - use them directly
+            aiResponse = await self.services.ai.callAiContent(
+                prompt=aiPrompt,
+                options=options,
+                contentParts=contentParts,  # Pre-extracted ContentParts
+                outputFormat=output_format,
+                parentOperationId=operationId
+            )
+        else:
+            # Pass documentList - callAiContent handles Phases 5A-5E internally
+            # This includes automatic detection of ContentExtracted documents
+            aiResponse = await self.services.ai.callAiContent(
+                prompt=aiPrompt,
+                options=options,
+                documentList=documentList,  # callAiContent handles Phases 5A-5E
+                outputFormat=output_format,
+                parentOperationId=operationId
+            )

         # Update progress - processing result
         self.services.chat.progressLogUpdate(operationId, 0.8, "Processing result")
diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py
index 7595c2eb..881b007d 100644
--- a/modules/workflows/methods/methodAi/methodAi.py
+++ b/modules/workflows/methods/methodAi/methodAi.py
@@ -15,9 +15,7 @@ from .actions.process import process
 from .actions.webResearch import webResearch
 from .actions.summarizeDocument import summarizeDocument
 from .actions.translateDocument import translateDocument
-from .actions.convert import convert
 from .actions.convertDocument import convertDocument
-from .actions.extractData import extractData
 from .actions.generateDocument import generateDocument

 logger = logging.getLogger(__name__)

@@ -192,69 +190,6 @@ class MethodAi(MethodBase):
                 },
                 execute=translateDocument.__get__(self, self.__class__)
             ),
-            "convert": WorkflowActionDefinition(
-                actionId="ai.convert",
-                description="Convert documents/data between different formats with specific formatting options",
-                parameters={
-                    "documentList": WorkflowActionParameter(
-                        name="documentList",
-                        type="List[str]",
-                        frontendType=FrontendType.DOCUMENT_REFERENCE,
-                        required=True,
-                        description="Document reference(s) to convert"
-                    ),
-                    "inputFormat": WorkflowActionParameter(
-                        name="inputFormat",
-                        type="str",
-                        frontendType=FrontendType.SELECT,
-                        frontendOptions=["json", "csv", "xlsx", "txt"],
-                        required=True,
-                        description="Source format"
-                    ),
-                    "outputFormat": WorkflowActionParameter(
-                        name="outputFormat",
-                        type="str",
-                        frontendType=FrontendType.SELECT,
-                        frontendOptions=["csv", "json", "xlsx", "txt"],
-                        required=True,
-                        description="Target format"
-                    ),
-                    "columnsPerRow": WorkflowActionParameter(
-                        name="columnsPerRow",
-                        type="int",
-                        frontendType=FrontendType.NUMBER,
-                        required=False,
-                        description="For CSV output, number of columns per row.
Default: auto-detect", - validation={"min": 1, "max": 100} - ), - "delimiter": WorkflowActionParameter( - name="delimiter", - type="str", - frontendType=FrontendType.TEXT, - required=False, - default=",", - description="For CSV output, delimiter character" - ), - "includeHeader": WorkflowActionParameter( - name="includeHeader", - type="bool", - frontendType=FrontendType.CHECKBOX, - required=False, - default=True, - description="For CSV output, whether to include header row" - ), - "language": WorkflowActionParameter( - name="language", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["de", "en", "fr"], - required=False, - default="en", - description="Language for output" - ) - }, - execute=convert.__get__(self, self.__class__) - ), "convertDocument": WorkflowActionDefinition( actionId="ai.convertDocument", description="Convert documents between different formats (PDF→Word, Excel→CSV, etc.)", @@ -285,45 +220,6 @@ class MethodAi(MethodBase): }, execute=convertDocument.__get__(self, self.__class__) ), - "extractData": WorkflowActionDefinition( - actionId="ai.extractData", - description="Extract structured data from documents (key-value pairs, entities, facts, etc.)", - parameters={ - "documentList": WorkflowActionParameter( - name="documentList", - type="List[str]", - frontendType=FrontendType.DOCUMENT_REFERENCE, - required=True, - description="Document reference(s) to extract data from" - ), - "dataStructure": WorkflowActionParameter( - name="dataStructure", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["flat", "nested", "list"], - required=False, - default="nested", - description="Desired data structure" - ), - "fields": WorkflowActionParameter( - name="fields", - type="List[str]", - frontendType=FrontendType.MULTISELECT, - required=False, - description="Specific fields/properties to extract (e.g., [name, date, amount])" - ), - "resultType": WorkflowActionParameter( - name="resultType", - type="str", - frontendType=FrontendType.SELECT, - frontendOptions=["json", "csv", "xlsx"], - required=False, - default="json", - description="Output format" - ) - }, - execute=extractData.__get__(self, self.__class__) - ), "generateDocument": WorkflowActionDefinition( actionId="ai.generateDocument", description="Generate documents from scratch or based on templates/inputs", @@ -371,9 +267,7 @@ class MethodAi(MethodBase): self.webResearch = webResearch.__get__(self, self.__class__) self.summarizeDocument = summarizeDocument.__get__(self, self.__class__) self.translateDocument = translateDocument.__get__(self, self.__class__) - self.convert = convert.__get__(self, self.__class__) self.convertDocument = convertDocument.__get__(self, self.__class__) - self.extractData = extractData.__get__(self, self.__class__) self.generateDocument = generateDocument.__get__(self, self.__class__) def _format_timestamp_for_filename(self) -> str: diff --git a/modules/workflows/methods/methodContext/actions/extractContent.py b/modules/workflows/methods/methodContext/actions/extractContent.py index 8c5fd5fb..949ac63d 100644 --- a/modules/workflows/methods/methodContext/actions/extractContent.py +++ b/modules/workflows/methods/methodContext/actions/extractContent.py @@ -19,10 +19,21 @@ logger = logging.getLogger(__name__) @action async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: """ - Extract content from documents (separate from AI calls). + Extract raw content parts from documents without AI processing. 
- This action performs pure content extraction without AI processing. - The extracted ContentParts can then be used by subsequent AI processing actions. + This action performs pure content extraction WITHOUT AI/OCR processing. + It returns ContentParts with different typeGroups: + - "text": Extracted text from text-based formats (PDF text layers, Word docs, etc.) + - "image": Images as base64-encoded data (NOT converted to text, no OCR) + - "table": Tables as structured data + - "structure": Structured content (JSON, etc.) + - "container": Container elements (PDF pages, etc.) + + IMPORTANT: + - Images are returned as base64 data, NOT as extracted text + - No OCR is performed - images are preserved as visual elements + - Text extraction only works for text-based formats (not images) + - The extracted ContentParts can then be used by subsequent AI processing actions Parameters: - documentList (list, required): Document reference(s) to extract content from. @@ -30,7 +41,8 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: Returns: - ActionResult with ActionDocument containing ContentExtracted objects - - ContentExtracted.parts contains List[ContentPart] (already chunked if needed) + - ContentExtracted.parts contains List[ContentPart] with various typeGroups + - Each ContentPart has a typeGroup indicating its type (text, image, table, etc.) """ try: # Init progress logger @@ -79,12 +91,26 @@ async def extractContent(self, parameters: Dict[str, Any]) -> ActionResult: # Convert dict to ExtractionOptions object if needed, or create defaults if extractionOptionsParam: if isinstance(extractionOptionsParam, dict): + # Ensure required fields are present + if "prompt" not in extractionOptionsParam: + extractionOptionsParam["prompt"] = "Extract all content from the document" + if "mergeStrategy" not in extractionOptionsParam: + extractionOptionsParam["mergeStrategy"] = MergeStrategy( + mergeType="concatenate", + groupBy="typeGroup", + orderBy="id" + ) # Convert dict to ExtractionOptions object - extractionOptions = ExtractionOptions(**extractionOptionsParam) + try: + extractionOptions = ExtractionOptions(**extractionOptionsParam) + except Exception as e: + logger.warning(f"Failed to create ExtractionOptions from dict: {str(e)}, using defaults") + extractionOptions = None elif isinstance(extractionOptionsParam, ExtractionOptions): extractionOptions = extractionOptionsParam else: # Invalid type, use defaults + logger.warning(f"Invalid extractionOptions type: {type(extractionOptionsParam)}, using defaults") extractionOptions = None else: extractionOptions = None diff --git a/modules/workflows/methods/methodContext/methodContext.py b/modules/workflows/methods/methodContext/methodContext.py index a635764f..942f3f85 100644 --- a/modules/workflows/methods/methodContext/methodContext.py +++ b/modules/workflows/methods/methodContext/methodContext.py @@ -50,7 +50,7 @@ class MethodContext(MethodBase): ), "extractContent": WorkflowActionDefinition( actionId="context.extractContent", - description="Extract content from documents (separate from AI calls)", + description="Extract raw content parts from documents without AI processing. Returns ContentParts with different typeGroups (text, image, table, structure, container). Images are returned as base64 data, not as extracted text. Text content is extracted from text-based formats (PDF text layers, Word docs, etc.) but NOT from images (no OCR). 
Use this action to prepare documents for subsequent AI processing actions.", parameters={ "documentList": WorkflowActionParameter( name="documentList", @@ -64,7 +64,7 @@ class MethodContext(MethodBase): type="dict", frontendType=FrontendType.JSON, required=False, - description="Extraction options (if not provided, defaults are used)" + description="Extraction options (if not provided, defaults are used). Note: This action does NOT use AI - it performs pure content extraction. Images are preserved as base64 data, not converted to text." ) }, execute=extractContent.__get__(self, self.__class__) diff --git a/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md b/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md deleted file mode 100644 index 39c649ce..00000000 --- a/modules/workflows/processing/shared/ARCHITECTURE_IMPLEMENTATION_ANALYSIS.md +++ /dev/null @@ -1,354 +0,0 @@ -# Architecture & Implementation Analysis -## Deep Review of Hierarchical Document Generation - -**Date**: 2025-12-22 -**Status**: Critical Issues Found - ---- - -## Executive Summary - -The hierarchical document generation system is **partially implemented** but has **critical architectural mismatches** and **implementation gaps** that prevent it from working correctly. While core components exist, several fundamental issues need to be addressed. - ---- - -## ✅ What's Correctly Implemented - -### Phase 1: Core Infrastructure ✅ -- ✅ `StructureGenerator` class exists with `generateStructure()` method -- ✅ `ContentGenerator` class exists with `generateContent()` method -- ✅ `ContentIntegrator` class exists with `integrateContent()` method -- ✅ `generateDocument` action uses hierarchical approach -- ✅ Basic progress logging implemented -- ✅ Error handling with `createErrorSection()` implemented - -### Phase 2: Image Generation ✅ -- ✅ `_generateImageSection()` method implemented -- ✅ Image prompt extraction from structure -- ✅ Base64 image data storage -- ✅ Error handling for image failures - -### Phase 3: Parallel Processing ✅ -- ✅ `_generateSectionsParallel()` method implemented -- ✅ `_generateSectionsSequential()` method implemented -- ✅ Batch processing for large documents -- ✅ Progress callback system -- ✅ Exception handling in parallel execution - ---- - -## ❌ Critical Issues Found - -### Issue 1: Previous Sections Context Not Working in Parallel Mode ⚠️ **PARTIALLY FIXED** - -**Problem**: -- In parallel mode, sections within the same batch cannot see each other (correct) -- BUT: Sections in later batches should see sections from earlier batches -- **Current Status**: Code was fixed to accumulate previous sections, but needs verification - -**Location**: `subContentGenerator.py` lines 240-319 - -**Fix Applied**: -- Added `accumulatedPreviousSections` to track sections across batches -- Pass accumulated sections to each batch -- **VERIFICATION NEEDED**: Test that prompts actually show previous sections - -**Risk**: Medium - May cause continuity issues in generated content - ---- - -### Issue 2: Variable Shadowing Bug ✅ **FIXED** - -**Problem**: -- `contentType` variable was shadowed in loop, causing wrong section type in prompts - -**Location**: `subContentGenerator.py` line 676 - -**Fix Applied**: -- Renamed loop variable to `prevContentType` - -**Status**: ✅ Fixed - ---- - -### Issue 3: Missing `generation_hint` in Structure Response ✅ **FIXED** - -**Problem**: -- Structure generator creates generic hints like "Section heading" instead of meaningful hints -- AI generates 
same content for all headings because hints are identical - -**Location**: `subStructureGenerator.py` lines 242-269 - -**Fix Applied**: -- Added `_extractMeaningfulHint()` method to extract meaningful hints from section IDs -- Example: `section_heading_current_state` → "Current State" - -**Status**: ✅ Fixed - ---- - -### Issue 4: JSON Template Architecture Mismatch ✅ **FIXED** - -**Problem**: -- `jsonTemplateDocument` showed filled `elements` arrays, but structure generation requires empty arrays -- Template missing `generation_hint` and `complexity` fields -- Template showed `order: 0` but should start from 1 - -**Location**: `datamodelJson.py` - -**Fix Applied**: -- Updated template to show empty `elements: []` -- Added `generation_hint` to all sections -- Added `complexity` to all sections -- Changed `order` to start from 1 -- Added `title` to metadata - -**Status**: ✅ Fixed - ---- - -### Issue 5: Structure Prompt Instructions Mismatch ✅ **FIXED** - -**Problem**: -- Prompt said "All sections must have empty elements arrays" but template showed filled arrays -- Prompt didn't explicitly require `generation_hint` and `complexity` fields - -**Location**: `subStructureGenerator.py` lines 181-190 - -**Fix Applied**: -- Enhanced prompt to explicitly require `generation_hint` and `complexity` -- Clarified that template examples show structure, but elements must be empty - -**Status**: ✅ Fixed - ---- - -## ⚠️ Remaining Issues & Gaps - -### Issue 6: Missing Validation Before Content Generation ⚠️ **NOT IMPLEMENTED** - -**Problem**: -- No validation that structure has required fields before content generation -- No check that all sections have `generation_hint` before generating content - -**Expected** (from Phase 6): -```python -# Validate structure before content generation -if not validateStructure(structure): - raise ValueError("Invalid structure") -``` - -**Current**: Validation happens in `_validateAndEnhanceStructure()` but only adds missing fields, doesn't validate - -**Impact**: Low - Enhancement adds missing fields, but explicit validation would be better - -**Recommendation**: Add explicit validation method - ---- - -### Issue 7: Previous Sections Formatting Missing Content ⚠️ **PARTIALLY IMPLEMENTED** - -**Problem**: -- Previous sections formatting extracts content from `elements`, but if sections don't have elements yet (in parallel mode), it shows nothing -- Should show `generation_hint` as fallback when elements not available - -**Location**: `subContentGenerator.py` lines 671-709 - -**Current Behavior**: -- Shows content preview if elements exist -- Shows nothing if elements don't exist - -**Expected Behavior**: -- Show content preview if elements exist -- Show `generation_hint` as fallback if elements don't exist - -**Impact**: Medium - Reduces context quality in parallel generation - -**Recommendation**: Add fallback to show `generation_hint` when elements not available - ---- - -### Issue 8: Debug File Shows Raw Response, Not Validated Structure ⚠️ **NOT FIXED** - -**Problem**: -- Debug file writes `aiResponse.content` (raw AI response) before validation -- Can't verify if `generation_hint` was added by validation - -**Location**: `subStructureGenerator.py` lines 77-84 - -**Impact**: Low - Makes debugging harder but doesn't affect functionality - -**Recommendation**: Write validated structure to separate debug file - ---- - -### Issue 9: Missing Unit Tests ⚠️ **NOT IMPLEMENTED** - -**Problem**: -- No unit tests for any components (Phase 7 requirement) -- No tests for structure 
generation -- No tests for content generation -- No tests for integration - -**Impact**: High - No way to verify correctness or catch regressions - -**Recommendation**: Add comprehensive unit tests - ---- - -### Issue 10: Missing Integration Tests ⚠️ **NOT IMPLEMENTED** - -**Problem**: -- No end-to-end tests -- No tests with images -- No tests with long documents -- No error scenario tests - -**Impact**: High - No verification of complete flow - -**Recommendation**: Add integration tests - ---- - -### Issue 11: Content Caching Not Optimized ⚠️ **PARTIALLY IMPLEMENTED** - -**Problem**: -- Content is extracted and cached, but: - - No cache validation (check if documents changed) - - No cache reuse verification - - Content is passed to prompts but may not be formatted efficiently - -**Expected** (from Phase 5): -- Cache validation -- Efficient formatting -- Performance testing - -**Current**: Basic caching exists but not optimized - -**Impact**: Medium - Works but could be more efficient - -**Recommendation**: Add cache validation and optimization - ---- - -### Issue 12: Renderer Updates Not Verified ⚠️ **UNKNOWN** - -**Problem**: -- Implementation plan requires renderer updates for images -- HTML renderer should create separate image files -- PDF/XLSX/PPTX renderers should embed images -- **Status unknown** - need to verify renderers handle images correctly - -**Impact**: High - Images may not render correctly - -**Recommendation**: Verify all renderers handle images correctly - ---- - -## 📋 Architecture Compliance Check - -### Data Structure Compliance ✅ - -| Field | Required | Implemented | Status | -|-------|----------|-------------|--------| -| `metadata.title` | Yes | ✅ | ✅ | -| `metadata.split_strategy` | Yes | ✅ | ✅ | -| `sections[].id` | Yes | ✅ | ✅ | -| `sections[].content_type` | Yes | ✅ | ✅ | -| `sections[].complexity` | Yes | ✅ | ✅ | -| `sections[].generation_hint` | Yes | ✅ | ✅ | -| `sections[].order` | Yes | ✅ | ✅ | -| `sections[].elements` | Yes | ✅ | ✅ | -| `sections[].image_prompt` | Image only | ✅ | ✅ | - -### Component Method Compliance ✅ - -| Component | Method | Required | Implemented | Status | -|-----------|--------|----------|-------------|--------| -| StructureGenerator | `generateStructure()` | Yes | ✅ | ✅ | -| StructureGenerator | `_createStructurePrompt()` | Yes | ✅ | ✅ | -| StructureGenerator | `_identifySectionComplexity()` | Yes | ✅ | ✅ | -| StructureGenerator | `_extractImagePrompts()` | Yes | ✅ | ✅ | -| StructureGenerator | `_validateAndEnhanceStructure()` | Yes | ✅ | ✅ | -| StructureGenerator | `_extractMeaningfulHint()` | Yes | ✅ | ✅ | -| ContentGenerator | `generateContent()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionContent()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSimpleSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateComplexTextSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateImageSection()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionsParallel()` | Yes | ✅ | ✅ | -| ContentGenerator | `_generateSectionsSequential()` | Yes | ✅ | ✅ | -| ContentGenerator | `_createSectionPrompt()` | Yes | ✅ | ✅ | -| ContentIntegrator | `integrateContent()` | Yes | ✅ | ✅ | -| ContentIntegrator | `validateCompleteness()` | Yes | ✅ | ✅ | -| ContentIntegrator | `createErrorSection()` | Yes | ✅ | ✅ | - ---- - -## 🎯 Priority Fixes Needed - -### Critical (Must Fix) -1. ✅ **Issue 2**: Variable shadowing bug - **FIXED** -2. ✅ **Issue 3**: Missing generation_hint - **FIXED** -3. 
✅ **Issue 4**: JSON template mismatch - **FIXED** -4. ✅ **Issue 5**: Prompt instructions mismatch - **FIXED** -5. ⚠️ **Issue 1**: Previous sections context - **NEEDS VERIFICATION** - -### High Priority (Should Fix) -6. ⚠️ **Issue 12**: Renderer image handling - **NEEDS VERIFICATION** -7. ⚠️ **Issue 9**: Missing unit tests - **NOT IMPLEMENTED** -8. ⚠️ **Issue 10**: Missing integration tests - **NOT IMPLEMENTED** - -### Medium Priority (Nice to Have) -9. ⚠️ **Issue 7**: Previous sections formatting fallback - **PARTIALLY IMPLEMENTED** -10. ⚠️ **Issue 11**: Content caching optimization - **PARTIALLY IMPLEMENTED** -11. ⚠️ **Issue 6**: Structure validation - **NOT IMPLEMENTED** -12. ⚠️ **Issue 8**: Debug file improvements - **NOT IMPLEMENTED** - ---- - -## ✅ Summary - -### What Works -- Core infrastructure is implemented -- Image generation is integrated -- Parallel processing is implemented -- Error handling is in place -- Progress logging works - -### What's Fixed (This Session) -- Variable shadowing bug -- Missing generation_hint extraction -- JSON template architecture mismatch -- Prompt instructions clarity -- Previous sections tracking (needs verification) - -### What Needs Work -- Unit and integration tests -- Renderer verification -- Previous sections formatting fallback -- Cache optimization -- Structure validation - -### Overall Status -**Architecture**: ✅ **85% Compliant** -**Implementation**: ✅ **80% Complete** -**Testing**: ❌ **0% Complete** -**Production Ready**: ⚠️ **Not Yet** (needs testing and verification) - ---- - -## Next Steps - -1. **Verify Issue 1 Fix**: Test that previous sections are correctly tracked in parallel mode -2. **Verify Issue 12**: Test that all renderers handle images correctly -3. **Add Unit Tests**: Start with critical components (StructureGenerator, ContentGenerator) -4. **Add Integration Tests**: Test end-to-end flow with various scenarios -5. **Improve Previous Sections Formatting**: Add fallback to show generation_hint when elements not available -6. **Add Structure Validation**: Explicit validation before content generation -7. **Optimize Content Caching**: Add cache validation and efficient formatting - ---- - -**Analysis Complete**: 2025-12-22 - diff --git a/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md deleted file mode 100644 index d0a59e80..00000000 --- a/modules/workflows/processing/shared/CONCEPT_HIERARCHICAL_DOCUMENT_GENERATION.md +++ /dev/null @@ -1,459 +0,0 @@ -# Concept: Hierarchical Document Generation with Image Integration - -## Executive Summary - -This concept proposes a **three-phase hierarchical approach** to document generation that enables proper image integration and handles complex documents efficiently. - -**Key Decisions**: -- ✅ **Performance**: Parallel processing with ChatLog progress messages -- ✅ **Error Handling**: Skip failed sections, show error messages -- ✅ **Image Storage**: Store as base64 in JSON (renderers need direct access) -- ✅ **Backward Compatibility**: Not needed - implement as new default - -**Renderer Status**: -- ✅ **Ready**: Text, Markdown, DOCX renderers -- ⚠️ **Needs Update**: HTML (create separate image files), PDF (embed images) -- ⚠️ **Needs Implementation**: XLSX, PPTX (add image support) - -## Problem Statement - -Currently, the document generation system has the following limitations: - -1. 
**No Image Integration**: Images are generated separately but cannot be embedded into document structures -2. **Single-Pass Generation**: Documents are generated in one AI call, making it difficult to handle complex sections (long text, images, chapters) -3. **Repeated Extraction**: Content extraction may happen multiple times unnecessarily -4. **No Structured Approach**: No mechanism to first define document structure, then populate sections - -## Current Architecture Analysis - -### Current Flow: -``` -User Request → ai.generateDocument → ai.process → AI JSON Generation → Renderer → Final Document -``` - -### Issues: -- AI generates complete JSON structure in one pass -- Images are generated separately via `ai.generate` action -- No mechanism to integrate generated images into document structure -- JSON schema supports `image` content_type, but AI rarely generates it -- Content extraction happens per action, not cached/reused - -### Current Image Handling: -- Images can be rendered IF they exist in JSON structure (`content_type: "image"`) -- Image data expected as `base64Data` in elements -- Renderers support image rendering (Docx, PDF, HTML, etc.) -- But images are never generated WITHIN document generation - -## Proposed Solution: Hierarchical Document Generation - -### Core Concept - -**Three-Phase Approach:** -1. **Structure Generation Phase**: Generate document skeleton with section placeholders -2. **Content Generation Phase**: Generate content for each section (text or image) via sub-prompts -3. **Integration Phase**: Merge all generated content into final document structure - -### Architecture Overview - -``` -┌─────────────────────────────────────────────────────────────┐ -│ Phase 1: Structure Generation │ -│ - Generate document skeleton │ -│ - Identify sections (text, image, complex) │ -│ - Create section placeholders with metadata │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 2: Content Generation (Tree-like) │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 1: Heading (simple) │ │ -│ │ → Generate directly │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 2: Paragraph (simple) │ │ -│ │ → Generate directly │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 3: Image (complex) │ │ -│ │ → Sub-prompt: Generate image │ │ -│ │ → Store image data │ │ -│ │ → Create image section with base64Data │ │ -│ └──────────────────────────────────────────────┘ │ -│ │ -│ ┌──────────────────────────────────────────────┐ │ -│ │ Section 4: Long Chapter (complex) │ │ -│ │ → Sub-prompt: Generate chapter content │ │ -│ │ → Split into subsections if needed │ │ -│ └──────────────────────────────────────────────┘ │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 3: Integration │ -│ - Merge all generated content │ -│ - Replace placeholders with actual data │ -│ - Validate structure completeness │ -│ - Render to final format │ -└─────────────────────────────────────────────────────────────┘ -``` - -## Detailed Design - -### Phase 1: Structure Generation - -**Purpose**: Create document skeleton with section metadata - -**Process**: -1. AI generates document structure with sections -2. 
Each section includes: - - `id`: Unique identifier - - `content_type`: Type (heading, paragraph, image, table, etc.) - - `complexity`: "simple" or "complex" - - `generation_hint`: Instructions for content generation - - `order`: Section order - - `elements`: Empty or placeholder - -**Example Structure**: -```json -{ - "metadata": { - "title": "Children's Bedtime Story", - "split_strategy": "single_document" - }, - "documents": [{ - "id": "doc_1", - "sections": [ - { - "id": "section_title", - "content_type": "heading", - "complexity": "simple", - "generation_hint": "Story title", - "order": 1, - "elements": [] - }, - { - "id": "section_intro", - "content_type": "paragraph", - "complexity": "simple", - "generation_hint": "Introduction paragraph", - "order": 2, - "elements": [] - }, - { - "id": "section_image_1", - "content_type": "image", - "complexity": "complex", - "generation_hint": "Illustration: Rabbit meeting owl in moonlit forest", - "image_prompt": "A small brown rabbit sitting in a peaceful forest clearing under moonlight with stars, meeting a wise owl perched on a branch", - "order": 3, - "elements": [] - }, - { - "id": "section_chapter_1", - "content_type": "paragraph", - "complexity": "complex", - "generation_hint": "First chapter: Rabbit's adventure begins", - "order": 4, - "elements": [] - } - ] - }] -} -``` - -### Phase 2: Content Generation - -**Purpose**: Generate actual content for each section - -**Process**: -1. Iterate through sections in order -2. For each section: - - **Simple sections** (heading, short paragraph): - - Generate content directly via AI - - Populate `elements` array - - **Complex sections** (image, long chapter): - - Create sub-prompt based on `generation_hint` and `image_prompt` - - Generate content via specialized action: - - Images: `ai.generate` with image generation - - Long text: `ai.process` with focused prompt - - Store generated content - - Populate `elements` array - -**Content Caching**: -- Extract content from source documents ONCE at the start -- Cache extracted content for reuse across all sections -- Pass cached content to sub-prompts to avoid re-extraction - -**Image Generation**: -- For `content_type: "image"` sections: - - Use `image_prompt` from structure - - Call `ai.generate` action with image generation - - Receive base64 image data - - Create image element: - ```json - { - "url": "data:image/png;base64,", - "base64Data": "", - "altText": "", - "caption": "" - } - ``` - -### Phase 3: Integration - -**Purpose**: Merge all content into final document structure - -**Process**: -1. Validate all sections have content -2. Merge generated content into structure -3. Replace placeholders with actual data -4. Finalize JSON structure -5. Render to target format (docx, pdf, html, etc.) - -## Implementation Strategy - -### New Components Needed - -1. **Structure Generator** (`structureGenerator.py`) - - Generates document skeleton - - Identifies section complexity - - Creates generation hints - -2. **Content Generator** (`contentGenerator.py`) - - Generates content for each section - - Handles simple vs complex sections - - Manages sub-prompts and image generation - - Caches extracted content - -3. **Content Integrator** (`contentIntegrator.py`) - - Merges generated content - - Validates completeness - - Finalizes document structure - -### Modified Components - -1. **`generateDocument` action** - - Implement hierarchical generation as default - - Orchestrate three phases - - Add progress logging for each phase - -2. 
**`process` action** - - Support content caching (extract once, reuse) - - Support sub-prompt generation for sections - -3. **Prompt Builder** (`subPromptBuilderGeneration.py`) - - Add structure generation prompt - - Add section-specific content prompts - - Add image generation prompt templates - -4. **Renderers** (Update required): - - **HTML Renderer**: Create separate image files and link them - - **PDF Renderer**: Embed images using reportlab - - **XLSX Renderer**: Add image embedding support - - **PPTX Renderer**: Add image embedding support - -### New Action Parameters - -**For `generateDocument`**: -- `enableImageIntegration`: boolean (default: true) -- `maxSectionLength`: int (threshold for "complex" sections, default: 500 words) -- `parallelGeneration`: boolean (default: true) - enable parallel section generation -- `progressLogging`: boolean (default: true) - send ChatLog progress updates - -**For sub-prompts**: -- `sectionContext`: Previous sections for context -- `cachedContent`: Extracted content cache (to avoid re-extraction) -- `targetSection`: Section metadata -- `previousSections`: Array of already-generated sections for continuity - -## Benefits - -1. **Image Integration**: Images can be generated and embedded into documents -2. **Structured Approach**: Clear separation of structure and content -3. **Efficiency**: Content extracted once, reused across sections -4. **Scalability**: Can handle very long documents by splitting into sections -5. **Quality**: Better control over complex sections (images, long chapters) -6. **Flexibility**: Can generate different content types per section - -## Migration Strategy - -**Note**: No backwards compatibility needed - can implement directly as new default. - -1. **Phase 1**: Implement hierarchical generation as new default -2. **Phase 2**: Update renderers (HTML, PDF, XLSX, PPTX) for image support -3. **Phase 3**: Testing and refinement -4. **Phase 4**: Remove old single-pass mode (or keep as internal fallback only) - -## Example Workflow - -**User Request**: "Create a children's bedtime story with 5 illustrations" - -**Phase 1 Output**: -```json -{ - "metadata": {"title": "Flöckchen's Adventure"}, - "documents": [{ - "sections": [ - {"id": "title", "content_type": "heading", "complexity": "simple", ...}, - {"id": "intro", "content_type": "paragraph", "complexity": "simple", ...}, - {"id": "img1", "content_type": "image", "complexity": "complex", - "image_prompt": "Rabbit meeting owl", ...}, - {"id": "chapter1", "content_type": "paragraph", "complexity": "complex", ...}, - {"id": "img2", "content_type": "image", "complexity": "complex", ...}, - ... - ] - }] -} -``` - -**Phase 2 Process**: -- Generate title → populate elements -- Generate intro → populate elements -- Generate image 1 → call `ai.generate`, store base64 → populate elements -- Generate chapter 1 → sub-prompt → populate elements -- Generate image 2 → call `ai.generate`, store base64 → populate elements -- ... - -**Phase 3 Output**: Complete document with all sections populated, ready for rendering - -## Renderer Readiness Assessment - -### Current Renderer Status for Image Handling: - -1. **Text Renderer** (`rendererText.py`): ✅ **READY** - - Skips images, shows placeholder: `[Image: altText]` - - No changes needed - -2. **Markdown Renderer** (`rendererMarkdown.py`): ✅ **READY** - - Shows placeholder with truncated base64: `![altText](data:image/png;base64,...)` - - No changes needed (markdown limitation) - -3. 
**HTML Renderer** (`rendererHtml.py`): ⚠️ **NEEDS UPDATE** - - Currently: Embeds base64 directly in `` tag as data URI - - **Required Change**: Create separate image files and link to them - - Implementation: Generate image files (e.g., `image_1.png`, `image_2.png`) alongside HTML - - Update `` tags to use relative paths: `...` - - Return multiple files: HTML file + image files - -4. **PDF Renderer** (`rendererPdf.py`): ⚠️ **NEEDS UPDATE** - - Currently: Shows placeholder `[Image: altText]` - - **Required Change**: Embed images directly in PDF using reportlab - - Implementation: Use `reportlab.platypus.Image()` with base64 decoded bytes - -5. **DOCX Renderer** (`rendererDocx.py`): ✅ **READY** - - Embeds images directly using `doc.add_picture()` - - Adds captions below images - - No changes needed - -6. **XLSX Renderer** (`rendererXlsx.py`): ⚠️ **NEEDS IMPLEMENTATION** - - Currently: No image handling found - - **Required Change**: Add image support using openpyxl - - Implementation: Use `openpyxl.drawing.image.Image()` to embed images in cells - - Store images in worksheet cells or as floating images - -7. **PPTX Renderer** (`rendererPptx.py`): ⚠️ **NEEDS IMPLEMENTATION** - - Currently: No image handling found - - **Required Change**: Add image support using python-pptx - - Implementation: Use `slide.shapes.add_picture()` to add images to slides - -### Renderer Update Requirements: - -**Priority 1 (Critical for HTML output)**: -- HTML Renderer: Create separate image files and link them - -**Priority 2 (Important for document formats)**: -- PDF Renderer: Embed images using reportlab -- XLSX Renderer: Add image embedding support -- PPTX Renderer: Add image embedding support - -## Answers to Open Questions - -### 1. Performance: How to handle very large documents (100+ sections)? - -**Answer**: Use parallel processing where possible, with progress ChatLog messages. - -**Implementation Strategy**: -- **Parallel Section Generation**: Generate independent sections in parallel using asyncio -- **Batch Processing**: Process sections in batches (e.g., 10 sections at a time) -- **Progress Tracking**: Send ChatLog progress updates: - - "Generating structure..." (Phase 1) - - "Generating content for section X/Y..." (Phase 2) - - "Generating image for section X..." (Phase 2 - images) - - "Merging content..." (Phase 3) - - "Rendering final document..." (Phase 3) -- **Streaming**: For very large documents, consider streaming partial results - -**Example Progress Messages**: -``` -Phase 1: Structure Generation (0% → 33%) -Phase 2: Content Generation (33% → 90%) - - Section 1/10: Heading (34%) - - Section 2/10: Paragraph (40%) - - Section 3/10: Image generation (50%) - - Section 4/10: Chapter (60%) - ... -Phase 3: Integration & Rendering (90% → 100%) -``` - -### 2. Error Handling: What if one section fails? - -**Answer**: Skip failed sections, keep section title and type, show error message in the section. - -**Implementation Strategy**: -- **Graceful Degradation**: Continue processing remaining sections -- **Error Section**: Create error placeholder section: - ```json - { - "id": "section_failed_3", - "content_type": "paragraph", - "elements": [{ - "text": "[ERROR: Failed to generate content for this section. Error: ]" - }], - "order": 3, - "error": true, - "errorMessage": "" - } - ``` -- **Logging**: Log errors for debugging but don't fail entire document -- **User Notification**: Include error count in final progress message - -### 3. Image Storage: Where to store generated images? 
- -**Answer**: Store images in JSON as base64, as renderers need them afterwards. - -**Implementation Strategy**: -- **In-Memory Storage**: Keep base64 strings in JSON structure during generation -- **JSON Structure**: Store in section elements: - ```json - { - "url": "data:image/png;base64,", - "base64Data": "", - "altText": "Image description", - "caption": "Optional caption" - } - ``` -- **Memory Management**: For very large images, consider compression or chunking -- **Renderer Access**: All renderers can access `base64Data` directly from JSON -- **HTML Special Case**: HTML renderer will extract base64, decode, and save as separate files during rendering - -### 4. Backward Compatibility: How to ensure existing workflows still work? - -**Answer**: No backwards compatibility needed. - -**Implementation Strategy**: -- **New Default**: Hierarchical generation becomes the default mode -- **Clean Migration**: All document generation uses hierarchical approach -- **No Fallback**: Remove single-pass mode (or keep as internal fallback only) -- **Breaking Change**: Acceptable since this is a new feature/enhancement - -## Next Steps - -1. **Review and Approval**: Get feedback on concept -2. **Detailed Design**: Design API and data structures -3. **Prototype**: Implement Phase 1 (structure generation) -4. **Testing**: Test with real use cases -5. **Full Implementation**: Implement all phases -6. **Migration**: Migrate existing workflows - diff --git a/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md deleted file mode 100644 index 55a0c35c..00000000 --- a/modules/workflows/processing/shared/DESIGN_HIERARCHICAL_DOCUMENT_GENERATION.md +++ /dev/null @@ -1,1067 +0,0 @@ -# Detailed Design: Hierarchical Document Generation with Image Integration - -## Table of Contents - -1. [Architecture Overview](#architecture-overview) -2. [Data Structures](#data-structures) -3. [Component Design](#component-design) -4. [API Design](#api-design) -5. [Image Handling](#image-handling) -6. [Progress Logging](#progress-logging) -7. [Error Handling](#error-handling) -8. [Performance Considerations](#performance-considerations) - -## Architecture Overview - -### System Flow - -``` -┌─────────────────────────────────────────────────────────────┐ -│ User Request: generateDocument │ -│ Parameters: prompt, documentList, resultType, etc. │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 1: Structure Generation │ -│ - Extract content from documentList (if provided) │ -│ - Cache extracted content │ -│ - Generate document skeleton with sections │ -│ - Identify section complexity │ -│ - Create generation hints │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 2: Content Generation (Parallel) │ -│ │ -│ Simple Sections (heading, short paragraph): │ -│ ┌────────────────────────────────────────┐ │ -│ │ Generate content directly via AI │ │ -│ │ Populate elements array │ │ -│ └────────────────────────────────────────┘ │ -│ │ -│ Complex Sections (image, long chapter): │ -│ ┌────────────────────────────────────────┐ │ -│ │ Create sub-prompt │ │ -│ │ Generate content (text or image) │ │ -│ │ Store in elements array │ │ -│ └────────────────────────────────────────┘ │ -│ │ -│ Progress Updates: │ -│ - "Generating section X/Y..." 
│ -│ - "Generating image for section X..." │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Phase 3: Integration & Rendering │ -│ - Validate all sections have content │ -│ - Merge generated content into structure │ -│ - Replace placeholders with actual data │ -│ - Render to target format (docx, pdf, html, etc.) │ -└─────────────────────────────────────────────────────────────┘ - ↓ -┌─────────────────────────────────────────────────────────────┐ -│ Final Document(s) │ -│ - Single document (docx, pdf, html, etc.) │ -│ - Or multiple files (html + image files) │ -└─────────────────────────────────────────────────────────────┘ -``` - -## Data Structures - -### Document Structure (Phase 1 Output) - -```python -{ - "metadata": { - "title": str, - "split_strategy": str, # "single_document" | "multi_document" - "source_documents": List[str], - "extraction_method": str - }, - "documents": [ - { - "id": str, - "title": str, - "filename": str, - "sections": [ - { - "id": str, - "content_type": str, # "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" - "complexity": str, # "simple" | "complex" - "generation_hint": str, - "image_prompt": Optional[str], # Only for image sections - "order": int, - "elements": [], # Empty initially, populated in Phase 2 - "metadata": Optional[Dict[str, Any]] - } - ] - } - ] -} -``` - -### Section Content (Phase 2 Output) - -**Simple Section (heading)**: -```python -{ - "id": "section_title", - "content_type": "heading", - "elements": [ - { - "level": int, - "text": str - } - ], - "order": 1 -} -``` - -**Simple Section (paragraph)**: -```python -{ - "id": "section_intro", - "content_type": "paragraph", - "elements": [ - { - "text": str - } - ], - "order": 2 -} -``` - -**Complex Section (image)**: -```python -{ - "id": "section_image_1", - "content_type": "image", - "elements": [ - { - "url": "data:image/png;base64,", - "base64Data": str, # Full base64 encoded image - "altText": str, - "caption": Optional[str] - } - ], - "order": 3 -} -``` - -**Error Section**: -```python -{ - "id": "section_failed_4", - "content_type": "paragraph", - "elements": [ - { - "text": f"[ERROR: Failed to generate content for this section. Error: {error_message}]" - } - ], - "order": 4, - "error": True, - "errorMessage": str, - "originalContentType": str # Original content_type that failed -} -``` - -### Content Cache - -```python -{ - "extractedContent": List[ContentPart], # From extraction service - "extractionTimestamp": float, - "sourceDocuments": List[str] # Document IDs -} -``` - -### Generation Context - -```python -{ - "userPrompt": str, - "cachedContent": ContentCache, - "previousSections": List[Dict[str, Any]], # Already generated sections - "targetSection": Dict[str, Any], # Section to generate - "documentMetadata": Dict[str, Any] -} -``` - -## Component Design - -### 1. StructureGenerator - -**Purpose**: Generate document skeleton with section placeholders - -**Location**: `poweron/gateway/modules/services/serviceGeneration/subStructureGenerator.py` - -**Methods**: -```python -class StructureGenerator: - async def generateStructure( - self, - userPrompt: str, - documentList: Optional[DocumentReferenceList], - cachedContent: Optional[ContentCache], - services: Any - ) -> Dict[str, Any]: - """ - Generate document structure with sections. 
- - Returns: - Document structure with empty elements arrays - """ - - def _createStructurePrompt( - self, - userPrompt: str, - cachedContent: Optional[ContentCache], - services: Any - ) -> str: - """ - Create prompt for structure generation. - """ - - def _identifySectionComplexity( - self, - section: Dict[str, Any], - userPrompt: str - ) -> str: - """ - Identify if section is simple or complex. - - Rules: - - Images: always complex - - Long chapters (>maxSectionLength words): complex - - Others: simple - """ - - def _extractImagePrompts( - self, - structure: Dict[str, Any], - userPrompt: str - ) -> Dict[str, str]: - """ - Extract image generation prompts from structure and user prompt. - Maps section_id -> image_prompt - """ -``` - -### 2. ContentGenerator - -**Purpose**: Generate content for each section - -**Location**: `poweron/gateway/modules/services/serviceGeneration/subContentGenerator.py` - -**Methods**: -```python -class ContentGenerator: - async def generateContent( - self, - structure: Dict[str, Any], - cachedContent: Optional[ContentCache], - userPrompt: str, - services: Any, - progressCallback: Optional[Callable] = None - ) -> Dict[str, Any]: - """ - Generate content for all sections in structure. - - Args: - structure: Document structure from Phase 1 - cachedContent: Extracted content cache - userPrompt: Original user prompt - services: Services instance - progressCallback: Function to call for progress updates - - Returns: - Complete document structure with populated elements - """ - - async def _generateSectionContent( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any - ) -> Dict[str, Any]: - """ - Generate content for a single section. - - Returns: - Section with populated elements array - """ - - async def _generateSimpleSection( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any - ) -> Dict[str, Any]: - """ - Generate content for simple section (heading, paragraph). - """ - - async def _generateImageSection( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any - ) -> Dict[str, Any]: - """ - Generate image for image section. - Calls ai.generate action with image generation. - """ - - async def _generateComplexTextSection( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any - ) -> Dict[str, Any]: - """ - Generate content for complex text section (long chapter). - Uses focused sub-prompt. - """ - - async def _generateSectionsParallel( - self, - sections: List[Dict[str, Any]], - context: GenerationContext, - services: Any, - progressCallback: Optional[Callable] = None - ) -> List[Dict[str, Any]]: - """ - Generate content for multiple sections in parallel. - Uses asyncio.gather for parallel execution. - """ - - def _createSectionPrompt( - self, - section: Dict[str, Any], - context: GenerationContext - ) -> str: - """ - Create sub-prompt for section content generation. - """ -``` - -### 3. ContentIntegrator - -**Purpose**: Merge generated content and render final document - -**Location**: `poweron/gateway/modules/services/serviceGeneration/subContentIntegrator.py` - -**Methods**: -```python -class ContentIntegrator: - def integrateContent( - self, - structure: Dict[str, Any], - generatedSections: List[Dict[str, Any]] - ) -> Dict[str, Any]: - """ - Merge generated sections into document structure. 
- - Returns: - Complete document structure ready for rendering - """ - - def validateCompleteness( - self, - document: Dict[str, Any] - ) -> Tuple[bool, List[str]]: - """ - Validate that all sections have content. - - Returns: - (is_complete, list_of_missing_sections) - """ - - def createErrorSection( - self, - originalSection: Dict[str, Any], - errorMessage: str - ) -> Dict[str, Any]: - """ - Create error placeholder section. - """ -``` - -### 4. Modified generateDocument Action - -**Location**: `poweron/gateway/modules/workflows/methods/methodAi/actions/generateDocument.py` - -**Changes**: -```python -@action -async def generateDocument(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Generate documents using hierarchical approach. - """ - # Extract parameters - prompt = parameters.get("prompt") - documentList = parameters.get("documentList", []) - resultType = parameters.get("resultType", "docx") - maxSectionLength = parameters.get("maxSectionLength", 500) - parallelGeneration = parameters.get("parallelGeneration", True) - progressLogging = parameters.get("progressLogging", True) - - # Create operation ID for progress tracking - operationId = f"doc_gen_{self.services.workflow.id}_{int(time.time())}" - parentOperationId = parameters.get('parentOperationId') - - try: - # Phase 1: Structure Generation - if progressLogging: - self.services.chat.progressLogStart( - operationId, - "Document", - "Structure Generation", - "Generating document structure...", - parentOperationId=parentOperationId - ) - - structureGenerator = StructureGenerator(self.services) - - # Extract and cache content if documentList provided - cachedContent = None - if documentList: - # Extract content once - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - if chatDocuments: - extractionOptions = ExtractionOptions( - prompt="Extract all content from documents", - mergeStrategy=MergeStrategy(mergeType="concatenate") - ) - extractedResults = self.services.extraction.extractContent( - chatDocuments, - extractionOptions - ) - cachedContent = { - "extractedContent": extractedResults, - "extractionTimestamp": time.time(), - "sourceDocuments": [doc.id for doc in chatDocuments] - } - - # Generate structure - structure = await structureGenerator.generateStructure( - userPrompt=prompt, - documentList=documentList, - cachedContent=cachedContent, - services=self.services - ) - - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.33, "Structure generated") - - # Phase 2: Content Generation - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.34, - "Starting content generation..." - ) - - contentGenerator = ContentGenerator(self.services) - - def progressCallback(sectionIndex: int, totalSections: int, message: str): - if progressLogging: - progress = 0.34 + (0.56 * (sectionIndex / totalSections)) - self.services.chat.progressLogUpdate( - operationId, - progress, - f"Section {sectionIndex}/{totalSections}: {message}" - ) - - completeStructure = await contentGenerator.generateContent( - structure=structure, - cachedContent=cachedContent, - userPrompt=prompt, - services=self.services, - progressCallback=progressCallback - ) - - if progressLogging: - self.services.chat.progressLogUpdate(operationId, 0.90, "Content generated") - - # Phase 3: Integration & Rendering - if progressLogging: - self.services.chat.progressLogUpdate( - operationId, - 0.91, - "Rendering final document..." 
- ) - - # Use existing renderReport method - title = structure.get("metadata", {}).get("title", "Generated Document") - renderedContent, mimeType = await self.services.generation.renderReport( - extractedContent=completeStructure, - outputFormat=resultType, - title=title, - userPrompt=prompt, - aiService=self.services.ai - ) - - # Create document - document = self.services.generation._createDocument( - fileName=f"document.{resultType}", - mimeType=mimeType, - content=renderedContent, - base64encoded=(mimeType not in ["text/plain", "text/html", "text/markdown"]), - messageId=None - ) - - if progressLogging: - self.services.chat.progressLogFinish(operationId, True) - - return ActionResult.isSuccess( - documents=[ActionDocument( - documentName=f"document.{resultType}", - documentData=renderedContent, - mimeType=mimeType - )] - ) - - except Exception as e: - logger.error(f"Error in hierarchical document generation: {str(e)}") - if progressLogging: - self.services.chat.progressLogFinish(operationId, False) - return ActionResult.isFailure(error=str(e)) -``` - -## API Design - -### Structure Generation Prompt - -```python -def _createStructurePrompt( - userPrompt: str, - cachedContent: Optional[ContentCache], - services: Any -) -> str: - """ - Create prompt for structure generation. - """ - prompt = f""" -{'='*80} -USER REQUEST: -{'='*80} -{userPrompt} -{'='*80} - -TASK: Generate a document STRUCTURE (skeleton) with sections. -Do NOT generate actual content yet - only the structure. - -{'='*80} -EXTRACTED CONTENT (if available): -{'='*80} -{_formatCachedContent(cachedContent) if cachedContent else "No source documents provided."} -{'='*80} - -INSTRUCTIONS: -1. Analyze the user request and extracted content -2. Create a document structure with sections -3. For each section, specify: - - id: Unique identifier - - content_type: "heading" | "paragraph" | "image" | "table" | "bullet_list" | "code_block" - - complexity: "simple" (can generate directly) or "complex" (needs sub-prompt) - - generation_hint: Brief description of what content should be generated - - image_prompt: (only for image sections) Detailed prompt for image generation - - order: Section order number - - elements: [] (empty array - will be populated later) - -4. Identify image sections: - - If user requests illustrations/images, create image sections - - Add image_prompt field with detailed description - - Set complexity to "complex" - -5. Identify complex text sections: - - Long chapters (>500 words expected) should be marked as "complex" - - Short paragraphs/headings should be "simple" - -6. Return ONLY valid JSON following this structure: -{{ - "metadata": {{ - "title": "Document Title", - "split_strategy": "single_document", - "source_documents": [], - "extraction_method": "ai_generation" - }}, - "documents": [ - {{ - "id": "doc_1", - "title": "Document Title", - "filename": "document.json", - "sections": [ - {{ - "id": "section_1", - "content_type": "heading", - "complexity": "simple", - "generation_hint": "Main title", - "order": 1, - "elements": [] - }}, - {{ - "id": "section_2", - "content_type": "image", - "complexity": "complex", - "generation_hint": "Illustration for chapter 1", - "image_prompt": "Detailed description for image generation", - "order": 2, - "elements": [] - }} - ] - }} - ] -}} - -Return ONLY the JSON structure. No explanations. 
-""" - return prompt -``` - -### Section Content Generation Prompt - -```python -def _createSectionPrompt( - section: Dict[str, Any], - context: GenerationContext -) -> str: - """ - Create sub-prompt for section content generation. - """ - sectionType = section.get("content_type") - generationHint = section.get("generation_hint", "") - - prompt = f""" -{'='*80} -SECTION TO GENERATE: -{'='*80} -Type: {sectionType} -Hint: {generationHint} -{'='*80} - -CONTEXT: -- User Request: {context.userPrompt} -- Previous Sections: {len(context.previousSections)} sections already generated -- Document Title: {context.documentMetadata.get('title', 'Unknown')} - -{'='*80} -EXTRACTED CONTENT (if available): -{'='*80} -{_formatCachedContent(context.cachedContent) if context.cachedContent else "None"} -{'='*80} - -TASK: Generate content for this section ONLY. - -INSTRUCTIONS: -1. Generate content appropriate for section type: {sectionType} -2. Use the generation hint: {generationHint} -3. Consider previous sections for continuity -4. Use extracted content if relevant - -5. Return ONLY the elements array for this section: - -For heading: -{{ - "elements": [ - {{"level": 1, "text": "Heading Text"}} - ] -}} - -For paragraph: -{{ - "elements": [ - {{"text": "Paragraph text content"}} - ] -}} - -For image: -{{ - "elements": [ - {{ - "url": "data:image/png;base64,", - "base64Data": "", - "altText": "Image description", - "caption": "Optional caption" - }} - ] -}} - -Return ONLY the elements array as JSON. No other text. -""" - return prompt -``` - -## Image Handling - -### Image Generation Flow - -```python -async def _generateImageSection( - section: Dict[str, Any], - context: GenerationContext, - services: Any -) -> Dict[str, Any]: - """ - Generate image for image section. - """ - imagePrompt = section.get("image_prompt") - if not imagePrompt: - raise ValueError(f"Image section {section.get('id')} missing image_prompt") - - # Call ai.generate action with image generation - from modules.workflows.methods.methodAi.actions.generate import generate - - generateParams = { - "prompt": imagePrompt, - "resultType": "png", - "parentOperationId": context.operationId - } - - result = await generate(self=services.ai, parameters=generateParams) - - if not result.success or not result.documents: - raise ValueError(f"Image generation failed: {result.error}") - - # Extract base64 image data - imageDoc = result.documents[0] - base64Data = imageDoc.documentData - - # Create image element - section["elements"] = [{ - "url": f"data:image/png;base64,{base64Data}", - "base64Data": base64Data, - "altText": section.get("generation_hint", "Image"), - "caption": section.get("metadata", {}).get("caption") - }] - - return section -``` - -### HTML Renderer Image Handling - -**Location**: `poweron/gateway/modules/services/serviceGeneration/renderers/rendererHtml.py` - -**Changes**: -```python -async def render( - self, - extractedContent: Dict[str, Any], - title: str, - userPrompt: str = None, - aiService=None -) -> Tuple[str, str]: - """ - Render HTML with separate image files. - - Returns: - (html_content, mime_type) - """ - # Generate HTML - htmlContent = await self._generateHtmlFromJson(...) 
- - # Extract images and create separate files - images = self._extractImages(extractedContent) - - if images: - # Create image files - imageFiles = [] - for idx, imageData in enumerate(images): - base64Data = imageData.get("base64Data") - if base64Data: - # Decode base64 - imageBytes = base64.b64decode(base64Data) - - # Create filename - filename = f"image_{idx + 1}.png" - - # Update HTML to use relative path - htmlContent = htmlContent.replace( - f'data:image/png;base64,{base64Data}', - filename - ) - - imageFiles.append({ - "filename": filename, - "content": imageBytes, - "mimeType": "image/png" - }) - - # Return HTML + image files info - # Note: This requires modification to return multiple files - # For now, embed base64 (will be updated in implementation) - return htmlContent, "text/html" - - return htmlContent, "text/html" - -def _extractImages(self, jsonContent: Dict[str, Any]) -> List[Dict[str, Any]]: - """ - Extract all images from JSON structure. - """ - images = [] - - documents = jsonContent.get("documents", []) - if not documents: - sections = jsonContent.get("sections", []) - documents = [{"sections": sections}] - - for doc in documents: - sections = doc.get("sections", []) - for section in sections: - if section.get("content_type") == "image": - elements = section.get("elements", []) - for element in elements: - if element.get("base64Data"): - images.append(element) - - return images -``` - -## Progress Logging - -### Progress Stages - -```python -PROGRESS_STAGES = { - "structure_generation": { - "start": 0.0, - "end": 0.33, - "messages": [ - "Extracting content from documents...", - "Generating document structure...", - "Structure generated" - ] - }, - "content_generation": { - "start": 0.34, - "end": 0.90, - "messages": [ - "Starting content generation...", - "Generating section {current}/{total}...", - "Generating image for section {section_id}...", - "Content generated" - ] - }, - "integration_rendering": { - "start": 0.91, - "end": 1.0, - "messages": [ - "Rendering final document...", - "Document complete" - ] - } -} -``` - -### Progress Callback Implementation - -```python -def createProgressCallback( - operationId: str, - totalSections: int, - services: Any -) -> Callable: - """ - Create progress callback function. - """ - def progressCallback( - sectionIndex: int, - totalSections: int, - message: str - ): - # Calculate progress - baseProgress = 0.34 # Start of content generation phase - phaseProgress = 0.56 # Length of content generation phase - sectionProgress = (sectionIndex / totalSections) * phaseProgress - currentProgress = baseProgress + sectionProgress - - # Update progress log - services.chat.progressLogUpdate( - operationId, - currentProgress, - f"Section {sectionIndex}/{totalSections}: {message}" - ) - - return progressCallback -``` - -## Error Handling - -### Error Section Creation - -```python -def createErrorSection( - originalSection: Dict[str, Any], - errorMessage: str -) -> Dict[str, Any]: - """ - Create error placeholder section. - """ - return { - "id": originalSection.get("id", "unknown"), - "content_type": "paragraph", # Change to paragraph for error display - "elements": [{ - "text": f"[ERROR: Failed to generate {originalSection.get('content_type', 'content')} for section '{originalSection.get('id', 'unknown')}'. 
Error: {errorMessage}]" - }], - "order": originalSection.get("order", 0), - "error": True, - "errorMessage": errorMessage, - "originalContentType": originalSection.get("content_type") - } -``` - -### Error Handling in Content Generation - -```python -async def _generateSectionContent( - self, - section: Dict[str, Any], - context: GenerationContext, - services: Any -) -> Dict[str, Any]: - """ - Generate content for a single section with error handling. - """ - try: - complexity = section.get("complexity", "simple") - contentType = section.get("content_type") - - if contentType == "image": - return await self._generateImageSection(section, context, services) - elif complexity == "complex": - return await self._generateComplexTextSection(section, context, services) - else: - return await self._generateSimpleSection(section, context, services) - - except Exception as e: - logger.error(f"Error generating section {section.get('id')}: {str(e)}") - return createErrorSection(section, str(e)) -``` - -## Performance Considerations - -### Parallel Generation - -```python -async def _generateSectionsParallel( - self, - sections: List[Dict[str, Any]], - context: GenerationContext, - services: Any, - progressCallback: Optional[Callable] = None -) -> List[Dict[str, Any]]: - """ - Generate content for multiple sections in parallel. - """ - async def generateWithProgress(section: Dict[str, Any], index: int): - if progressCallback: - progressCallback(index + 1, len(sections), f"Generating {section.get('content_type')}...") - - return await self._generateSectionContent(section, context, services) - - # Generate all sections in parallel - results = await asyncio.gather( - *[generateWithProgress(section, idx) for idx, section in enumerate(sections)], - return_exceptions=True - ) - - # Handle exceptions - generatedSections = [] - for idx, result in enumerate(results): - if isinstance(result, Exception): - logger.error(f"Error generating section {idx}: {str(result)}") - generatedSections.append( - createErrorSection(sections[idx], str(result)) - ) - else: - generatedSections.append(result) - - return generatedSections -``` - -### Batch Processing for Large Documents - -```python -async def generateContent( - self, - structure: Dict[str, Any], - cachedContent: Optional[ContentCache], - userPrompt: str, - services: Any, - progressCallback: Optional[Callable] = None, - batchSize: int = 10 -) -> Dict[str, Any]: - """ - Generate content with batching for large documents. - """ - documents = structure.get("documents", []) - - for doc in documents: - sections = doc.get("sections", []) - - # Process in batches - for batchStart in range(0, len(sections), batchSize): - batch = sections[batchStart:batchStart + batchSize] - - # Generate batch in parallel - generatedBatch = await self._generateSectionsParallel( - batch, - context, - services, - progressCallback - ) - - # Update sections - for idx, generated in enumerate(generatedBatch): - sections[batchStart + idx] = generated - - return structure -``` - -## Testing Strategy - -### Unit Tests - -1. **StructureGenerator Tests**: - - Test structure generation with/without source documents - - Test complexity identification - - Test image prompt extraction - -2. **ContentGenerator Tests**: - - Test simple section generation - - Test image section generation - - Test complex text section generation - - Test parallel generation - - Test error handling - -3. 
**ContentIntegrator Tests**: - - Test content merging - - Test validation - - Test error section creation - -### Integration Tests - -1. **End-to-End Tests**: - - Test complete document generation flow - - Test with images - - Test with long documents - - Test error scenarios - -2. **Renderer Tests**: - - Test HTML renderer with separate image files - - Test PDF renderer with embedded images - - Test XLSX/PPTX renderers with images - -### Performance Tests - -1. **Large Document Tests**: - - Test with 100+ sections - - Test parallel generation performance - - Test memory usage - -2. **Image Generation Tests**: - - Test multiple images - - Test large images - - Test image generation failures - diff --git a/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md b/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md deleted file mode 100644 index 4476c2b9..00000000 --- a/modules/workflows/processing/shared/IMPLEMENTATION_PLAN_HIERARCHICAL_DOCUMENT_GENERATION.md +++ /dev/null @@ -1,398 +0,0 @@ -# Implementation Plan: Hierarchical Document Generation - -## Overview - -This document outlines the step-by-step implementation plan for the hierarchical document generation system with image integration. - -## Implementation Phases - -### Phase 1: Core Infrastructure (Week 1) - -**Goal**: Set up core components and data structures - -#### Tasks: - -1. **Create StructureGenerator Component** - - [ ] Create `subStructureGenerator.py` - - [ ] Implement `generateStructure()` method - - [ ] Implement `_createStructurePrompt()` method - - [ ] Implement `_identifySectionComplexity()` method - - [ ] Implement `_extractImagePrompts()` method - - [ ] Add unit tests - -2. **Create ContentGenerator Component** - - [ ] Create `subContentGenerator.py` - - [ ] Implement `generateContent()` method - - [ ] Implement `_generateSectionContent()` method - - [ ] Implement `_generateSimpleSection()` method - - [ ] Implement `_generateComplexTextSection()` method - - [ ] Implement `_createSectionPrompt()` method - - [ ] Add unit tests - -3. **Create ContentIntegrator Component** - - [ ] Create `subContentIntegrator.py` - - [ ] Implement `integrateContent()` method - - [ ] Implement `validateCompleteness()` method - - [ ] Implement `createErrorSection()` method - - [ ] Add unit tests - -4. **Update generateDocument Action** - - [ ] Modify `generateDocument.py` to use hierarchical approach - - [ ] Add Phase 1: Structure generation - - [ ] Add Phase 2: Content generation (sequential first) - - [ ] Add Phase 3: Integration & rendering - - [ ] Add basic progress logging - - [ ] Add error handling - -**Deliverables**: -- Core components created -- Basic hierarchical generation working (sequential) -- Unit tests passing - -**Estimated Time**: 3-4 days - ---- - -### Phase 2: Image Generation Integration (Week 1-2) - -**Goal**: Integrate image generation into content generation - -#### Tasks: - -1. **Implement Image Section Generation** - - [ ] Add `_generateImageSection()` method to ContentGenerator - - [ ] Integrate with `ai.generate` action - - [ ] Handle base64 image data storage - - [ ] Add image prompt extraction from structure - - [ ] Add error handling for image generation failures - -2. **Update Structure Generation Prompt** - - [ ] Add image section detection in structure prompt - - [ ] Add image_prompt field extraction - - [ ] Test with user prompts requesting images - -3. 
**Test Image Integration** - - [ ] Test image generation in document structure - - [ ] Test multiple images in one document - - [ ] Test image generation failures - -**Deliverables**: -- Image generation integrated -- Images stored as base64 in JSON -- Error handling for image failures - -**Estimated Time**: 2-3 days - ---- - -### Phase 3: Parallel Processing & Progress Logging (Week 2) - -**Goal**: Implement parallel section generation and detailed progress logging - -#### Tasks: - -1. **Implement Parallel Generation** - - [ ] Add `_generateSectionsParallel()` method - - [ ] Use `asyncio.gather()` for parallel execution - - [ ] Add batch processing for large documents - - [ ] Handle exceptions in parallel execution - - [ ] Test parallel vs sequential performance - -2. **Enhance Progress Logging** - - [ ] Create progress callback system - - [ ] Add detailed progress messages: - - Structure generation progress - - Section-by-section progress - - Image generation progress - - Rendering progress - - [ ] Calculate accurate progress percentages - - [ ] Test progress updates - -3. **Update generateDocument Action** - - [ ] Integrate parallel generation - - [ ] Add progress callback to content generation - - [ ] Update progress logging throughout phases - -**Deliverables**: -- Parallel section generation working -- Detailed progress logging -- Performance improvements - -**Estimated Time**: 2-3 days - ---- - -### Phase 4: Renderer Updates (Week 2-3) - -**Goal**: Update renderers to properly handle images - -#### Tasks: - -1. **Update HTML Renderer** - - [ ] Modify `rendererHtml.py` - - [ ] Add `_extractImages()` method - - [ ] Implement separate image file creation - - [ ] Update HTML to use relative image paths - - [ ] Handle multiple image files - - [ ] Test HTML + image files output - -2. **Update PDF Renderer** - - [ ] Modify `rendererPdf.py` - - [ ] Update `_renderJsonImage()` to embed images - - [ ] Use `reportlab.platypus.Image()` with base64 - - [ ] Handle image sizing and positioning - - [ ] Test PDF with embedded images - -3. **Update XLSX Renderer** - - [ ] Modify `rendererXlsx.py` - - [ ] Add `_renderJsonImage()` method - - [ ] Use `openpyxl.drawing.image.Image()` to embed images - - [ ] Handle image placement in cells - - [ ] Test XLSX with images - -4. **Update PPTX Renderer** - - [ ] Modify `rendererPptx.py` - - [ ] Add `_renderJsonImage()` method - - [ ] Use `slide.shapes.add_picture()` to add images - - [ ] Handle image sizing on slides - - [ ] Test PPTX with images - -**Deliverables**: -- All renderers support images -- HTML creates separate image files -- PDF/XLSX/PPTX embed images directly - -**Estimated Time**: 4-5 days - ---- - -### Phase 5: Content Caching & Optimization (Week 3) - -**Goal**: Implement content caching to avoid re-extraction - -#### Tasks: - -1. **Implement Content Cache** - - [ ] Create ContentCache data structure - - [ ] Extract content once at start of generation - - [ ] Pass cached content to all sub-prompts - - [ ] Add cache validation (check if documents changed) - - [ ] Test cache reuse - -2. **Optimize Prompt Building** - - [ ] Update structure prompt to use cached content - - [ ] Update section prompts to use cached content - - [ ] Format cached content efficiently - - [ ] Test prompt sizes - -3. 
**Performance Testing** - - [ ] Test with large documents - - [ ] Test with multiple source documents - - [ ] Measure performance improvements - - [ ] Optimize bottlenecks - -**Deliverables**: -- Content caching implemented -- No redundant content extraction -- Performance optimized - -**Estimated Time**: 2-3 days - ---- - -### Phase 6: Error Handling & Edge Cases (Week 3-4) - -**Goal**: Robust error handling and edge case coverage - -#### Tasks: - -1. **Enhance Error Handling** - - [ ] Improve error section creation - - [ ] Add error recovery strategies - - [ ] Handle partial failures gracefully - - [ ] Add error logging and reporting - -2. **Handle Edge Cases** - - [ ] Empty document list - - [ ] No sections generated - - [ ] All sections fail - - [ ] Very large images - - [ ] Very long documents (100+ sections) - - [ ] Missing image prompts - - [ ] Invalid section types - -3. **Add Validation** - - [ ] Validate structure before content generation - - [ ] Validate content before integration - - [ ] Validate final document before rendering - - [ ] Add comprehensive error messages - -**Deliverables**: -- Robust error handling -- Edge cases covered -- Clear error messages - -**Estimated Time**: 2-3 days - ---- - -### Phase 7: Testing & Refinement (Week 4) - -**Goal**: Comprehensive testing and refinement - -#### Tasks: - -1. **Unit Testing** - - [ ] Complete unit tests for all components - - [ ] Test all methods - - [ ] Test error scenarios - - [ ] Achieve >80% code coverage - -2. **Integration Testing** - - [ ] Test end-to-end document generation - - [ ] Test with various document types - - [ ] Test with images - - [ ] Test with long documents - - [ ] Test error scenarios - -3. **Performance Testing** - - [ ] Test with 10, 50, 100+ sections - - [ ] Measure generation time - - [ ] Measure memory usage - - [ ] Compare parallel vs sequential - - [ ] Optimize if needed - -4. **User Acceptance Testing** - - [ ] Test with real user scenarios - - [ ] Test bedtime story with images (original use case) - - [ ] Test business documents - - [ ] Test technical documents - - [ ] Gather feedback - -5. **Documentation** - - [ ] Update API documentation - - [ ] Add code comments - - [ ] Update user guides - - [ ] Create examples - -**Deliverables**: -- Comprehensive test suite -- Performance benchmarks -- Documentation complete -- Ready for production - -**Estimated Time**: 3-4 days - ---- - -## Dependencies - -### External Dependencies -- `asyncio` - For parallel processing -- `base64` - For image encoding/decoding -- `reportlab` - For PDF image embedding -- `openpyxl` - For XLSX image embedding -- `python-pptx` - For PPTX image embedding - -### Internal Dependencies -- `serviceGeneration` - Main generation service -- `serviceAi` - AI service for generation -- `serviceExtraction` - Content extraction service -- `methodAi.actions.generate` - Image generation action -- `methodAi.actions.process` - Text generation action - -## Risk Mitigation - -### Risks and Mitigation Strategies - -1. **Risk**: Image generation failures break entire document - - **Mitigation**: Error handling creates error sections, continues processing - -2. **Risk**: Parallel generation causes memory issues - - **Mitigation**: Batch processing, limit concurrent operations - -3. **Risk**: Large base64 images cause JSON size issues - - **Mitigation**: Consider compression or chunking for very large images - -4. **Risk**: HTML renderer needs to return multiple files - - **Mitigation**: Modify return type or create file bundle system - -5. 
**Risk**: Performance not meeting expectations - - **Mitigation**: Profile and optimize bottlenecks, consider caching - -## Success Criteria - -### Functional Requirements -- ✅ Documents can be generated with embedded images -- ✅ HTML renderer creates separate image files -- ✅ PDF/XLSX/PPTX renderers embed images -- ✅ Progress logging shows detailed progress -- ✅ Error handling prevents complete failures -- ✅ Content extraction happens only once - -### Performance Requirements -- ✅ Parallel generation improves performance by 2x+ for multi-section documents -- ✅ Progress updates appear within 1 second of action -- ✅ Documents with 50+ sections complete in <5 minutes - -### Quality Requirements -- ✅ >80% code coverage -- ✅ All edge cases handled -- ✅ Clear error messages -- ✅ Comprehensive documentation - -## Rollout Plan - -### Step 1: Internal Testing (Week 4) -- Deploy to development environment -- Internal team testing -- Fix critical issues - -### Step 2: Beta Testing (Week 5) -- Deploy to staging environment -- Select beta users -- Gather feedback -- Fix issues - -### Step 3: Production Deployment (Week 6) -- Deploy to production -- Monitor performance -- Monitor errors -- Gather user feedback - -### Step 4: Optimization (Ongoing) -- Monitor usage patterns -- Optimize based on real-world usage -- Add enhancements based on feedback - -## Timeline Summary - -| Phase | Duration | Start | End | -|-------|----------|-------|-----| -| Phase 1: Core Infrastructure | 3-4 days | Day 1 | Day 4 | -| Phase 2: Image Integration | 2-3 days | Day 4 | Day 7 | -| Phase 3: Parallel Processing | 2-3 days | Day 7 | Day 10 | -| Phase 4: Renderer Updates | 4-5 days | Day 10 | Day 15 | -| Phase 5: Content Caching | 2-3 days | Day 15 | Day 18 | -| Phase 6: Error Handling | 2-3 days | Day 18 | Day 21 | -| Phase 7: Testing & Refinement | 3-4 days | Day 21 | Day 25 | - -**Total Estimated Time**: 4-5 weeks - -## Next Steps - -1. **Review and Approve Plan** - - Review implementation plan - - Approve timeline - - Assign resources - -2. **Set Up Development Environment** - - Create feature branch - - Set up test infrastructure - - Prepare development tools - -3. 
**Begin Phase 1** - - Start with StructureGenerator - - Set up project structure - - Begin implementation - diff --git a/modules/workflows/workflowManager.py b/modules/workflows/workflowManager.py index a5971904..593ba555 100644 --- a/modules/workflows/workflowManager.py +++ b/modules/workflows/workflowManager.py @@ -167,50 +167,86 @@ class WorkflowManager: self.workflowProcessor = WorkflowProcessor(self.services) - # Get workflow mode to determine if complexity detection is needed + # Get workflow mode to determine if combined analysis is needed workflowMode = getattr(self.services.workflow, 'workflowMode', None) - skipComplexityDetection = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION) + skipCombinedAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION) - if skipComplexityDetection: - logger.info("Skipping complexity detection for AUTOMATION mode - using predefined plan") + if skipCombinedAnalysis: + logger.info("Skipping combined analysis for AUTOMATION mode - using predefined plan") complexity = "moderate" # Default for automation workflows needsWorkflowHistory = False # Automation workflows don't need history detectedLanguage = None # No language detection in automation mode + normalizedRequest = userInput.prompt + intentText = userInput.prompt + contextItems = [] + workflowIntent = None else: - # Process user-uploaded documents from userInput for complexity detection - # This is the correct way: use the input data directly, not workflow state + # Process user-uploaded documents from userInput for combined analysis documents = [] if userInput.listFileId: try: documents = await self._processFileIds(userInput.listFileId, None) except Exception as e: - logger.warning(f"Failed to process user fileIds for complexity detection: {e}") + logger.warning(f"Failed to process user fileIds for combined analysis: {e}") - # Detect complexity (AI-based semantic understanding) using user input documents - # Also detects language for fast path responses - complexity, needsWorkflowHistory, detectedLanguage = await self.workflowProcessor.detectComplexity(userInput.prompt, documents) - logger.info(f"Request complexity detected: {complexity}, needsWorkflowHistory: {needsWorkflowHistory}, language: {detectedLanguage}") + # Phase 1+2: Kombinierte Analyse: Intent + Komplexität in einem AI-Call + analysisResult = await self._analyzeUserInputAndComplexity(userInput.prompt, documents) - # Set detected language for fast path (if detected) + # Extract results + detectedLanguage = analysisResult.get('detectedLanguage') + normalizedRequest = analysisResult.get('normalizedRequest') + intentText = analysisResult.get('intent') or userInput.prompt + contextItems = analysisResult.get('contextItems', []) + complexity = analysisResult.get('complexity', 'moderate') + needsWorkflowHistory = analysisResult.get('needsWorkflowHistory', False) + fastTrack = analysisResult.get('fastTrack', False) + + # Extract intent analysis fields and store as workflowIntent + workflowIntent = { + 'primaryGoal': analysisResult.get('primaryGoal'), + 'dataType': analysisResult.get('dataType', 'unknown'), + 'expectedFormats': analysisResult.get('expectedFormats', []), + 'qualityRequirements': analysisResult.get('qualityRequirements', {}), + 'successCriteria': analysisResult.get('successCriteria', []), + 'languageUserDetected': detectedLanguage, + 'needsWorkflowHistory': needsWorkflowHistory + } + + # Store needsWorkflowHistory in services + setattr(self.services, '_needsWorkflowHistory', bool(needsWorkflowHistory)) + + # Store 
workflowIntent in workflow object for reuse + if hasattr(self.services, 'workflow') and self.services.workflow: + self.services.workflow._workflowIntent = workflowIntent + + # Store normalized request and intent + self.services.currentUserPrompt = intentText or userInput.prompt + self.services.currentUserPromptNormalized = normalizedRequest or intentText or userInput.prompt + if contextItems is not None: + self.services.currentUserContextItems = contextItems + + # Set detected language if detectedLanguage and isinstance(detectedLanguage, str): self._setUserLanguage(detectedLanguage) try: setattr(self.services, 'currentUserLanguage', detectedLanguage) except Exception: pass + + logger.info(f"Combined analysis: complexity={complexity}, needsWorkflowHistory={needsWorkflowHistory}, language={detectedLanguage}, fastTrack={fastTrack}") # Route to fast path for simple requests if history is not needed # Skip fast path for automation mode or if history is needed - if complexity == "simple" and not needsWorkflowHistory: + if not skipCombinedAnalysis and complexity == "simple" and not needsWorkflowHistory: logger.info("Routing to fast path for simple request") await self._executeFastPath(userInput, documents) return # Fast path completes the workflow - # Now send the first message (which will also process the documents again, but that's fine) - await self._sendFirstMessage(userInput) + # Now send the first message (use already analyzed data if available) + await self._sendFirstMessage(userInput, skipIntentionAnalysis=not skipCombinedAnalysis) # Route to full workflow for moderate/complex requests or automation mode - logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipComplexityDetection else "")) + logger.info(f"Routing to full workflow for {complexity} request" + (" (automation mode)" if skipCombinedAnalysis else "")) taskPlan = await self._planTasks(userInput) await self._executeTasks(taskPlan) await self._processWorkflowResults() @@ -223,6 +259,143 @@ class WorkflowManager: # Helper functions + async def _analyzeUserInputAndComplexity( + self, + userPrompt: str, + documents: List[ChatDocument] + ) -> Dict[str, Any]: + """ + Phase 1+2: Kombinierte Analyse: Intent + Komplexität in einem AI-Call. + + Args: + userPrompt: User-Anfrage + documents: Liste der Dokumente + + Returns: + Dict mit: + - detectedLanguage: ISO 639-1 Sprachcode + - normalizedRequest: Vollständige, explizite Umformulierung + - intent: Kurze Kern-Anfrage + - contextItems: Große Datenblöcke als separate Dokumente + - complexity: "simple" | "moderate" | "complex" + - needsWorkflowHistory: bool + - fastTrack: bool + - primaryGoal: Hauptziel + - dataType: Datentyp + - expectedFormats: Erwartete Formate + - qualityRequirements: Qualitätsanforderungen + - successCriteria: Erfolgskriterien + """ + # Baue Dokument-Liste für Prompt + docListText = "" + if documents: + for i, doc in enumerate(documents, 1): + docListText += f"\n{i}. {doc.fileName} ({doc.mimeType}, {doc.fileSize} bytes)" + + analysisPrompt = f"""You are an input analyzer. From the user's message, perform ALL of the following in one pass: + +1. detectedLanguage: Detect ISO 639-1 language code (e.g., de, en, fr, it) +2. normalizedRequest: Full, explicit restatement of the user's request in the detected language; do NOT summarize; preserve ALL constraints and details +3. intent: Concise single-paragraph core request in the detected language for high-level routing +4. 
contextItems: Supportive data blocks to attach as separate documents if significantly larger than the intent (large literal content, long lists/tables, code/JSON blocks, transcripts, CSV fragments, detailed specs). Keep URLs in the intent unless they embed large pasted content +5. complexity: "simple" | "moderate" | "complex" + - "simple": Only if NO documents AND NO web search required. Single question, straightforward answer (5-15s) + - "moderate": Multiple steps, some documents, structured response requiring some processing, or web search needed (30-60s) + - "complex": Multi-task workflow, many documents, research needed, content generation required, multi-step planning (60-120s) +6. needsWorkflowHistory: Boolean indicating if this request needs previous workflow rounds/history (e.g., 'continue', 'retry', 'fix', 'improve', 'update', 'modify', 'based on previous', 'build on', references to earlier work) +7. fastTrack: Boolean indicating if Fast Track is possible (simple requests without documents and without workflow history) +8. primaryGoal: The main objective the user wants to achieve +9. dataType: What type of data/content they want (numbers|text|documents|analysis|code|unknown) +10. expectedFormats: What file format(s) they expect - provide matching file format extensions list (e.g., ["xlsx", "pdf"]). If format is unclear or not specified, use empty list [] +11. qualityRequirements: Quality requirements they have (accuracy, completeness) as {{accuracyThreshold: 0.0-1.0, completenessThreshold: 0.0-1.0}} +12. successCriteria: Specific success criteria that define completion (array of strings) + +Rules: +- If total content (intent + data) is < 10% of model max tokens, do not extract; return empty contextItems and keep intent compact and self-contained +- If content exceeds that threshold, move bulky parts into contextItems; keep intent short and clear +- Preserve critical references (URLs, filenames) in intent +- Normalize to the primary detected language if mixed-language +- Consider number of documents provided when determining complexity +- Consider need for external research or web search when determining complexity + +Documents provided: {len(documents)} document(s) +{docListText} + +Return ONLY JSON (no markdown) with this exact structure: +{{ + "detectedLanguage": "de|en|fr|it|...", + "normalizedRequest": "Full explicit instruction in detected language", + "intent": "Concise normalized request...", + "contextItems": [ + {{ + "title": "User context 1", + "mimeType": "text/plain", + "content": "Full extracted content block here" + }} + ], + "complexity": "simple" | "moderate" | "complex", + "needsWorkflowHistory": true|false, + "fastTrack": true|false, + "primaryGoal": "The main objective the user wants to achieve", + "dataType": "numbers|text|documents|analysis|code|unknown", + "expectedFormats": ["pdf", "docx", "xlsx", "txt", "json", "csv", "html", "md"], + "qualityRequirements": {{ + "accuracyThreshold": 0.0-1.0, + "completenessThreshold": 0.0-1.0 + }}, + "successCriteria": ["specific criterion 1", "specific criterion 2"] +}} + +## User Message +The following is the user's original input message. 
Analyze intent, normalize the request, determine complexity, and identify any large context blocks that should be moved to separate documents: + +################ USER INPUT START ################# +{userPrompt.replace('{', '{{').replace('}', '}}') if userPrompt else ''} +################ USER INPUT FINISH ################# +""" + + # AI-Call (verwende callAiPlanning für einfache JSON-Responses) + # Debug-Logs werden bereits von callAiPlanning geschrieben + aiResponse = await self.services.ai.callAiPlanning( + prompt=analysisPrompt, + placeholders=None, + debugType="user_input_analysis" + ) + + # Parse Result + try: + jsonStart = aiResponse.find('{') if aiResponse else -1 + jsonEnd = aiResponse.rfind('}') + 1 if aiResponse else 0 + if jsonStart != -1 and jsonEnd > jsonStart: + result = json.loads(aiResponse[jsonStart:jsonEnd]) + return result + else: + logger.warning("Could not parse combined analysis response, using defaults") + return self._getDefaultAnalysisResult() + except Exception as e: + logger.warning(f"Error parsing combined analysis response: {str(e)}, using defaults") + return self._getDefaultAnalysisResult() + + def _getDefaultAnalysisResult(self) -> Dict[str, Any]: + """Fallback Default-Werte wenn Parsing fehlschlägt.""" + return { + "detectedLanguage": "en", + "normalizedRequest": "", + "intent": "", + "contextItems": [], + "complexity": "moderate", + "needsWorkflowHistory": False, + "fastTrack": False, + "primaryGoal": None, + "dataType": "unknown", + "expectedFormats": [], + "qualityRequirements": { + "accuracyThreshold": 0.8, + "completenessThreshold": 0.8 + }, + "successCriteria": [] + } + async def _executeFastPath(self, userInput: UserInputRequest, documents: List[ChatDocument]) -> None: """Execute fast path for simple requests and deliver result to user""" try: @@ -330,7 +503,7 @@ class WorkflowManager: await self._executeTasks(taskPlan) await self._processWorkflowResults() - async def _sendFirstMessage(self, userInput: UserInputRequest) -> None: + async def _sendFirstMessage(self, userInput: UserInputRequest, skipIntentionAnalysis: bool = False) -> None: """Send first message to start workflow""" try: workflow = self.services.workflow @@ -360,21 +533,58 @@ class WorkflowManager: } # Analyze the user's input to detect language, normalize request, extract intent, and offload bulky context into documents - # SKIP user intention analysis for AUTOMATION mode - it uses predefined JSON plans + # SKIP user intention analysis if already done in combined analysis (skipIntentionAnalysis=True) + # or for AUTOMATION mode - it uses predefined JSON plans createdDocs = [] workflowMode = getattr(workflow, 'workflowMode', None) - skipIntentionAnalysis = (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION) + skipIntentionAnalysis = skipIntentionAnalysis or (workflowMode == WorkflowModeEnum.WORKFLOW_AUTOMATION) if skipIntentionAnalysis: - logger.info("Skipping user intention analysis for AUTOMATION mode - using direct user input") - # For automation mode, use user input directly without AI analysis - self.services.currentUserPrompt = userInput.prompt - # Always set currentUserPromptNormalized - use user input directly for automation mode - self.services.currentUserPromptNormalized = userInput.prompt - detectedLanguage = None - normalizedRequest = None - intentText = userInput.prompt - contextItems = [] + logger.info("Skipping user intention analysis (already done in combined analysis or AUTOMATION mode)") + # Use already analyzed data if available, otherwise use user input directly + 
detectedLanguage = getattr(self.services, 'currentUserLanguage', None) + normalizedRequest = getattr(self.services, 'currentUserPromptNormalized', None) or userInput.prompt + intentText = getattr(self.services, 'currentUserPrompt', None) or userInput.prompt + contextItems = getattr(self.services, 'currentUserContextItems', None) or [] + workflowIntent = getattr(workflow, '_workflowIntent', None) + + # Create documents for context items (if available from combined analysis) + if contextItems and isinstance(contextItems, list): + for idx, item in enumerate(contextItems): + try: + title = item.get('title') if isinstance(item, dict) else None + mime = item.get('mimeType') if isinstance(item, dict) else None + content = item.get('content') if isinstance(item, dict) else None + if not content: + continue + fileName = (title or f"user_context_{idx+1}.txt").strip() + mimeType = (mime or "text/plain").strip() + + # Neutralize content before storing if neutralization is enabled + contentBytes = content.encode('utf-8') + contentBytes = await self._neutralizeContentIfEnabled(contentBytes, mimeType) + + # Create file in component storage + fileItem = self.services.interfaceDbComponent.createFile( + name=fileName, + mimeType=mimeType, + content=contentBytes + ) + # Persist file data + self.services.interfaceDbComponent.createFileData(fileItem.id, contentBytes) + + # Collect file info + fileInfo = self.services.chat.getFileInfo(fileItem.id) + from modules.datamodels.datamodelChat import ChatDocument + doc = ChatDocument( + fileId=fileItem.id, + fileName=fileInfo.get("fileName", fileName) if fileInfo else fileName, + fileSize=fileInfo.get("size", len(contentBytes)) if fileInfo else len(contentBytes), + mimeType=fileInfo.get("mimeType", mimeType) if fileInfo else mimeType + ) + createdDocs.append(doc) + except Exception: + continue else: try: analyzerPrompt = ( diff --git a/tests/functional/test09_document_generation_formats.py b/tests/functional/test09_document_generation_formats.py index 0834f440..49860665 100644 --- a/tests/functional/test09_document_generation_formats.py +++ b/tests/functional/test09_document_generation_formats.py @@ -39,6 +39,7 @@ class DocumentGenerationFormatsTester: self.workflow = None self.testResults = {} self.generatedDocuments = {} + self.pdfFileId = None # Store PDF file ID for reuse async def initialize(self): """Initialize the test environment.""" @@ -53,17 +54,123 @@ class DocumentGenerationFormatsTester: print(f"Initialized test with user: {self.testUser.id}") print(f"Mandate ID: {self.testUser.mandateId}") print(f"Debug logging enabled: {APP_CONFIG.get('APP_DEBUG_CHAT_WORKFLOW_ENABLED', False)}") + + # Upload PDF file for testing + await self.uploadPdfFile() + + async def uploadPdfFile(self): + """Upload the PDF file and store its file ID.""" + pdfPath = os.path.join(os.path.dirname(__file__), "..", "..", "..", "local", "temp", "B2025-02c.pdf") + pdfPath = os.path.abspath(pdfPath) + + if not os.path.exists(pdfPath): + print(f"⚠️ Warning: PDF file not found at {pdfPath}") + print(" Test will continue without PDF attachment") + return + + try: + # Read PDF file + with open(pdfPath, "rb") as f: + pdfContent = f.read() + + # Create file using services.interfaceDbComponent + if not hasattr(self.services, 'interfaceDbComponent') or not self.services.interfaceDbComponent: + print("⚠️ Warning: interfaceDbComponent not available in services") + print(" Test will continue without PDF attachment") + return + + interfaceDbComponent = self.services.interfaceDbComponent + + fileItem = 
interfaceDbComponent.createFile( + name="B2025-02c.pdf", + mimeType="application/pdf", + content=pdfContent + ) + + # Store file data + interfaceDbComponent.createFileData(fileItem.id, pdfContent) + + self.pdfFileId = fileItem.id + print(f"✅ Uploaded PDF file: {fileItem.fileName} (ID: {self.pdfFileId}, Size: {len(pdfContent)} bytes)") + + except Exception as e: + import traceback + print(f"⚠️ Warning: Failed to upload PDF file: {str(e)}") + print(f" Traceback: {traceback.format_exc()}") + print(" Test will continue without PDF attachment") def createTestPrompt(self, format: str) -> str: - """Create a test prompt for document generation in the specified format.""" + """Create a unified test prompt for document generation in the specified format. + + The prompt requests: + - Extraction of images from the attached PDF + - Generation of a new image + - Document creation with both images + """ + basePrompt = ( + "Create a professional document about 'Fuel Station Receipt Analysis' with the following content:\n" + "1) A main title\n" + "2) An introduction paragraph explaining the receipt analysis\n" + "3) Extract and include the image from the attached PDF document (B2025-02c.pdf)\n" + "4) A section analyzing the receipt data with bullet points\n" + "5) Generate a new image showing a visual representation of fuel consumption trends\n" + "6) A conclusion paragraph with recommendations\n\n" + "Make sure to include both: the image extracted from the PDF and the newly generated image.\n" + f"Format the output as {format.upper()}." + ) + return basePrompt + + def createRefactoringTestPrompt(self, testType: str, format: str = "html") -> str: + """Create test prompts for specific refactoring features. + + Args: + testType: Type of refactoring test: + - "intent_analysis": Test DocumentIntent analysis + - "conditional_extraction": Test conditional extraction (extract vs render) + - "image_render": Test image rendering as asset + - "multi_document": Test multi-document rendering + - "metadata_preservation": Test metadata preservation + format: Output format (default: html) + """ prompts = { - "html": "Create a professional HTML document about 'The Future of Artificial Intelligence' with: 1) A main title, 2) An introduction paragraph, 3) Three key sections with headings, 4) A bullet list of benefits, 5) An image showing AI technology (generate it), 6) A conclusion paragraph. Format as HTML.", - "pdf": "Create a professional PDF report about 'Climate Change Impact Analysis' with: 1) A title page, 2) An executive summary, 3) Three main sections with data tables, 4) Charts/graphs described, 5) An image showing environmental impact (generate it), 6) Conclusions and recommendations. Format as PDF.", - "docx": "Create a comprehensive Word document about 'Project Management Best Practices' with: 1) A cover page with title, 2) Table of contents, 3) Five chapters with headings and paragraphs, 4) A table comparing methodologies, 5) An image illustrating project workflow (generate it), 6) Appendices. Format as DOCX.", - "xlsx": "Create an Excel workbook about 'Sales Performance Analysis' with: 1) A summary sheet with key metrics, 2) A detailed data sheet with sales data in a table format (columns: Month, Product, Sales, Units, Revenue), 3) A chart sheet with visualizations described, 4) An analysis sheet with calculations. 
Format as XLSX.", - "pptx": "Create a PowerPoint presentation about 'Digital Transformation Strategy' with: 1) A title slide, 2) An agenda slide, 3) Five content slides with bullet points, 4) A slide with an image showing transformation roadmap (generate it), 5) A conclusion slide. Format as PPTX." + "intent_analysis": ( + "Create a document with the following requirements:\n" + "1) Extract text content from the attached PDF\n" + "2) Include images from the PDF as visual elements (render them, don't extract text from them)\n" + "3) Generate a summary document\n\n" + "This tests that the system correctly identifies which documents need extraction vs rendering." + ), + "conditional_extraction": ( + "Create a document that:\n" + "1) Extracts and uses text from the attached PDF\n" + "2) Renders images from the PDF as visual assets (not as extracted text)\n" + "3) Generates new content based on the extracted text\n\n" + "This tests conditional extraction - only extract what needs extraction, render what needs rendering." + ), + "image_render": ( + "Create a document that includes images from the attached PDF.\n" + "The images should be rendered as visual elements in the document, not extracted as text.\n" + "Include a title and description for each image.\n\n" + "This tests the image asset pipeline with render intent." + ), + "multi_document": ( + "Create multiple separate documents:\n" + "1) Document 1: Summary of the PDF content\n" + "2) Document 2: Analysis of the PDF content\n" + "3) Document 3: Recommendations based on the PDF content\n\n" + "Each document should be separate and complete.\n" + "This tests multi-document generation and rendering." + ), + "metadata_preservation": ( + "Create a document that extracts content from the attached PDF.\n" + "The document should clearly show which content came from which source document.\n" + "Include source references in the generated content.\n\n" + "This tests that metadata (documentId, mimeType) is preserved in the generation prompt." + ) } - return prompts.get(format.lower(), prompts["docx"]) + + prompt = prompts.get(testType, self.createTestPrompt(format)) + return f"{prompt}\n\nFormat the output as {format.upper()}." 
async def generateDocumentInFormat(self, format: str) -> Dict[str, Any]: """Generate a document in the specified format using workflow.""" @@ -74,9 +181,18 @@ class DocumentGenerationFormatsTester: prompt = self.createTestPrompt(format) print(f"Prompt: {prompt[:200]}...") + # Create user input request with PDF file attachment + listFileId = [] + if self.pdfFileId: + listFileId = [self.pdfFileId] + print(f"Attaching PDF file (ID: {self.pdfFileId})") + else: + print("⚠️ No PDF file attached (file upload may have failed)") + # Create user input request userInput = UserInputRequest( prompt=prompt, + listFileId=listFileId, userLanguage="en" ) @@ -281,6 +397,166 @@ class DocumentGenerationFormatsTester: return verification + async def testRefactoringFeatures(self) -> Dict[str, Any]: + """Test specific refactoring features.""" + print("\n" + "="*80) + print("TESTING REFACTORING FEATURES") + print("="*80) + + refactoringTests = [ + ("intent_analysis", "html"), + ("conditional_extraction", "html"), + ("image_render", "html"), + ("multi_document", "html"), + ("metadata_preservation", "html") + ] + + results = {} + + for testType, format in refactoringTests: + try: + print(f"\n{'='*80}") + print(f"Testing Refactoring Feature: {testType}") + print(f"{'='*80}") + + prompt = self.createRefactoringTestPrompt(testType, format) + print(f"Prompt: {prompt[:200]}...") + + # Create user input request with PDF file attachment + listFileId = [] + if self.pdfFileId: + listFileId = [self.pdfFileId] + print(f"Attaching PDF file (ID: {self.pdfFileId})") + else: + print("⚠️ No PDF file attached (file upload may have failed)") + + userInput = UserInputRequest( + prompt=prompt, + listFileId=listFileId, + userLanguage="en" + ) + + # Start workflow + print(f"\nStarting workflow for {testType} test...") + workflow = await chatStart( + currentUser=self.testUser, + userInput=userInput, + workflowMode=WorkflowModeEnum.WORKFLOW_DYNAMIC, + workflowId=None + ) + + if not workflow: + results[testType] = { + "success": False, + "error": "Failed to start workflow" + } + continue + + self.workflow = workflow + print(f"Workflow started: {workflow.id}") + + # Wait for workflow completion + completed = await self.waitForWorkflowCompletion(timeout=300) + + if not completed: + results[testType] = { + "success": False, + "error": "Workflow did not complete within timeout", + "workflowId": workflow.id + } + continue + + # Analyze results + workflowResults = self.analyzeWorkflowResults() + + # Check for specific refactoring features + verification = self.verifyRefactoringFeature(testType, workflowResults) + + results[testType] = { + "success": True, + "workflowId": workflow.id, + "verification": verification, + "workflowResults": workflowResults + } + + print(f"\n✅ {testType} test completed!") + print(f" Verification: {'✅ PASS' if verification.get('passed', False) else '❌ FAIL'}") + if verification.get("details"): + for detail in verification["details"]: + print(f" - {detail}") + + await asyncio.sleep(2) + + except Exception as e: + import traceback + print(f"\n❌ Error testing {testType}: {str(e)}") + print(traceback.format_exc()) + results[testType] = { + "success": False, + "error": str(e), + "traceback": traceback.format_exc() + } + + return results + + def verifyRefactoringFeature(self, testType: str, workflowResults: Dict[str, Any]) -> Dict[str, Any]: + """Verify that a refactoring feature works correctly.""" + documents = workflowResults.get("documents", []) + logs = workflowResults.get("logs", []) + + verification = { + "testType": 
testType, + "passed": False, + "details": [] + } + + if testType == "intent_analysis": + # Check that intent analysis was performed + intentLogs = [log for log in logs if "intent" in str(log).lower() or "analyzing document intent" in str(log).lower()] + if intentLogs: + verification["details"].append("Intent analysis logs found") + verification["passed"] = True + else: + verification["details"].append("No intent analysis logs found") + + elif testType == "conditional_extraction": + # Check that extraction and rendering both occurred + extractionLogs = [log for log in logs if "extract" in str(log).lower()] + renderLogs = [log for log in logs if "render" in str(log).lower() or "image" in str(log).lower()] + if extractionLogs and renderLogs: + verification["details"].append("Both extraction and rendering occurred") + verification["passed"] = True + else: + verification["details"].append(f"Missing logs: extraction={len(extractionLogs)}, render={len(renderLogs)}") + + elif testType == "image_render": + # Check that images were rendered (not extracted as text) + imageLogs = [log for log in logs if "image" in str(log).lower()] + if imageLogs: + verification["details"].append("Image rendering logs found") + verification["passed"] = True + else: + verification["details"].append("No image rendering logs found") + + elif testType == "multi_document": + # Check that multiple documents were generated + if len(documents) >= 2: + verification["details"].append(f"Multiple documents generated: {len(documents)}") + verification["passed"] = True + else: + verification["details"].append(f"Expected multiple documents, got {len(documents)}") + + elif testType == "metadata_preservation": + # Check that metadata was preserved (check logs for documentId references) + metadataLogs = [log for log in logs if "documentId" in str(log) or "SOURCE:" in str(log)] + if metadataLogs: + verification["details"].append("Metadata preservation logs found") + verification["passed"] = True + else: + verification["details"].append("No metadata preservation logs found") + + return verification + async def testAllFormats(self) -> Dict[str, Any]: """Test document generation in all formats.""" print("\n" + "="*80) @@ -334,8 +610,12 @@ class DocumentGenerationFormatsTester: return results - async def runTest(self): - """Run the complete test.""" + async def runTest(self, includeRefactoringTests: bool = True): + """Run the complete test. 
+ + Args: + includeRefactoringTests: If True, also run refactoring feature tests + """ print("\n" + "="*80) print("DOCUMENT GENERATION FORMATS TEST") print("="*80) @@ -344,18 +624,43 @@ class DocumentGenerationFormatsTester: # Initialize await self.initialize() + # Test refactoring features first (if enabled) + refactoringResults = {} + if includeRefactoringTests: + refactoringResults = await self.testRefactoringFeatures() + # Test all formats - results = await self.testAllFormats() + formatResults = await self.testAllFormats() # Summary print("\n" + "="*80) print("TEST SUMMARY") print("="*80) + # Refactoring tests summary + refactoringSuccessCount = 0 + refactoringFailCount = 0 + if includeRefactoringTests and refactoringResults: + print("\nRefactoring Features:") + for testType, result in refactoringResults.items(): + if result.get("success"): + refactoringSuccessCount += 1 + verification = result.get("verification", {}) + passed = verification.get("passed", False) + statusIcon = "✅" if passed else "⚠️" + print(f"{statusIcon} {testType:25s}: {'PASS' if passed else 'FAIL'}") + else: + refactoringFailCount += 1 + error = result.get("error", "Unknown error") + print(f"❌ {testType:25s}: FAIL - {error}") + print(f"Refactoring Tests: {refactoringSuccessCount} passed, {refactoringFailCount} failed out of {len(refactoringResults)} tests") + + # Format tests summary + print("\nFormat Tests:") successCount = 0 failCount = 0 - for format, result in results.items(): + for format, result in formatResults.items(): if result.get("success"): successCount += 1 status = "✅ PASS" @@ -369,14 +674,28 @@ class DocumentGenerationFormatsTester: error = result.get("error", "Unknown error") print(f"❌ {format.upper():6s}: FAIL - {error}") - print(f"\nTotal: {successCount} passed, {failCount} failed out of {len(results)} formats") + print(f"\nFormat Tests: {successCount} passed, {failCount} failed out of {len(formatResults)} formats") + + # Calculate totals + totalSuccess = successCount + refactoringSuccessCount if includeRefactoringTests else successCount + totalFail = failCount + refactoringFailCount if includeRefactoringTests else failCount self.testResults = { - "success": failCount == 0, - "successCount": successCount, - "failCount": failCount, - "totalFormats": len(results), - "results": results + "success": failCount == 0 and (not includeRefactoringTests or refactoringFailCount == 0), + "formatTests": { + "successCount": successCount, + "failCount": failCount, + "totalFormats": len(formatResults), + "results": formatResults + }, + "refactoringTests": { + "successCount": refactoringSuccessCount if includeRefactoringTests else 0, + "failCount": refactoringFailCount if includeRefactoringTests else 0, + "totalTests": len(refactoringResults) if includeRefactoringTests else 0, + "results": refactoringResults if includeRefactoringTests else {} + }, + "totalSuccess": totalSuccess, + "totalFail": totalFail } return self.testResults
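
For completeness, a sketch of how the extended test might be driven end to end. This wrapper is an assumption rather than part of the patch: `runTest()` already calls `initialize()` and returns the combined result dict (format tests, refactoring tests, totals), so a runner only needs to map the aggregate `success` flag to an exit code. The import path and no-argument constructor are assumed as in the earlier sketch.

```python
# Hypothetical runner for the extended formats test; not included in this patch.
import asyncio
import json
import sys

from tests.functional.test09_document_generation_formats import DocumentGenerationFormatsTester


async def main() -> int:
    tester = DocumentGenerationFormatsTester()
    results = await tester.runTest(includeRefactoringTests=True)
    # Print only the aggregate counters; the nested per-test results can be large.
    summary = {key: results.get(key) for key in ("success", "totalSuccess", "totalFail")}
    print(json.dumps(summary, indent=2))
    return 0 if results.get("success") else 1


if __name__ == "__main__":
    sys.exit(asyncio.run(main()))
```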