diff --git a/modules/services/serviceAi/mainServiceAi.py b/modules/services/serviceAi/mainServiceAi.py index 6ab60b85..a07aa441 100644 --- a/modules/services/serviceAi/mainServiceAi.py +++ b/modules/services/serviceAi/mainServiceAi.py @@ -437,8 +437,8 @@ Respond with ONLY a JSON object in this exact format: self, userPrompt: str, contentParts: List[ContentPart], - outputFormat: str, - parentOperationId: str + outputFormat: Optional[str] = None, + parentOperationId: str = None ) -> Dict[str, Any]: """Public method: Delegate to StructureGenerator.""" return await self.structureGenerator.generateStructure( @@ -461,6 +461,7 @@ Respond with ONLY a JSON object in this exact format: self, filledStructure: Dict[str, Any], outputFormat: str, + language: str, title: str, userPrompt: str, parentOperationId: str @@ -469,9 +470,15 @@ Respond with ONLY a JSON object in this exact format: Phase 5E: Rendert gefüllte Struktur zum Ziel-Format. Jedes Dokument wird einzeln gerendert, jeder Renderer kann 1..n Dokumente zurückgeben. + Render filled structure to documents. + Per-document format and language are extracted from structure (validated in State 3). + The outputFormat and language parameters are only used as global fallbacks. + Multiple documents can have different formats and languages. + Args: filledStructure: Gefüllte Struktur mit elements - outputFormat: Ziel-Format (pdf, docx, html, etc.) - wird für alle Dokumente verwendet + outputFormat: Ziel-Format (pdf, docx, html, etc.) - Global fallback + language: Language (global fallback) - Per-document language extracted from structure title: Dokument-Titel userPrompt: User-Anfrage parentOperationId: Parent Operation-ID für ChatLog-Hierarchie @@ -480,6 +487,11 @@ Respond with ONLY a JSON object in this exact format: List of RenderedDocument objects. Jedes RenderedDocument repräsentiert ein gerendertes Dokument (Hauptdokument oder unterstützende Datei) """ + # Language comes from structure (per-document), validated in State 3 + # This parameter is only used as global fallback if structure validation fails + # Use validated currentUserLanguage as fallback (always valid) + if not language: + language = self._getUserLanguage() if hasattr(self, '_getUserLanguage') else (self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') else 'en') # Erstelle Operation-ID für Rendering renderOperationId = f"{parentOperationId}_rendering" @@ -502,6 +514,7 @@ Respond with ONLY a JSON object in this exact format: renderedDocuments = await generationService.renderReport( filledStructure, outputFormat, + language, # Pass language (global fallback, per-document extracted in renderReport) title, userPrompt, self, @@ -577,18 +590,18 @@ Respond with ONLY a JSON object in this exact format: aiOperationId = f"ai_content_{workflowId}_{int(time.time())}" # Starte Progress-Tracking mit Parent-Referenz + formatDisplay = outputFormat if outputFormat else "auto-determined" self.services.chat.progressLogStart( aiOperationId, "AI content processing", "Content Processing", - f"Format: {outputFormat or 'text'}", + f"Format: {formatDisplay}", parentOperationId=parentOperationId ) try: - # Initialisiere Defaults - if not outputFormat: - outputFormat = "txt" + # outputFormat is optional - if None, formats determined from prompt by AI + # No default fallback here - let AI service handle it opType = getattr(options, "operationType", None) if not opType: @@ -652,19 +665,11 @@ Respond with ONLY a JSON object in this exact format: parentOperationId=parentOperationId ) - # Other operation types (DATA_ANALYSE, etc.) - existing logic - # Fallback to document generation for backward compatibility (should not happen) - logger.warning(f"Unhandled operation type: {opType}, falling back to document generation") - return await self._handleDocumentGeneration( - prompt=prompt, - options=options, - documentList=documentList, - documentIntents=documentIntents, - contentParts=contentParts, - outputFormat=outputFormat, - title=title, - parentOperationId=parentOperationId - ) + # Other operation types (DATA_ANALYSE, etc.) - not supported + errorMsg = f"Unsupported operation type: {opType}. Supported types: IMAGE_GENERATE, DATA_GENERATE, DATA_EXTRACT" + logger.error(errorMsg) + self.services.chat.progressLogFinish(aiOperationId, False) + raise ValueError(errorMsg) except Exception as e: logger.error(f"Error in callAiContent: {str(e)}") @@ -707,6 +712,31 @@ Respond with ONLY a JSON object in this exact format: if documentList: documents = self.services.chat.getChatDocumentsFromDocumentList(documentList) + # Filter: Remove original documents if already covered by pre-extracted JSONs + # (to prevent duplicate ContentParts - pre-extracted JSONs contain already extracted ContentParts) + if documents: + # Step 1: Identify all original document IDs covered by pre-extracted JSONs + originalDocIdsCoveredByPreExtracted = set() + for doc in documents: + preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc) + if preExtracted: + originalDocId = preExtracted["originalDocument"]["id"] + originalDocIdsCoveredByPreExtracted.add(originalDocId) + logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}") + + # Step 2: Filter documents - remove originals covered by pre-extracted JSONs + filteredDocuments = [] + for doc in documents: + preExtracted = self.intentAnalyzer.resolvePreExtractedDocument(doc) + if preExtracted: + filteredDocuments.append(doc) # Keep pre-extracted JSON + elif doc.id in originalDocIdsCoveredByPreExtracted: + logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON") + else: + filteredDocuments.append(doc) # Keep regular document + + documents = filteredDocuments # Use filtered list + # Step 2: Clarify document intents (if not provided) - REQUIRED for all documents if not documentIntents and documents: documentIntents = await self.clarifyDocumentIntents( diff --git a/modules/services/serviceAi/subContentExtraction.py b/modules/services/serviceAi/subContentExtraction.py index 40bf5bf5..71c90879 100644 --- a/modules/services/serviceAi/subContentExtraction.py +++ b/modules/services/serviceAi/subContentExtraction.py @@ -408,10 +408,28 @@ class ContentExtractor: "content_extraction_result" ) + # State 2 Validation: Validate and auto-fix ContentParts + validatedParts = [] + for part in allContentParts: + # Validation 2.1: Skip ContentParts without documentId + if not part.metadata.get("documentId"): + logger.warning(f"Skipping ContentPart {part.id} - missing documentId in metadata") + continue + + # Validation 2.2: Skip ContentParts with invalid contentFormat + contentFormat = part.metadata.get("contentFormat") + if contentFormat not in ["extracted", "object", "reference"]: + logger.warning( + f"Skipping ContentPart {part.id} - invalid contentFormat: {contentFormat}" + ) + continue + + validatedParts.append(part) + # ChatLog abschließen self.services.chat.progressLogFinish(extractionOperationId, True) - return allContentParts + return validatedParts except Exception as e: self.services.chat.progressLogFinish(extractionOperationId, False) diff --git a/modules/services/serviceAi/subDocumentIntents.py b/modules/services/serviceAi/subDocumentIntents.py index d81f6e4e..e78ed11c 100644 --- a/modules/services/serviceAi/subDocumentIntents.py +++ b/modules/services/serviceAi/subDocumentIntents.py @@ -109,6 +109,21 @@ class DocumentIntentAnalyzer: "document_intent_analysis_result" ) + # State 1 Validation: Validate and auto-fix document intents + documentIds = {d.id for d in documents} + validatedIntents = [] + + for intent in documentIntents: + # Validation 1.2: Skip intents for unknown documents + if intent.documentId not in documentIds: + logger.warning(f"Skipping intent for unknown document: {intent.documentId}") + continue + validatedIntents.append(intent) + + # Validation 1.1: Documents without intents are OK (not needed) + # Intents for non-existing documents are already filtered above + documentIntents = validatedIntents + # ChatLog abschließen self.services.chat.progressLogFinish(intentOperationId, True) @@ -245,8 +260,13 @@ class DocumentIntentAnalyzer: outputFormat = actionParameters.get("outputFormat", "txt") - prompt = f"""USER REQUEST: + # FENCE user input to prevent prompt injection + fencedUserPrompt = f"""```user_request {userPrompt} +```""" + + prompt = f"""USER REQUEST: +{fencedUserPrompt} DOCUMENTS TO ANALYZE: {docListText} @@ -256,20 +276,25 @@ TASK: For each document, determine its intents (can be multiple): - "render": Image/binary should be rendered as-is (visual element) - "reference": Document reference/attachment (no extraction, just reference) -OUTPUT FORMAT: {outputFormat} +TASK: For each document, determine: +1. Intents (can be multiple): "extract", "render", "reference" +Note: Output format and language are NOT determined here - they will be + determined during structure generation (Phase 3) in the chapter structure JSON + +OUTPUT FORMAT: {outputFormat} (global fallback - for reference only) RETURN JSON: {{ "intents": [ {{ "documentId": "doc_1", - "intents": ["extract"], # Array - can contain multiple! + "intents": ["extract"], "extractionPrompt": "Extract all text content, preserving structure", "reasoning": "User needs text content for document generation" }}, {{ "documentId": "doc_2", - "intents": ["extract", "render"], # Both! Image needs text extraction AND visual rendering + "intents": ["extract", "render"], "extractionPrompt": "Extract text content from image using vision AI", "reasoning": "Image contains text that needs extraction, but also should be rendered visually" }}, diff --git a/modules/services/serviceAi/subStructureFilling.py b/modules/services/serviceAi/subStructureFilling.py index 86bcf04d..b1f6d6b6 100644 --- a/modules/services/serviceAi/subStructureFilling.py +++ b/modules/services/serviceAi/subStructureFilling.py @@ -198,6 +198,31 @@ class StructureFiller: # Füge ContentParts-Metadaten zur Struktur hinzu (für Validierung) flattenedStructure = self._addContentPartsMetadata(flattenedStructure, contentParts) + # State 4 Validation: Validate and auto-fix filled structure + # Validation 4.1: Filled structure missing 'documents' field + if "documents" not in flattenedStructure: + raise ValueError("Filled structure missing 'documents' field - cannot auto-fix") + + for doc in flattenedStructure["documents"]: + # Validation 4.4: Verify language is preserved from input structure + # Language MUST be preserved from Phase 3 structure (validated in State 3) + if "language" not in doc: + raise ValueError(f"Document {doc.get('id')} missing language in filled structure - should have been preserved from Phase 3") + + # Validate language format + if not isinstance(doc["language"], str) or len(doc["language"]) != 2: + raise ValueError(f"Document {doc.get('id')} has invalid language format in filled structure: {doc['language']} - should be 2-character ISO 639-1 code") + + for chapter in doc.get("chapters", []): + for section in chapter.get("sections", []): + # Validation 4.2: Section missing 'elements' field + if "elements" not in section: + section["elements"] = [] + logger.info(f"Section {section.get('id')} missing 'elements' - created empty list") + + # Validation 4.3: Section has empty elements list - ALLOW (intentionally empty is OK) + # No action needed - empty elements are allowed + # ChatLog abschließen self.services.chat.progressLogFinish(fillOperationId, True) @@ -1783,6 +1808,8 @@ The JSON should be a fragment that can be merged with the previous response.""" "id": doc.get("id"), "title": doc.get("title"), "filename": doc.get("filename"), + "outputFormat": doc.get("outputFormat"), # Preserve from Phase 3 + "language": doc.get("language"), # Preserve from Phase 3 "sections": [] } diff --git a/modules/services/serviceAi/subStructureGeneration.py b/modules/services/serviceAi/subStructureGeneration.py index f16bacd6..e1651f50 100644 --- a/modules/services/serviceAi/subStructureGeneration.py +++ b/modules/services/serviceAi/subStructureGeneration.py @@ -44,8 +44,8 @@ class StructureGenerator: self, userPrompt: str, contentParts: List[ContentPart], - outputFormat: str, - parentOperationId: str + outputFormat: Optional[str] = None, + parentOperationId: str = None ) -> Dict[str, Any]: """ Phase 5C: Generiert Chapter-Struktur (Table of Contents). @@ -54,24 +54,37 @@ class StructureGenerator: - contentParts (unified object with instruction and/or caption per part) - generationHint + Generate document structure with per-document format determination. + Multiple documents can be produced with different formats (e.g., one PDF, one HTML). + AI determines formats per-document from user prompt. The outputFormat parameter is + only a validation fallback - used if AI doesn't return format per document. + Args: userPrompt: User-Anfrage contentParts: Alle vorbereiteten ContentParts mit Metadaten - outputFormat: Ziel-Format (html, docx, pdf, etc.) + outputFormat: Optional global format fallback. If omitted, formats are determined + from user prompt by AI. Used as validation fallback if AI doesn't + return format per document. Defaults to "txt" if not provided. parentOperationId: Parent Operation-ID für ChatLog-Hierarchie Returns: Struktur-Dict mit documents und chapters (nicht sections!) """ + # If outputFormat not provided, use "txt" as fallback for validation + # AI will determine formats per document from user prompt + if not outputFormat: + outputFormat = "txt" + logger.debug("outputFormat not provided - using 'txt' as validation fallback, formats determined from prompt") # Erstelle Operation-ID für Struktur-Generierung structureOperationId = f"{parentOperationId}_structure_generation" # Starte ChatLog mit Parent-Referenz + formatDisplay = outputFormat if outputFormat else "auto-determined" self.services.chat.progressLogStart( structureOperationId, "Chapter Structure Generation", "Structure", - f"Generating chapter structure for {outputFormat}", + f"Generating chapter structure (format: {formatDisplay})", parentOperationId=parentOperationId ) @@ -181,6 +194,72 @@ Continue generating the remaining chapters now. else: structure = parsedJson + # State 3 Validation: Validate and auto-fix structure + # Validation 3.1: Structure missing 'documents' field + if "documents" not in structure: + raise ValueError("Structure missing 'documents' field - cannot auto-fix") + + documents = structure["documents"] + + # Validation 3.2: Structure has no documents + if not isinstance(documents, list) or len(documents) == 0: + raise ValueError("Structure has no documents - cannot generate without documents") + + # Import renderer registry for format validation (existing infrastructure) + from modules.services.serviceGeneration.renderers.registry import getRenderer + + # Validate and fix each document + for doc in documents: + # Validation 3.3 & 3.4: Document outputFormat + # outputFormat parameter is optional - if omitted, formats determined from prompt by AI + # Use as fallback only if AI doesn't return format per document + # Multiple documents can have different formats (e.g., one PDF, one HTML) + globalFormatFallback = outputFormat or "txt" # Fallback for validation + + if "outputFormat" not in doc or not doc["outputFormat"]: + # AI didn't return format or returned empty - use global fallback + doc["outputFormat"] = globalFormatFallback + logger.warning(f"Document {doc.get('id')} missing outputFormat - using fallback: {doc['outputFormat']}") + else: + # AI returned format - validate using existing renderer registry + formatName = str(doc["outputFormat"]).lower().strip() + renderer = getRenderer(formatName) # Uses existing infrastructure + + if not renderer: + # Format doesn't match any renderer - use txt (simple approach) + logger.warning(f"Document {doc.get('id')} has format without renderer: {formatName}, using 'txt'") + doc["outputFormat"] = "txt" + else: + # Valid format with renderer - normalize and keep AI result + doc["outputFormat"] = formatName + logger.debug(f"Document {doc.get('id')} using AI-determined format: {formatName}") + + # Validation 3.5 & 3.6: Document language + # Use validated currentUserLanguage (always valid, validated during user intention analysis) + # Access via _getUserLanguage() which uses self.services.currentUserLanguage + userPromptLanguage = self._getUserLanguage() # Uses validated currentUserLanguage infrastructure + + if "language" not in doc or not isinstance(doc["language"], str) or len(doc["language"]) != 2: + # AI didn't return language or invalid format - use validated currentUserLanguage + doc["language"] = userPromptLanguage + if "language" not in doc: + logger.warning(f"Document {doc.get('id')} missing language - using currentUserLanguage: {userPromptLanguage}") + else: + logger.warning(f"Document {doc.get('id')} has invalid language format from AI: {doc['language']}, using currentUserLanguage") + else: + # AI returned valid language format - normalize + doc["language"] = doc["language"].lower().strip()[:2] + logger.debug(f"Document {doc.get('id')} using AI-determined language: {doc['language']}") + + # Validation 3.7: Document missing 'chapters' field + if "chapters" not in doc: + raise ValueError(f"Document {doc.get('id')} missing 'chapters' field - cannot auto-fix") + + # Validation 3.8: Chapter missing 'contentParts' field + for chapter in doc["chapters"]: + if "contentParts" not in chapter: + raise ValueError(f"Chapter {chapter.get('id')} missing 'contentParts' field - cannot auto-fix") + # ChatLog abschließen self.services.chat.progressLogFinish(structureOperationId, True) @@ -261,15 +340,13 @@ This is a PLANNING task. Return EXACTLY ONE complete JSON object. Do not generat {userPrompt} ``` -DEFAULT LANGUAGE: If no language is specified for a document, use "{language}" (from user prompt). Each document can have its own language specified in the "language" field. Use ISO 639-1 language codes in lowercase (e.g., "de", "en", "fr", "it"). - ## AVAILABLE CONTENT PARTS {contentPartsIndex} -## CONTENT ASSIGNMENT RULE - CRITICAL +## CONTENT ASSIGNMENT RULE If the user request mentions documents/images/data, then EVERY chapter that generates content related to those references MUST assign the relevant ContentParts explicitly. -**Assignment logic:** +Assignment logic: - If chapter DISPLAYS a document/image → assign "object" format ContentPart with "caption" - If chapter generates text content ABOUT a document/image/data → assign ContentPart with "instruction": - Prefer "extracted" format if available (contains analyzed/extracted content) @@ -279,14 +356,11 @@ If the user request mentions documents/images/data, then EVERY chapter that gene - Use ContentPart IDs exactly as listed in AVAILABLE CONTENT PARTS above - Empty contentParts are only allowed if chapter generates content WITHOUT referencing any documents/images/data from the user request -**CRITICAL RULE**: If the user request mentions BOTH: +CRITICAL RULE: If the user request mentions BOTH: a) Documents/images/data (listed in AVAILABLE CONTENT PARTS above), AND b) Generic content types (article text, main content, body text, etc.) Then chapters that generate those generic content types MUST assign the relevant ContentParts, because the content should relate to or be based on the provided documents/images/data. -## FORMATTING -- Formatting is handled automatically - focus on content and structure only - ## CHAPTER STRUCTURE REQUIREMENTS - Generate chapters based on USER REQUEST - analyze what structure the user wants - Each chapter needs: id, level (1, 2, 3, etc.), title @@ -294,15 +368,38 @@ Then chapters that generate those generic content types MUST assign the relevant - generationHint: Description of what content to generate for this chapter - The number of chapters depends on the user request - create only what is requested -## DOCUMENT LANGUAGE -- Each document can have its own language (ISO 639-1 code in lowercase: "de", "en", "fr", "it", etc.) -- If no language is specified for a document, use the user prompt language: "{language}" -- The language determines in which language the content of that document will be generated -- Multiple documents can have different languages if needed -- Always use lowercase ISO 639-1 codes in the JSON output (e.g., "de", not "DE") +## DOCUMENT OUTPUT FORMAT +For each document, determine the output format by analyzing the USER REQUEST: +- Look for explicit format mentions +- Infer from document purpose +- Infer from content type +- If format cannot be determined from the prompt, use: "{outputFormat}" +- Include "outputFormat" field in each document in the JSON structure +- Multiple documents can have different formats -## OUTPUT FORMAT -Generate the chapter structure based on the USER REQUEST above. The number and types of chapters depend entirely on what the user requested - do NOT copy the example structure below. +## DOCUMENT LANGUAGE +For each document, determine the language by analyzing the USER REQUEST: +- Look for explicit language mentions +- Map language names to ISO 639-1 codes +- If language cannot be determined from the prompt, use: "{language}" +- Include "language" field in each document in the JSON structure +- Multiple documents can have different languages + +## JSON STRUCTURE REQUIREMENTS +- metadata: {{"title": "...", "language": "..."}} +- documents: Array of document objects, each with: + - id: Unique document identifier (e.g., "doc_1") + - title: Document title + - filename: Output filename with extension (e.g., "document.docx") + - outputFormat: Format code (e.g., "docx", "pdf", "html", "xlsx", "pptx", "txt") + - language: ISO 639-1 language code (e.g., "de", "en", "fr", "it") + - chapters: Array of chapter objects, each with: + - id: Unique chapter identifier (e.g., "chapter_1") + - level: Heading level (1, 2, 3, etc.) + - title: Chapter title + - contentParts: Object mapping ContentPart IDs to usage instructions {{"partId": {{"instruction": "..."}} or {{"caption": "..."}}}} + - generationHint: Description of what content to generate + - sections: Empty array [] EXAMPLE STRUCTURE (for reference only - adapt to user request): {{ @@ -314,6 +411,7 @@ EXAMPLE STRUCTURE (for reference only - adapt to user request): "id": "doc_1", "title": "Document Title", "filename": "document.{outputFormat}", + "outputFormat": "{outputFormat}", "language": "{language}", "chapters": [ {{ @@ -337,8 +435,10 @@ CRITICAL INSTRUCTIONS: - The example shows the JSON structure format, NOT the required chapters - Create only the chapters that match the user's request - Adapt chapter titles and structure to match the user's specific request +- Determine outputFormat and language for each document by analyzing the USER REQUEST above +- The example shows placeholders "{outputFormat}" and "{language}" - YOU MUST REPLACE THESE with actual values determined from the USER REQUEST -**MANDATORY CONTENT ASSIGNMENT CHECK:** +MANDATORY CONTENT ASSIGNMENT CHECK: For each chapter, verify: 1. Does the user request mention documents/images/data? (e.g., "photo", "image", "document", "data", "based on", "about") 2. Does this chapter's generationHint, title, or purpose relate to those documents/images/data mentioned in step 1? diff --git a/modules/services/serviceGeneration/mainServiceGeneration.py b/modules/services/serviceGeneration/mainServiceGeneration.py index 828f1033..45ef37e1 100644 --- a/modules/services/serviceGeneration/mainServiceGeneration.py +++ b/modules/services/serviceGeneration/mainServiceGeneration.py @@ -346,16 +346,19 @@ class GenerationService: 'workflowId': 'unknown' } - async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]: + async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, language: str, title: str, userPrompt: str = None, aiService=None, parentOperationId: Optional[str] = None) -> List[RenderedDocument]: """ Render extracted JSON content to the specified output format. Processes EACH document separately and calls renderer for each. Each renderer can return 1..n documents (e.g., HTML + images). + Per-document format and language are extracted from structure (validated in State 3). + Multiple documents can have different formats and languages. + Args: extractedContent: Structured JSON document with documents array - outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx) - In future, each document can have its own format + outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx) - Global fallback + language: Language (global fallback) - Per-document language extracted from structure title: Report title userPrompt: User's original prompt for report generation aiService: AI service instance for generation prompt creation @@ -392,9 +395,17 @@ class GenerationService: continue # Determine format for this document - # TODO: In future, each document can have its own format field - # For now, use the global outputFormat - docFormat = doc.get("format", outputFormat) + # Check outputFormat field first (per-document), then format field (legacy), then global fallback + docFormat = doc.get("outputFormat") or doc.get("format") or outputFormat + + # Determine language for this document + # Extract per-document language from structure (validated in State 3), fallback to global + docLanguage = doc.get("language") or language + + # Validate language format (should be 2-character ISO code, validated in State 3) + if not isinstance(docLanguage, str) or len(docLanguage) != 2: + logger.warning(f"Document {doc.get('id')} has invalid language format: {docLanguage}, using fallback") + docLanguage = language # Use global fallback # Get renderer for this document's format renderer = self._getFormatRenderer(docFormat) @@ -404,7 +415,7 @@ class GenerationService: # Create JSON structure with single document (preserving metadata) singleDocContent = { - "metadata": metadata, + "metadata": {**metadata, "language": docLanguage}, # Add per-document language to metadata "documents": [doc] # Only this document } diff --git a/modules/services/serviceGeneration/paths/documentPath.py b/modules/services/serviceGeneration/paths/documentPath.py index 94cbda54..94c4fc41 100644 --- a/modules/services/serviceGeneration/paths/documentPath.py +++ b/modules/services/serviceGeneration/paths/documentPath.py @@ -148,9 +148,15 @@ class DocumentGenerationPath: # Schritt 5E: Rendere Resultat # Jedes Dokument wird einzeln gerendert, kann 1..n Dateien zurückgeben (z.B. HTML + Bilder) + # Language is already validated in structure (State 3) and preserved in filled structure (State 4) + # Per-document language will be extracted in renderReport() from filledStructure + # Use validated currentUserLanguage as global fallback (always valid infrastructure) + language = self.services.currentUserLanguage if hasattr(self.services, 'currentUserLanguage') and self.services.currentUserLanguage else "en" + renderedDocuments = await self.services.ai.renderResult( filledStructure, outputFormat, + language, # Global fallback (per-document language extracted from structure in renderReport) title or "Generated Document", userPrompt, docOperationId diff --git a/modules/shared/frontendTypes.py b/modules/shared/frontendTypes.py index 1d1c4682..06a81570 100644 --- a/modules/shared/frontendTypes.py +++ b/modules/shared/frontendTypes.py @@ -42,6 +42,7 @@ class FrontendType(str, Enum): JSON = "json" MULTILINGUAL = "multilingual" FILE = "file" + HIDDEN = "hidden" # Custom Types for Actions USER_CONNECTION = "userConnection" diff --git a/modules/workflows/methods/methodAi/actions/process.py b/modules/workflows/methods/methodAi/actions/process.py index 16cc3307..0dd37ce3 100644 --- a/modules/workflows/methods/methodAi/actions/process.py +++ b/modules/workflows/methods/methodAi/actions/process.py @@ -78,55 +78,21 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: output_mime_type = "application/octet-stream" # Prefer service-provided mimeType when available - # Phase 7.3: Extract content first if documents provided, then use contentParts - # Check if contentParts are already provided (preferred path) + # Phase 7.3: Pass both documentList and contentParts to AI service + # (Extraction logic removed - handled by AI service) contentParts: Optional[List[ContentPart]] = None if "contentParts" in parameters: - contentParts = parameters.get("contentParts") - if contentParts and not isinstance(contentParts, list): - # Try to extract from ContentExtracted if it's an ActionDocument - if hasattr(contentParts, 'parts'): - contentParts = contentParts.parts + contentPartsParam = parameters.get("contentParts") + if contentPartsParam: + if isinstance(contentPartsParam, list): + contentParts = contentPartsParam + elif hasattr(contentPartsParam, 'parts'): + # Extract from ContentExtracted if it's an ActionDocument + contentParts = contentPartsParam.parts else: - logger.warning(f"Invalid contentParts type: {type(contentParts)}, treating as empty") + logger.warning(f"Invalid contentParts type: {type(contentPartsParam)}, treating as empty") contentParts = None - # If contentParts not provided but documentList is, extract content first - if not contentParts and documentList.references: - self.services.chat.progressLogUpdate(operationId, 0.3, "Extracting content from documents") - - # Get ChatDocuments - chatDocuments = self.services.chat.getChatDocumentsFromDocumentList(documentList) - if not chatDocuments: - logger.warning("No documents found in documentList") - else: - logger.info(f"Extracting content from {len(chatDocuments)} documents") - - # Prepare extraction options (use defaults if not provided) - from modules.datamodels.datamodelExtraction import ExtractionOptions, MergeStrategy - extractionOptions = parameters.get("extractionOptions") - if not extractionOptions: - extractionOptions = ExtractionOptions( - prompt="Extract all content from the document", - mergeStrategy=MergeStrategy( - mergeType="concatenate", - groupBy="typeGroup", - orderBy="id" - ), - processDocumentsIndividually=True - ) - - # Extract content using extraction service - extractedResults = self.services.extraction.extractContent(chatDocuments, extractionOptions) - - # Combine all ContentParts from all extracted results - contentParts = [] - for extracted in extractedResults: - if extracted.parts: - contentParts.extend(extracted.parts) - - logger.info(f"Extracted {len(contentParts)} content parts from {len(extractedResults)} documents") - # Update progress - preparing AI call self.services.chat.progressLogUpdate(operationId, 0.4, "Preparing AI call") @@ -136,8 +102,9 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: # Build options with correct operationType from modules.datamodels.datamodelAi import OperationTypeEnum + # resultFormat in options can be None - formats will be determined by AI if not provided options = AiCallOptions( - resultFormat=output_format or "txt", # Fallback for options, but outputFormat can be None for callAiContent + resultFormat=output_format, # Can be None - formats determined by AI operationType=OperationTypeEnum.IMAGE_GENERATE if isImageGeneration else OperationTypeEnum.DATA_GENERATE ) @@ -149,13 +116,14 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: # Update progress - calling AI self.services.chat.progressLogUpdate(operationId, 0.6, "Calling AI") - # Use unified callAiContent method with contentParts (extraction is now separate) - # ContentParts are already extracted above (or None if no documents) + # Use unified callAiContent method with BOTH documentList and contentParts + # Extraction is handled by AI service - no extraction here # outputFormat: Optional - if None, formats determined from prompt by AI aiResponse = await self.services.ai.callAiContent( prompt=aiPrompt, options=options, - contentParts=contentParts, # Already extracted (or None if no documents) + documentList=documentList, # Pass documentList - AI service handles extraction + contentParts=contentParts, # Pass contentParts if provided (or None) outputFormat=output_format, # Can be None - AI determines from prompt parentOperationId=operationId, generationIntent=generationIntent # REQUIRED for DATA_GENERATE @@ -186,6 +154,7 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: final_documents = action_documents else: # Text response - create document from content + # If no extension provided, use "txt" (required for filename) extension = output_extension.lstrip('.') if output_extension else "txt" meaningful_name = self._generateMeaningfulFileName( base_name="ai", @@ -194,8 +163,8 @@ async def process(self, parameters: Dict[str, Any]) -> ActionResult: ) validationMetadata = { "actionType": "ai.process", - "resultType": normalized_result_type or "auto", - "outputFormat": output_format or "auto", + "resultType": normalized_result_type if normalized_result_type else None, + "outputFormat": output_format if output_format else None, "hasDocuments": False, "contentType": "text" } diff --git a/modules/workflows/methods/methodAi/methodAi.py b/modules/workflows/methods/methodAi/methodAi.py index 6aff6047..234d573b 100644 --- a/modules/workflows/methods/methodAi/methodAi.py +++ b/modules/workflows/methods/methodAi/methodAi.py @@ -36,7 +36,7 @@ class MethodAi(MethodBase): self._actions = { "process": WorkflowActionDefinition( actionId="ai.process", - description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt", + description="Universal AI document processing action - accepts multiple input documents in any format and processes them together with a prompt. If the prompt specifies document formats to deliver, include them in the prompt", dynamicMode=True, parameters={ "aiPrompt": WorkflowActionParameter( @@ -70,7 +70,14 @@ class MethodAi(MethodBase): required=False, default="document", description="Explicit generation intent (\"document\" | \"code\" | \"image\"). Required for DATA_GENERATE operations. Defaults to \"document\" if not provided. For code generation, use ai.generateCode action or explicitly pass generationIntent=\"code\". For IMAGE_GENERATE operations, this parameter is ignored." - ) + ), + "contentParts": WorkflowActionParameter( + name="contentParts", + type="List[ContentPart]", + frontendType=FrontendType.HIDDEN, + required=False, + description="Pre-extracted content parts (internal parameter, typically passed between actions). If provided, these will be used instead of extracting from documentList. Can be a list of ContentPart objects or an object with a 'parts' attribute." + ), }, execute=process.__get__(self, self.__class__) ), @@ -122,7 +129,7 @@ class MethodAi(MethodBase): ), "summarizeDocument": WorkflowActionDefinition( actionId="ai.summarizeDocument", - description="Summarize one or more documents, extracting key points and main ideas", + description="Summarize one or more documents, extracting key points and main ideas. If the prompt specifies document formats to deliver, include them in the prompt", dynamicMode=True, parameters={ "documentList": WorkflowActionParameter( @@ -237,7 +244,7 @@ class MethodAi(MethodBase): ), "generateDocument": WorkflowActionDefinition( actionId="ai.generateDocument", - description="Generate documents from scratch or based on templates/inputs", + description="Generate documents from scratch or based on templates/inputs. If the prompt specifies document formats to deliver, include them in the prompt", dynamicMode=True, parameters={ "prompt": WorkflowActionParameter( @@ -275,7 +282,7 @@ class MethodAi(MethodBase): ), "generateCode": WorkflowActionDefinition( actionId="ai.generateCode", - description="Generate code files - explicitly sets intent to 'code'", + description="Generate code files - explicitly sets intent to 'code'. If the prompt specifies file formats to deliver, include them in the prompt", dynamicMode=True, parameters={ "prompt": WorkflowActionParameter(