# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
import json
import logging
import re
import time
import base64
from typing import Dict, Any, Callable, List, Optional, Tuple

from modules.aichat.datamodelFeatureAiChat import PromptPlaceholder, ChatDocument
from modules.aichat.serviceExtraction.mainServiceExtraction import ExtractionService
from modules.datamodels.datamodelAi import AiCallRequest, AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.datamodels.datamodelExtraction import ContentPart, DocumentIntent
from modules.datamodels.datamodelWorkflow import AiResponse, AiResponseMetadata, DocumentData
from modules.datamodels.datamodelDocument import RenderedDocument
from modules.interfaces.interfaceAiObjects import AiObjects
from modules.shared.jsonUtils import (
    parseJsonWithModel
)
from .subJsonResponseHandling import JsonResponseHandler
from modules.datamodels.datamodelAi import JsonAccumulationState

logger = logging.getLogger(__name__)

# Rebuild the model to resolve forward references
AiCallRequest.model_rebuild()


class AiService:
    """AI service with core operations integrated.

    Most of the heavy lifting is delegated to submodules (response parsing,
    intent analysis, content extraction, structure generation/filling and the
    looping AI caller) that are created in ``_initializeSubmodules()`` once
    ``aiObjects`` is available.
    """

    # File extension -> MIME type for documents produced by _handleDataExtraction.
    # Anything not listed here is reported as "application/octet-stream".
    _EXTRACTION_MIME_TYPES = {
        "txt": "text/plain",
        "json": "application/json",
        "csv": "text/csv",
    }

    def __init__(self, serviceCenter=None) -> None:
        """Initialize AI service with service center access.

        Args:
            serviceCenter: Service center instance for accessing other services
        """
        self.services = serviceCenter
        # Only depend on interfaces
        self.aiObjects = None  # Will be initialized in create() or ensureAiObjectsInitialized()
        # Submodules start as None and are set in _initializeSubmodules()
        # after aiObjects is ready.
        self.extractionService = None
        self.responseParser = None
        self.intentAnalyzer = None
        self.contentExtractor = None
        self.structureGenerator = None
        self.structureFiller = None
        self.aiCallLooper = None

    def _initializeSubmodules(self):
        """Initialize all submodules after aiObjects is ready.

        Safe to call repeatedly: a submodule that already exists is left
        untouched.

        Raises:
            RuntimeError: If ``aiObjects`` has not been initialized yet.
        """
        if self.aiObjects is None:
            raise RuntimeError("aiObjects must be initialized before initializing submodules")

        # getattr(..., None) also covers instances that were built without
        # running __init__ (attribute missing altogether).
        if getattr(self, "extractionService", None) is None:
            logger.info("Initializing ExtractionService...")
            self.extractionService = ExtractionService(self.services)

        # Submodules are imported here, at call time, rather than at module top level
        from .subResponseParsing import ResponseParser
        from .subDocumentIntents import DocumentIntentAnalyzer
        from .subContentExtraction import ContentExtractor
        from .subStructureGeneration import StructureGenerator
        from .subStructureFilling import StructureFiller
        from .subAiCallLooping import AiCallLooper

        if getattr(self, "responseParser", None) is None:
            logger.info("Initializing ResponseParser...")
            self.responseParser = ResponseParser(self.services)

        if getattr(self, "intentAnalyzer", None) is None:
            logger.info("Initializing DocumentIntentAnalyzer...")
            self.intentAnalyzer = DocumentIntentAnalyzer(self.services, self)

        # ContentExtractor receives the intent analyzer, so it must come after it.
        if getattr(self, "contentExtractor", None) is None:
            logger.info("Initializing ContentExtractor...")
            self.contentExtractor = ContentExtractor(self.services, self, self.intentAnalyzer)

        if getattr(self, "structureGenerator", None) is None:
            logger.info("Initializing StructureGenerator...")
            self.structureGenerator = StructureGenerator(self.services, self)

        if getattr(self, "structureFiller", None) is None:
            logger.info("Initializing StructureFiller...")
            self.structureFiller = StructureFiller(self.services, self)

        # AiCallLooper receives the response parser, so it must come after it.
        if getattr(self, "aiCallLooper", None) is None:
            logger.info("Initializing AiCallLooper...")
            self.aiCallLooper = AiCallLooper(self.services, self, self.responseParser)

    async def callAi(self, request: AiCallRequest, progressCallback=None):
        """Router: handles content parts via extractionService, text context via interface.

        Replaces direct calls to self.aiObjects.call() to route content parts
        processing through the serviceExtraction layer.

        Args:
            request: The AI call request. If it carries non-empty
                ``contentParts`` it is routed to the extraction service.
            progressCallback: Optional callback forwarded to the extraction
                service (only used on the content-parts route).

        Returns:
            Whatever the selected backend call returns.
        """
        # Lazily initialize so that callers which reach this method before
        # create()/ensureAiObjectsInitialized() do not fail on a None attribute.
        if self.aiObjects is None or self.extractionService is None:
            await self.ensureAiObjectsInitialized()

        if getattr(request, "contentParts", None):
            return await self.extractionService.processContentPartsWithAi(
                request, self.aiObjects, progressCallback
            )
        return await self.aiObjects.callWithTextContext(request)

    async def ensureAiObjectsInitialized(self):
        """Ensure aiObjects is initialized and submodules are ready."""
        if self.aiObjects is None:
            logger.info("Lazy initializing AiObjects...")
            self.aiObjects = await AiObjects.create()
            logger.info("AiObjects initialization completed")
        # Initialize submodules after aiObjects is ready
        self._initializeSubmodules()

    @classmethod
    async def create(cls, serviceCenter=None) -> "AiService":
        """Create AiService instance with all connectors and submodules initialized."""
        logger.info("AiService.create() called")
        instance = cls(serviceCenter)
        logger.info("AiService created, about to call AiObjects.create()...")
        instance.aiObjects = await AiObjects.create()
        logger.info("AiObjects.create() completed")
        # Initialize all submodules after aiObjects is ready
        instance._initializeSubmodules()
        logger.info("AiService submodules initialized")
        return instance

    # Helper methods

    def _buildPromptWithPlaceholders(self, prompt: str, placeholders: Optional[Dict[str, str]]) -> str:
        """Build full prompt by replacing placeholders with their content.

        Uses the {{KEY:placeholder}} format.

        Args:
            prompt: The base prompt template
            placeholders: Dictionary of placeholder key-value pairs

        Returns:
            Prompt with placeholders replaced
        """
        if not placeholders:
            return prompt

        fullPrompt = prompt
        for placeholder, content in placeholders.items():
            # Only None is skipped; an empty string still replaces (i.e. removes)
            # the placeholder marker.
            if content is None:
                continue
            # Replace {{KEY:placeholder}}
            fullPrompt = fullPrompt.replace(f"{{{{KEY:{placeholder}}}}}", str(content))
        return fullPrompt

    async def _analyzePromptAndCreateOptions(self, prompt: str) -> AiCallOptions:
        """Analyze prompt to determine appropriate AiCallOptions parameters.

        Asks the AI to classify the prompt and parses its JSON answer into an
        ``AiCallOptions``. Any failure (call or parsing) is logged as a warning
        and default options are returned instead.
        """
        try:
            # Get dynamic enum values from Pydantic models
            operationTypes = [e.value for e in OperationTypeEnum]
            priorities = [e.value for e in PriorityEnum]
            processingModes = [e.value for e in ProcessingModeEnum]

            # Create analysis prompt for AI to determine operation type and parameters
            analysisPrompt = f"""
You are an AI operation analyzer. Analyze the following prompt and determine the most appropriate operation type and parameters.

PROMPT TO ANALYZE:
{self.services.utils.sanitizePromptContent(prompt, 'userinput')}

Based on the prompt content, determine:
1. operationType: Choose the most appropriate from: {', '.join(operationTypes)}
2. priority: Choose from: {', '.join(priorities)}
3. processingMode: Choose from: {', '.join(processingModes)}
4. compressPrompt: true/false (true for story-like prompts, false for structured prompts with JSON/schemas)
5. compressContext: true/false (true to summarize context, false to process fully)

Respond with ONLY a JSON object in this exact format:
{{
    "operationType": "dataAnalyse",
    "priority": "balanced",
    "processingMode": "basic",
    "compressPrompt": true,
    "compressContext": true
}}
"""

            # Use AI to analyze the prompt
            request = AiCallRequest(
                prompt=analysisPrompt,
                options=AiCallOptions(
                    operationType=OperationTypeEnum.DATA_ANALYSE,
                    priority=PriorityEnum.SPEED,
                    processingMode=ProcessingModeEnum.BASIC,
                    compressPrompt=True,
                    compressContext=False
                )
            )
            response = await self.callAi(request)

            # Parse AI response using structured parsing with AiCallOptions model
            try:
                # parseJsonWithModel parses the response into AiCallOptions
                # (the original author notes it handles enum conversion)
                analysis = parseJsonWithModel(response.content, AiCallOptions)
                return analysis
            except Exception as e:
                logger.warning(f"Failed to parse AI analysis response: {e}")
        except Exception as e:
            logger.warning(f"Prompt analysis failed: {e}")

        # Fallback to default options
        return AiCallOptions(
            operationType=OperationTypeEnum.DATA_ANALYSE,
            priority=PriorityEnum.BALANCED,
            processingMode=ProcessingModeEnum.BASIC
        )

    async def callAiWithLooping(
        self,
        prompt: str,
        options: AiCallOptions,
        debugPrefix: str = "ai_call",
        promptBuilder: Optional[Callable[..., Any]] = None,
        promptArgs: Optional[Dict[str, Any]] = None,
        operationId: Optional[str] = None,
        userPrompt: Optional[str] = None,
        contentParts: Optional[List[ContentPart]] = None,  # ARCHITECTURE: Support ContentParts for large content
        useCaseId: Optional[str] = None  # REQUIRED: Explicit use case ID for generic looping system
    ) -> str:
        """Public method: Delegate to AiCallLooper for AI calls with looping support."""
        return await self.aiCallLooper.callAiWithLooping(
            prompt,
            options,
            debugPrefix,
            promptBuilder,
            promptArgs,
            operationId,
            userPrompt,
            contentParts,
            useCaseId
        )

    # JSON merging logic moved to subJsonResponseHandling.py

    def _extractSectionsFromResponse(
        self,
        result: str,
        iteration: int,
        debugPrefix: str,
        allSections: Optional[List[Dict[str, Any]]] = None,
        accumulationState: Optional[JsonAccumulationState] = None
    ) -> Tuple[List[Dict[str, Any]], bool, Optional[Dict[str, Any]], Optional[JsonAccumulationState]]:
        """Delegate to ResponseParser."""
        return self.responseParser.extractSectionsFromResponse(
            result, iteration, debugPrefix, allSections, accumulationState
        )

    def _shouldContinueGeneration(
        self,
        allSections: List[Dict[str, Any]],
        iteration: int,
        wasJsonComplete: bool,
        rawResponse: Optional[str] = None
    ) -> bool:
        """Delegate to ResponseParser."""
        return self.responseParser.shouldContinueGeneration(
            allSections, iteration, wasJsonComplete, rawResponse
        )

    def _extractDocumentMetadata(
        self,
        parsedResult: Dict[str, Any]
    ) -> Optional[Dict[str, Any]]:
        """Delegate to ResponseParser."""
        return self.responseParser.extractDocumentMetadata(parsedResult)

    def _buildFinalResultFromSections(
        self,
        allSections: List[Dict[str, Any]],
        documentMetadata: Optional[Dict[str, Any]] = None
    ) -> str:
        """Delegate to ResponseParser."""
        return self.responseParser.buildFinalResultFromSections(allSections, documentMetadata)

    # Public API Methods

    # Planning AI Call
    async def callAiPlanning(
        self,
        prompt: str,
        placeholders: Optional[List[PromptPlaceholder]] = None,
        debugType: Optional[str] = None
    ) -> str:
        """Planning AI call for task planning, action planning, action selection, etc.

        Always uses static parameters optimized for planning tasks.

        Args:
            prompt: The planning prompt
            placeholders: Optional list of placeholder replacements
            debugType: Optional debug file type identifier (e.g., 'taskplan',
                'dynamic', 'intentanalysis'). If not provided, defaults to 'plan'

        Returns:
            Planning JSON response (empty string if the AI returned no content)
        """
        await self.ensureAiObjectsInitialized()

        # Planning calls always use static parameters
        options = AiCallOptions(
            operationType=OperationTypeEnum.PLAN,
            priority=PriorityEnum.QUALITY,
            processingMode=ProcessingModeEnum.DETAILED,
            compressPrompt=False,
            compressContext=False
        )

        # Build full prompt with placeholders
        if placeholders:
            placeholdersDict = {p.label: p.content for p in placeholders}
            fullPrompt = self._buildPromptWithPlaceholders(prompt, placeholdersDict)
        else:
            fullPrompt = prompt

        # Root-cause fix: planning must return raw single-shot JSON, not section-based output
        request = AiCallRequest(
            prompt=fullPrompt,
            context="",
            options=options
        )

        # Debug: persist prompt/response for analysis with context-specific naming
        debugPrefix = debugType if debugType else "plan"
        self.services.utils.writeDebugFile(fullPrompt, f"{debugPrefix}_prompt")

        response = await self.aiObjects.callWithTextContext(request)
        result = response.content or ""
        self.services.utils.writeDebugFile(result, f"{debugPrefix}_response")
        return result

    # Helper methods for callAiContent refactoring

    async def _handleImageGeneration(
        self,
        prompt: str,
        options: AiCallOptions,
        title: Optional[str],
        parentOperationId: Optional[str]
    ) -> AiResponse:
        """Handle IMAGE_GENERATE operation type using image generation path."""
        from modules.aichat.serviceGeneration.paths.imagePath import ImageGenerationPath

        imagePath = ImageGenerationPath(self.services)

        # Extract format from options ("png" when none is set)
        imageFormat = options.resultFormat or "png"

        return await imagePath.generateImages(
            userPrompt=prompt,
            format=imageFormat,
            title=title,
            parentOperationId=parentOperationId
        )

    async def _handleWebOperation(
        self,
        prompt: str,
        options: AiCallOptions,
        opType: OperationTypeEnum,
        aiOperationId: str
    ) -> AiResponse:
        """Handle WEB_SEARCH_DATA and WEB_CRAWL operation types.

        This handler finishes the ``aiOperationId`` progress log itself: with
        ``True`` after a successful call, with ``False`` when the AI returned
        no content.

        Raises:
            ValueError: If the AI response has no content.
        """
        self.services.chat.progressLogUpdate(aiOperationId, 0.4, f"Calling AI for {opType.name}")

        request = AiCallRequest(
            prompt=prompt,  # Raw JSON prompt - connector will parse it
            context="",
            options=options
        )
        response = await self.callAi(request)

        if not response.content:
            errorMsg = f"No content returned from {opType.name}: {response.content}"
            logger.error(f"Error in {opType.name}: {errorMsg}")
            self.services.chat.progressLogFinish(aiOperationId, False)
            raise ValueError(errorMsg)

        metadata = AiResponseMetadata(
            operationType=opType.value
        )

        # Try to store workflow stats, but don't fail if workflow is None (e.g., in chatbot context)
        try:
            self.services.chat.storeWorkflowStat(
                self.services.workflow,
                response,
                f"ai.{opType.name.lower()}"
            )
        except Exception as e:
            # Log but don't fail - workflow might be None in some contexts (e.g., chatbot)
            logger.debug(f"Could not store workflow stat (workflow may be None): {str(e)}")

        self.services.chat.progressLogUpdate(aiOperationId, 0.9, f"{opType.name} completed")
        self.services.chat.progressLogFinish(aiOperationId, True)

        # Preserve metadata from response if available (e.g., results_with_content from Tavily)
        responseMetadata = getattr(response, "metadata", None)
        if responseMetadata:
            if isinstance(responseMetadata, dict):
                # Dict metadata is merged into additionalData as-is
                if not metadata.additionalData:
                    metadata.additionalData = {}
                metadata.additionalData.update(responseMetadata)
            elif hasattr(responseMetadata, "__dict__"):
                # Object metadata: copy its public (non-underscore) attributes
                if not metadata.additionalData:
                    metadata.additionalData = {}
                for key, value in responseMetadata.__dict__.items():
                    if not key.startswith("_"):
                        metadata.additionalData[key] = value

        return AiResponse(
            content=response.content,
            metadata=metadata
        )

    def _getIntentForDocument(
        self,
        docId: str,
        intents: Optional[List[DocumentIntent]]
    ) -> Optional[DocumentIntent]:
        """Find DocumentIntent for given documentId (first match, or None)."""
        if not intents:
            return None
        return next((intent for intent in intents if intent.documentId == docId), None)

    async def clarifyDocumentIntents(
        self,
        documents: List[ChatDocument],
        userPrompt: str,
        actionParameters: Dict[str, Any],
        parentOperationId: str
    ) -> List[DocumentIntent]:
        """Public method: Delegate to DocumentIntentAnalyzer."""
        return await self.intentAnalyzer.clarifyDocumentIntents(
            documents, userPrompt, actionParameters, parentOperationId
        )

    async def extractAndPrepareContent(
        self,
        documents: List[ChatDocument],
        documentIntents: List[DocumentIntent],
        parentOperationId: str
    ) -> List[ContentPart]:
        """Public method: Delegate to ContentExtractor."""
        return await self.contentExtractor.extractAndPrepareContent(
            documents, documentIntents, parentOperationId, self._getIntentForDocument
        )

    async def generateStructure(
        self,
        userPrompt: str,
        contentParts: List[ContentPart],
        outputFormat: Optional[str] = None,
        parentOperationId: Optional[str] = None
    ) -> Dict[str, Any]:
        """Public method: Delegate to StructureGenerator."""
        return await self.structureGenerator.generateStructure(
            userPrompt, contentParts, outputFormat, parentOperationId
        )

    async def fillStructure(
        self,
        structure: Dict[str, Any],
        contentParts: List[ContentPart],
        userPrompt: str,
        parentOperationId: str
    ) -> Dict[str, Any]:
        """Public method: Delegate to StructureFiller."""
        return await self.structureFiller.fillStructure(
            structure, contentParts, userPrompt, parentOperationId
        )

    async def renderResult(
        self,
        filledStructure: Dict[str, Any],
        outputFormat: str,
        language: str,
        title: str,
        userPrompt: str,
        parentOperationId: str
    ) -> List[RenderedDocument]:
        """Phase 5E: Render the filled structure to the target format.

        Rendering is delegated to ``GenerationService.renderReport``. According
        to the original notes, each document is rendered individually, a
        renderer may return 1..n documents, and per-document format and
        language are taken from the structure (validated in State 3); the
        ``outputFormat`` and ``language`` parameters then act only as global
        fallbacks.

        Args:
            filledStructure: Filled structure with elements
            outputFormat: Target format (pdf, docx, html, etc.) - global fallback
            language: Language (global fallback). When empty, the user language
                is used, falling back to 'en'.
            title: Document title
            userPrompt: User request
            parentOperationId: Parent operation ID for the ChatLog hierarchy

        Returns:
            List of RenderedDocument objects as returned by ``renderReport``.

        Raises:
            Exception: Any error raised while rendering is logged, the progress
                log is finished with ``False``, and the error is re-raised.
        """
        # Language comes from structure (per-document), validated in State 3.
        # This parameter is only used as global fallback if structure validation fails.
        if not language:
            if hasattr(self, "_getUserLanguage"):
                language = self._getUserLanguage()
            elif hasattr(self.services, "currentUserLanguage"):
                language = self.services.currentUserLanguage
            else:
                language = "en"

        # Create the operation ID for rendering
        renderOperationId = f"{parentOperationId}_rendering"

        # Start ChatLog with parent reference
        self.services.chat.progressLogStart(
            renderOperationId,
            "Content Rendering",
            "Rendering",
            f"Rendering to {outputFormat} format",
            parentOperationId=parentOperationId
        )

        try:
            from modules.aichat.serviceGeneration.mainServiceGeneration import GenerationService

            generationService = GenerationService(self.services)

            renderedDocuments = await generationService.renderReport(
                filledStructure,
                outputFormat,
                language,  # Global fallback; per-document language is extracted in renderReport
                title,
                userPrompt,
                self,
                parentOperationId=renderOperationId  # Parent reference for the ChatLog hierarchy
            )

            # Close the ChatLog entry
            self.services.chat.progressLogFinish(renderOperationId, True)
            return renderedDocuments
        except Exception as e:
            self.services.chat.progressLogFinish(renderOperationId, False)
            logger.error(f"Error in renderResult: {str(e)}")
            raise

    def _shouldSkipContentPart(
        self,
        part: ContentPart
    ) -> bool:
        """Check if ContentPart should be skipped (already structured JSON).

        A part is skipped when it is a "structure" part with MIME type
        ``application/json`` and either carries ``skipExtraction`` in its
        metadata or its string data parses to a JSON object containing a
        ``documents`` or ``sections`` key.
        """
        if part.typeGroup == "structure" and part.mimeType == "application/json":
            if part.metadata.get("skipExtraction", False):
                logger.debug(f"Skipping already-structured JSON ContentPart {part.id} (skipExtraction=True)")
                return True
            try:
                if isinstance(part.data, str):
                    jsonData = json.loads(part.data)
                    if isinstance(jsonData, dict) and ("documents" in jsonData or "sections" in jsonData):
                        logger.debug(f"Skipping already-structured JSON ContentPart {part.id} (contains documents/sections)")
                        return True
            except (ValueError, RecursionError):
                # Not (parseable) JSON - continue processing.
                # json.JSONDecodeError is a ValueError subclass.
                pass
        return False

    async def callAiContent(
        self,
        prompt: str,
        options: AiCallOptions,
        contentParts: Optional[List[ContentPart]] = None,
        documentList: Optional[Any] = None,  # DocumentReferenceList
        documentIntents: Optional[List[DocumentIntent]] = None,
        outputFormat: Optional[str] = None,
        title: Optional[str] = None,
        parentOperationId: Optional[str] = None,
        generationIntent: Optional[str] = None  # Explicit intent from action (skips detection)
    ) -> AiResponse:
        """Unified AI content generation with explicit intent requirement.

        All AI-Actions (ai.process, ai.generateDocument, etc.) route through here.
        They differ only in parameters, not in logic.

        Args:
            prompt: The main prompt for the AI call
            options: AI call configuration options. If ``operationType`` is not
                set it is defaulted to DATA_GENERATE (and written back to
                ``options``).
            contentParts: Optional list of already-extracted content parts (preferred)
            documentList: Optional DocumentReferenceList (converted to ChatDocuments downstream)
            documentIntents: Optional list of DocumentIntent objects (created downstream if missing)
            outputFormat: Optional output format for document generation (e.g., 'pdf', 'docx', 'xlsx')
            title: Optional title for generated documents
            parentOperationId: Optional parent operation ID for hierarchical logging
            generationIntent: Explicit intent from the action. Required for
                DATA_GENERATE; "code" routes to code generation, any other
                non-empty value routes to document generation. No auto-detection.

        Returns:
            AiResponse with content, metadata, and optional documents

        Raises:
            ValueError: If DATA_GENERATE is requested without ``generationIntent``
                or the operation type is not supported.
            Exception: Any error from the selected handler is logged, the
                progress log is finished with ``False``, and the error is re-raised.
        """
        await self.ensureAiObjectsInitialized()

        # Create the operation ID
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        aiOperationId = f"ai_content_{workflowId}_{int(time.time())}"

        # Start progress tracking with parent reference
        formatDisplay = outputFormat if outputFormat else "auto-determined"
        self.services.chat.progressLogStart(
            aiOperationId,
            "AI content processing",
            "Content Processing",
            f"Format: {formatDisplay}",
            parentOperationId=parentOperationId
        )

        try:
            # outputFormat is optional - if None, formats determined from prompt by AI
            # No default fallback here - let AI service handle it
            opType = getattr(options, "operationType", None)
            if not opType:
                options.operationType = OperationTypeEnum.DATA_GENERATE
                opType = OperationTypeEnum.DATA_GENERATE

            # Web operations finish the aiOperationId progress log themselves,
            # so they return directly instead of falling through to the shared
            # success handling below.
            if opType == OperationTypeEnum.WEB_SEARCH_DATA or opType == OperationTypeEnum.WEB_CRAWL:
                return await self._handleWebOperation(prompt, options, opType, aiOperationId)

            if opType == OperationTypeEnum.IMAGE_GENERATE:
                # Image generation - route to image path
                response = await self._handleImageGeneration(prompt, options, title, parentOperationId)
            elif opType == OperationTypeEnum.DATA_GENERATE:
                # Data generation - REQUIRES explicit generationIntent
                if not generationIntent:
                    errorMsg = (
                        "generationIntent is required for DATA_GENERATE operation. "
                        "Actions must explicitly specify 'document' or 'code' intent. "
                        "No auto-detection - use qualified actions (ai.generateDocument, ai.generateCode)."
                    )
                    logger.error(errorMsg)
                    # The except block below finishes the progress log.
                    raise ValueError(errorMsg)

                # Route based on explicit intent (no auto-detection, no fallback)
                if generationIntent == "code":
                    # Route to code generation path
                    response = await self._handleCodeGeneration(
                        prompt=prompt,
                        options=options,
                        contentParts=contentParts,
                        outputFormat=outputFormat,
                        title=title,
                        parentOperationId=parentOperationId
                    )
                else:
                    # Route to document generation path (existing behavior)
                    response = await self._handleDocumentGeneration(
                        prompt=prompt,
                        options=options,
                        documentList=documentList,
                        documentIntents=documentIntents,
                        contentParts=contentParts,
                        outputFormat=outputFormat,
                        title=title,
                        parentOperationId=parentOperationId
                    )
            elif opType == OperationTypeEnum.DATA_EXTRACT:
                # DATA_EXTRACT: Extract content from documents and process with AI (no structure generation)
                response = await self._handleDataExtraction(
                    prompt=prompt,
                    options=options,
                    documentList=documentList,
                    documentIntents=documentIntents,
                    contentParts=contentParts,
                    outputFormat=outputFormat,
                    title=title,
                    parentOperationId=parentOperationId
                )
            else:
                # Other operation types (DATA_ANALYSE, etc.) - not supported
                errorMsg = (
                    f"Unsupported operation type: {opType}. "
                    "Supported types: IMAGE_GENERATE, WEB_SEARCH_DATA, WEB_CRAWL, DATA_GENERATE, DATA_EXTRACT"
                )
                logger.error(errorMsg)
                raise ValueError(errorMsg)

            # The handlers above only receive parentOperationId, so nothing else
            # can close the log entry that was started for aiOperationId.
            self.services.chat.progressLogFinish(aiOperationId, True)
            return response

        except Exception as e:
            logger.error(f"Error in callAiContent: {str(e)}")
            self.services.chat.progressLogFinish(aiOperationId, False)
            raise

    async def _handleDataExtraction(
        self,
        prompt: str,
        options: AiCallOptions,
        documentList: Optional[Any],
        documentIntents: Optional[List[DocumentIntent]],
        contentParts: Optional[List[ContentPart]],
        outputFormat: Optional[str],
        title: Optional[str],
        parentOperationId: Optional[str]
    ) -> AiResponse:
        """Handle DATA_EXTRACT: Extract content from documents (no AI), then process with AI.

        Flow: resolve documents, drop originals already covered by a
        pre-extracted JSON, clarify intents (if not given), extract content
        parts, then send the concatenated part data together with ``prompt``
        to the AI in a single call.

        Args:
            prompt: Instruction sent to the AI together with the extracted content.
            options: AI call options forwarded to the AI request.
            documentList: Optional document reference list to extract from.
            documentIntents: Optional pre-computed intents; clarified when missing.
            contentParts: Optional already-extracted parts, appended after the
                freshly extracted ones.
            outputFormat: File extension of the result document. When empty,
                "txt" is used for the file name and MIME type.
            title: Optional title; used for the document name and metadata.
            parentOperationId: Optional parent operation ID for progress logging.

        Returns:
            AiResponse with the AI text, metadata and one result document.

        Raises:
            ValueError: If no content parts are available or the AI returned
                no content.
        """
        # Create operation ID
        workflowId = self.services.workflow.id if self.services.workflow else f"no-workflow-{int(time.time())}"
        extractOperationId = f"data_extract_{workflowId}_{int(time.time())}"

        # Start progress tracking
        self.services.chat.progressLogStart(
            extractOperationId,
            "Data Extraction",
            "Extraction",
            f"Format: {outputFormat}",
            parentOperationId=parentOperationId
        )

        try:
            # Step 1: Get documents from documentList
            documents = []
            if documentList:
                documents = self.services.chat.getChatDocumentsFromDocumentList(documentList)

            # Filter: Remove original documents if already covered by pre-extracted JSONs
            # (to prevent duplicate ContentParts - pre-extracted JSONs contain already extracted ContentParts)
            if documents:
                # Resolve each document once and reuse the result in both passes.
                resolvedDocuments = [
                    (doc, self.intentAnalyzer.resolvePreExtractedDocument(doc))
                    for doc in documents
                ]

                # Pass 1: Identify all original document IDs covered by pre-extracted JSONs
                originalDocIdsCoveredByPreExtracted = set()
                for doc, preExtracted in resolvedDocuments:
                    if preExtracted:
                        originalDocId = preExtracted["originalDocument"]["id"]
                        originalDocIdsCoveredByPreExtracted.add(originalDocId)
                        logger.debug(f"Found pre-extracted JSON {doc.id} covering original document {originalDocId}")

                # Pass 2: Filter documents - remove originals covered by pre-extracted JSONs
                filteredDocuments = []
                for doc, preExtracted in resolvedDocuments:
                    if preExtracted:
                        filteredDocuments.append(doc)  # Keep pre-extracted JSON
                    elif doc.id in originalDocIdsCoveredByPreExtracted:
                        logger.info(f"Skipping original document {doc.id} ({doc.fileName}) - already covered by pre-extracted JSON")
                    else:
                        filteredDocuments.append(doc)  # Keep regular document
                documents = filteredDocuments  # Use filtered list

            # Step 2: Clarify document intents (if not provided) - REQUIRED for all documents
            if not documentIntents and documents:
                documentIntents = await self.clarifyDocumentIntents(
                    documents,
                    prompt,
                    {"outputFormat": outputFormat},
                    extractOperationId
                )

            # Step 3: Extract and prepare content (NO AI - pure extraction) - REQUIRED for all documents
            if documents:
                preparedContentParts = await self.extractAndPrepareContent(
                    documents,
                    documentIntents or [],
                    extractOperationId
                )

                # Merge with provided contentParts (if any)
                if contentParts:
                    for part in contentParts:
                        if part.metadata.get("skipExtraction", False):
                            part.metadata.setdefault("contentFormat", "extracted")
                            part.metadata.setdefault("isPreExtracted", True)
                    preparedContentParts.extend(contentParts)

                contentParts = preparedContentParts

            # Step 4: Process extracted contentParts with AI (simple text processing, no structure generation)
            if not contentParts:
                raise ValueError("No content extracted from documents")

            # Prepare content for AI processing (parts without data are omitted)
            contentText = "\n\n".join([
                f"[Document: {part.metadata.get('documentName', 'Unknown')}]\n{part.data}"
                for part in contentParts
                if part.data
            ])

            # Call AI with extracted content
            aiRequest = AiCallRequest(
                prompt=f"{prompt}\n\nExtracted Content:\n{contentText}",
                context="",
                options=options
            )
            aiResponse = await self.callAi(aiRequest)

            rawContent = aiResponse.content
            if rawContent is None:
                raise ValueError("AI returned no content for data extraction")

            # Provide both representations: bytes for the document, text for the response.
            if isinstance(rawContent, str):
                resultText = rawContent
                resultBytes = rawContent.encode("utf-8")
            else:
                resultBytes = rawContent
                resultText = rawContent.decode("utf-8", errors="replace")

            # outputFormat may be None when the caller left the format open;
            # fall back to plain text so the file name does not end in ".None".
            effectiveFormat = outputFormat or "txt"
            mimeType = self._EXTRACTION_MIME_TYPES.get(effectiveFormat.lower(), "application/octet-stream")

            # Create response document
            resultDocument = DocumentData(
                documentName=f"{title or 'extracted_data'}.{effectiveFormat}",
                documentData=resultBytes,
                mimeType=mimeType
            )

            metadata = AiResponseMetadata(
                title=title or "Extracted Data",
                operationType=OperationTypeEnum.DATA_EXTRACT.value
            )

            self.services.chat.progressLogFinish(extractOperationId, True)

            return AiResponse(
                content=resultText,
                metadata=metadata,
                documents=[resultDocument]
            )

        except Exception as e:
            logger.error(f"Error in data extraction: {str(e)}")
            self.services.chat.progressLogFinish(extractOperationId, False)
            raise

    async def _handleCodeGeneration(
        self,
        prompt: str,
        options: AiCallOptions,
        contentParts: Optional[List[ContentPart]],
        outputFormat: Optional[str],
        title: Optional[str],
        parentOperationId: Optional[str]
    ) -> AiResponse:
        """Handle code generation using code generation path.

        ``options`` is accepted for signature parity with the other handlers
        but is not forwarded to the code path.
        """
        from modules.aichat.serviceGeneration.paths.codePath import CodeGenerationPath

        codePath = CodeGenerationPath(self.services)
        return await codePath.generateCode(
            userPrompt=prompt,
            outputFormat=outputFormat,
            contentParts=contentParts,
            title=title or "Generated Code",
            parentOperationId=parentOperationId
        )

    async def _handleDocumentGeneration(
        self,
        prompt: str,
        options: AiCallOptions,
        documentList: Optional[Any],
        documentIntents: Optional[List[DocumentIntent]],
        contentParts: Optional[List[ContentPart]],
        outputFormat: Optional[str],
        title: Optional[str],
        parentOperationId: Optional[str]
    ) -> AiResponse:
        """Handle document generation using document generation path.

        Note: mutates ``options`` by switching prompt and context compression off.
        """
        from modules.aichat.serviceGeneration.paths.documentPath import DocumentGenerationPath

        # Set compression options for document generation
        options.compressPrompt = False
        options.compressContext = False

        documentPath = DocumentGenerationPath(self.services)
        return await documentPath.generateDocument(
            userPrompt=prompt,
            documentList=documentList,
            documentIntents=documentIntents,
            contentParts=contentParts,
            outputFormat=outputFormat,
            title=title or "Generated Document",
            parentOperationId=parentOperationId
        )

    def _determineDocumentName(
        self,
        filledStructure: Dict[str, Any],
        outputFormat: str,
        title: Optional[str]
    ) -> str:
        """Determine the document name from the structure or the title.

        Precedence: the ``filename`` of the first entry in
        ``filledStructure["documents"]``, then a sanitized ``title`` (with the
        ``.outputFormat`` extension appended when missing), then
        ``generated.<outputFormat>``.
        """
        # Try to extract the name from the structure
        if isinstance(filledStructure, dict) and "documents" in filledStructure:
            docs = filledStructure["documents"]
            if isinstance(docs, list) and len(docs) > 0:
                firstDoc = docs[0]
                if isinstance(firstDoc, dict) and firstDoc.get("filename"):
                    return firstDoc["filename"]

        # Fall back to the title
        if title:
            # Replace everything outside [a-zA-Z0-9._-] and collapse runs of "_"
            sanitized = re.sub(r"[^a-zA-Z0-9._-]", "_", title)
            sanitized = re.sub(r"_+", "_", sanitized).strip("_")
            if sanitized:
                if not sanitized.lower().endswith(f".{outputFormat}"):
                    return f"{sanitized}.{outputFormat}"
                return sanitized

        return f"generated.{outputFormat}"