import base64
import logging
import os
import re
import uuid
from datetime import datetime, UTC
from typing import Any, Dict, List, Optional

from modules.shared.timezoneUtils import get_utc_timestamp
from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceGeneration.subDocumentUtility import (
    getFileExtension,
    getMimeTypeFromExtension,
    detectMimeTypeFromContent,
    detectMimeTypeFromData,
    convertDocumentDataToString
)

logger = logging.getLogger(__name__)


class GenerationService:
    """Turns AI action results into stored documents and renders extracted
    JSON content into concrete report formats (html, pdf, docx, ...).

    All persistence goes through interfaces pulled off the supplied service
    center at construction time; no indirect ``self.service`` calls are made.
    """

    def __init__(self, serviceCenter=None):
        # Directly use interfaces from the provided service center
        # (no self.service calls). All attributes default to None when the
        # service center is absent or lacks the interface.
        self.services = serviceCenter
        self.interfaceDbComponent = getattr(serviceCenter, 'interfaceDbComponent', None) if serviceCenter else None
        self.interfaceDbChat = getattr(serviceCenter, 'interfaceDbChat', None) if serviceCenter else None
        self.workflow = getattr(serviceCenter, 'workflow', None) if serviceCenter else None

    def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
        """
        Process documents produced by AI actions and convert them to ChatDocument format.
        This function handles AI-generated document data, not document references.

        Args:
            action_result: Object expected to carry a ``documents`` attribute
                (list of ActionDocument-like objects). May be None.
            action: The action that produced the result; used for logging
                (``execMethod`` / ``execAction``).
            workflow: Current workflow (unused here; kept for signature parity).

        Returns:
            A list of processed document dictionaries (see
            :meth:`processSingleDocument`); empty list on any error.
        """
        try:
            # Read documents from the standard documents field (not data.documents)
            documents = action_result.documents if action_result and hasattr(action_result, 'documents') else []
            if not documents:
                logger.info(f"No documents found in action_result.documents for {action.execMethod}.{action.execAction}")
                return []

            logger.info(f"Processing {len(documents)} documents from action_result.documents")

            # Process each document from the AI action result; failures on a
            # single document are skipped (processSingleDocument returns None).
            processed_documents = []
            for doc in documents:
                processed_doc = self.processSingleDocument(doc, action)
                if processed_doc:
                    processed_documents.append(processed_doc)

            logger.info(f"Successfully processed {len(processed_documents)} documents")
            return processed_documents
        except Exception as e:
            logger.error(f"Error processing action result documents: {str(e)}")
            return []

    def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
        """Process a single document from action result with simplified logic.

        Args:
            doc: ActionDocument-like object with ``documentName``,
                ``documentData`` and ``mimeType`` attributes.
            action: Originating action (currently unused here).

        Returns:
            Dict with keys ``fileName``, ``fileSize``, ``mimeType``,
            ``content`` and ``document``, or None on error.
        """
        try:
            # ActionDocument objects have documentName, documentData, and mimeType
            mime_type = doc.mimeType
            if mime_type == "application/octet-stream":
                content = doc.documentData
                # Detect MIME without relying on a service center
                mime_type = detectMimeTypeFromContent(content, doc.documentName)
            return {
                'fileName': doc.documentName,
                # NOTE: size of the stringified payload, not necessarily the
                # decoded binary size.
                'fileSize': len(str(doc.documentData)),
                'mimeType': mime_type,
                'content': doc.documentData,
                'document': doc
            }
        except Exception as e:
            logger.error(f"Error processing single document: {str(e)}")
            return None

    def createDocumentsFromActionResult(self, action_result, action, workflow, message_id=None) -> List[Any]:
        """
        Create actual document objects from action result and store them in the system.

        For each processed document this converts the payload to string
        content, skips empty/minimal payloads, normalizes the filename
        extension to match the MIME type, decides whether the payload is
        base64-encoded binary, persists it via :meth:`_createDocument`, and
        attaches workflow context.

        Args:
            action_result: Result object carrying ``documents``.
            action: Action that produced the result.
            workflow: Workflow providing round/task/action context.
            message_id: Optional chat message id to associate with documents.

        Returns:
            A list of created document objects with proper workflow context;
            empty list on error.
        """
        try:
            logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
            logger.info(f"Action result documents count: {len(action_result.documents) if action_result.documents else 0}")

            processed_docs = self.processActionResultDocuments(action_result, action, workflow)
            logger.info(f"Processed {len(processed_docs)} documents")

            created_documents = []
            for i, doc_data in enumerate(processed_docs):
                try:
                    document_name = doc_data['fileName']
                    document_data = doc_data['content']
                    mime_type = doc_data['mimeType']
                    logger.info(f"Creating document {i+1}: {document_name} (mime: {mime_type}, content length: {len(str(document_data))})")

                    # Convert document data to string content
                    content = convertDocumentDataToString(document_data, getFileExtension(document_name))

                    # Skip empty or minimal content (e.g. an AI action that
                    # emitted an empty JSON object/array or a bare null).
                    minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
                    if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
                        logger.warning(f"Empty or minimal content for document {document_name}, skipping")
                        continue

                    logger.info(f"Document {document_name} has content: {len(content)} characters")

                    # Normalize file extension based on mime type if missing or incorrect
                    try:
                        mime_to_ext = {
                            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
                            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
                            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
                            "application/pdf": ".pdf",
                            "text/html": ".html",
                            "text/markdown": ".md",
                            "text/plain": ".txt",
                            "application/json": ".json",
                        }
                        expected_ext = mime_to_ext.get(mime_type)
                        if expected_ext:
                            if not document_name.lower().endswith(expected_ext):
                                # Append/replace extension to match mime type
                                if "." in document_name:
                                    document_name = document_name.rsplit(".", 1)[0] + expected_ext
                                else:
                                    document_name = document_name + expected_ext
                    except Exception:
                        # Extension normalization is best-effort; keep original name.
                        pass

                    # Decide if content is base64-encoded binary (e.g., docx/pdf)
                    # or plain text. Office/PDF payloads delivered as strings are
                    # assumed to be base64.
                    base64encoded = False
                    try:
                        binary_mime_types = {
                            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                            "application/pdf",
                        }
                        if isinstance(document_data, str) and mime_type in binary_mime_types:
                            base64encoded = True
                    except Exception:
                        base64encoded = False

                    # Create document with file in one step using interfaces directly
                    document = self._createDocument(
                        fileName=document_name,
                        mimeType=mime_type,
                        content=content,
                        base64encoded=base64encoded,
                        messageId=message_id
                    )

                    if document:
                        # Set workflow context on the document if possible
                        self._setDocumentWorkflowContext(document, action, workflow)
                        created_documents.append(document)
                        logger.info(f"Successfully created ChatDocument: {document_name} (ID: {document.id if hasattr(document, 'id') else 'N/A'}, fileId: {document.fileId if hasattr(document, 'fileId') else 'N/A'})")
                    else:
                        logger.error(f"Failed to create ChatDocument object for {document_name}")
                except Exception as e:
                    # One bad document must not abort the batch.
                    logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
                    continue

            logger.info(f"Successfully created {len(created_documents)} documents")
            return created_documents
        except Exception as e:
            logger.error(f"Error creating documents from action result: {str(e)}")
            return []

    def _setDocumentWorkflowContext(self, document, action, workflow):
        """Set workflow context on a document for proper routing and labeling.

        Only attributes that already exist on the document are written
        (checked via hasattr); failures are logged as warnings, never raised.
        """
        try:
            # Get current workflow context directly from workflow object
            workflow_context = self._getWorkflowContext(workflow)
            workflow_stats = self._getWorkflowStats(workflow)

            current_round = workflow_context.get('currentRound', 0)
            current_task = workflow_context.get('currentTask', 0)
            current_action = workflow_context.get('currentAction', 0)

            # Try to set workflow context attributes if they exist
            if hasattr(document, 'roundNumber'):
                document.roundNumber = current_round
            if hasattr(document, 'taskNumber'):
                document.taskNumber = current_task
            if hasattr(document, 'actionNumber'):
                document.actionNumber = current_action
            if hasattr(document, 'actionId'):
                document.actionId = action.id if hasattr(action, 'id') else None

            # Set additional workflow metadata if available
            if hasattr(document, 'workflowId'):
                document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
            if hasattr(document, 'workflowStatus'):
                document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')

            logger.debug(f"Set workflow context on document: Round {current_round}, Task {current_task}, Action {current_action}")
            logger.debug(f"Document workflow metadata: ID={document.workflowId if hasattr(document, 'workflowId') else 'N/A'}, Status={document.workflowStatus if hasattr(document, 'workflowStatus') else 'N/A'}")
        except Exception as e:
            logger.warning(f"Could not set workflow context on document: {str(e)}")

    def _createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: Optional[str] = None) -> Optional[ChatDocument]:
        """Create file and ChatDocument using interfaces without service indirection.

        Args:
            fileName: Target file name.
            mimeType: MIME type to record on the file.
            content: File payload — base64 text when ``base64encoded`` is
                True, otherwise plain UTF-8 text.
            base64encoded: Whether ``content`` must be base64-decoded first.
            messageId: Optional chat message id to link the document to.

        Returns:
            The created ChatDocument, or None if the component interface is
            missing or any step fails.
        """
        try:
            if not self.interfaceDbComponent:
                logger.error("Component interface not available for document creation")
                return None

            # Convert content to bytes
            if base64encoded:
                content_bytes = base64.b64decode(content)
            else:
                content_bytes = content.encode('utf-8')

            # Create file and store data
            file_item = self.interfaceDbComponent.createFile(
                name=fileName,
                mimeType=mimeType,
                content=content_bytes
            )
            self.interfaceDbComponent.createFileData(file_item.id, content_bytes)

            # Collect file info
            file_info = self._getFileInfo(file_item.id)
            if not file_info:
                logger.error(f"Could not get file info for fileId: {file_item.id}")
                return None

            # Build ChatDocument
            document = ChatDocument(
                id=str(uuid.uuid4()),
                messageId=messageId or "",
                fileId=file_item.id,
                fileName=file_info.get("fileName", fileName),
                fileSize=file_info.get("size", 0),
                mimeType=file_info.get("mimeType", mimeType)
            )

            # Ensure document can access component interface later (best-effort)
            if hasattr(document, 'setComponentInterface') and self.interfaceDbComponent:
                try:
                    document.setComponentInterface(self.interfaceDbComponent)
                except Exception:
                    pass
            return document
        except Exception as e:
            logger.error(f"Error creating document: {str(e)}")
            return None

    def _getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
        """Fetch file metadata via the component interface.

        Returns a dict (id, fileName, size, mimeType, fileHash, creationDate)
        or None when the interface/file is unavailable or an error occurs.
        """
        try:
            if not self.interfaceDbComponent:
                return None
            file_item = self.interfaceDbComponent.getFile(fileId)
            if file_item:
                return {
                    "id": file_item.id,
                    "fileName": file_item.fileName,
                    "size": file_item.fileSize,
                    "mimeType": file_item.mimeType,
                    "fileHash": getattr(file_item, 'fileHash', None),
                    "creationDate": getattr(file_item, 'creationDate', None)
                }
            return None
        except Exception as e:
            logger.error(f"Error getting file info for {fileId}: {str(e)}")
            return None

    def _getWorkflowContext(self, workflow) -> Dict[str, int]:
        """Return current round/task/action counters from the workflow, defaulting to 0."""
        try:
            return {
                'currentRound': getattr(workflow, 'currentRound', 0),
                'currentTask': getattr(workflow, 'currentTask', 0),
                'currentAction': getattr(workflow, 'currentAction', 0)
            }
        except Exception:
            return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}

    def _getWorkflowStats(self, workflow) -> Dict[str, Any]:
        """Return extended workflow stats (context + totals, status, id) with safe defaults."""
        try:
            context = self._getWorkflowContext(workflow)
            return {
                'currentRound': context['currentRound'],
                'currentTask': context['currentTask'],
                'currentAction': context['currentAction'],
                'totalTasks': getattr(workflow, 'totalTasks', 0),
                'totalActions': getattr(workflow, 'totalActions', 0),
                'workflowStatus': getattr(workflow, 'status', 'unknown'),
                'workflowId': getattr(workflow, 'id', 'unknown')
            }
        except Exception:
            return {
                'currentRound': 0,
                'currentTask': 0,
                'currentAction': 0,
                'totalTasks': 0,
                'totalActions': 0,
                'workflowStatus': 'unknown',
                'workflowId': 'unknown'
            }

    async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: Optional[str] = None, aiService=None) -> tuple[str, str]:
        """
        Render extracted JSON content to the specified output format.

        Args:
            extractedContent: Structured JSON document from AI extraction
            outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
            title: Report title
            userPrompt: User's original prompt for report generation
            aiService: AI service instance for generation prompt creation

        Returns:
            tuple: (rendered_content, mime_type)

        Raises:
            ValueError: When input is not a dict, lacks 'sections', or the
                format has no renderer.
        """
        try:
            # Validate JSON input
            if not isinstance(extractedContent, dict):
                raise ValueError("extractedContent must be a JSON dictionary")
            if "sections" not in extractedContent:
                raise ValueError("extractedContent must contain 'sections' field")

            # DEBUG: Log renderer input metadata only (no verbose JSON) TODO REMOVE
            # debug_dir is initialized up front so the later output dump can
            # tell whether this best-effort block actually created it.
            debug_dir = None
            try:
                ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
                debug_root = "./test-chat/ai"
                debug_dir = os.path.join(debug_root, f"render_input_{ts}")
                os.makedirs(debug_dir, exist_ok=True)
                with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
                    f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
                    f.write(f"content_size: {len(str(extractedContent))} characters\n")
                    f.write(f"sections_count: {len(extractedContent.get('sections', []))}\n")
            except Exception:
                pass

            # Get the appropriate renderer for the format
            renderer = self._getFormatRenderer(outputFormat)
            if not renderer:
                raise ValueError(f"Unsupported output format: {outputFormat}")

            # Generate AI-based generation prompt if AI service is available
            generationPrompt = userPrompt  # Default to user prompt
            if aiService and userPrompt:
                try:
                    from .subPromptBuilder import buildGenerationPrompt
                    generationPrompt = await buildGenerationPrompt(
                        outputFormat=outputFormat,
                        userPrompt=userPrompt,
                        title=title,
                        aiService=aiService,
                        services=self.services
                    )
                except Exception as e:
                    logger.warning(f"Failed to generate AI-based generation prompt: {str(e)}, using user prompt")
                    generationPrompt = userPrompt

            # Render the JSON content with AI-generated prompt
            renderedContent, mimeType = await renderer.render(extractedContent, title, generationPrompt, aiService)

            # DEBUG: dump rendered output (only if the debug dir was created) TODO REMOVE
            try:
                if debug_dir:
                    with open(os.path.join(debug_dir, "rendered_output.txt"), "w", encoding="utf-8") as f:
                        f.write(renderedContent or "")
            except Exception:
                pass

            logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")
            return renderedContent, mimeType
        except Exception as e:
            logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
            raise

    async def getExtractionPrompt(self, outputFormat: str, userPrompt: str, title: str, aiService=None) -> str:
        """
        Get the format-specific extraction prompt for AI content extraction.

        Args:
            outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
            userPrompt: User's original prompt for report generation
            title: Report title
            aiService: AI service instance for intent extraction

        Returns:
            str: Format-specific prompt for AI extraction

        Raises:
            ValueError: When the format has no renderer.
        """
        try:
            # Get the appropriate renderer for the format
            renderer = self._getFormatRenderer(outputFormat)
            if not renderer:
                raise ValueError(f"Unsupported output format: {outputFormat}")

            # Build centralized prompt with generic rules + format-specific guidelines
            from .subPromptBuilder import buildExtractionPrompt
            extractionPrompt = await buildExtractionPrompt(
                outputFormat=outputFormat,
                renderer=renderer,
                userPrompt=userPrompt,
                title=title,
                aiService=aiService,
                services=self.services
            )

            logger.info(f"Generated {outputFormat}-specific extraction prompt: {len(extractionPrompt)} characters")
            return extractionPrompt
        except Exception as e:
            logger.error(f"Error getting extraction prompt for {outputFormat}: {str(e)}")
            raise

    def _getFormatRenderer(self, output_format: str):
        """Get the appropriate renderer for the specified format using auto-discovery.

        Falls back to the 'text' renderer when the requested format has no
        registered renderer; returns None only when even that fallback fails.
        """
        try:
            from .renderers.registry import get_renderer
            renderer = get_renderer(output_format, services=self.services)
            if renderer:
                return renderer
            # Fallback to text renderer if no specific renderer found
            logger.warning(f"No renderer found for format {output_format}, falling back to text")
            fallback_renderer = get_renderer('text', services=self.services)
            if fallback_renderer:
                return fallback_renderer
            logger.error("Even text renderer fallback failed")
            return None
        except Exception as e:
            logger.error(f"Error getting renderer for {output_format}: {str(e)}")
            return None