gateway/modules/services/serviceGeneration/mainServiceGeneration.py

import logging
import uuid
from typing import Any, Dict, List, Optional
from datetime import datetime, UTC
import re
from modules.shared.timezoneUtils import get_utc_timestamp
from modules.datamodels.datamodelChat import ChatDocument
from modules.services.serviceGeneration.subDocumentUtility import (
    getFileExtension,
    getMimeTypeFromExtension,
    detectMimeTypeFromContent,
    detectMimeTypeFromData,
    convertDocumentDataToString
)

logger = logging.getLogger(__name__)

class GenerationService:
    def __init__(self, serviceCenter=None):
        # Directly use interfaces from the provided service center (no self.service calls)
        self.serviceCenter = serviceCenter
        self.interfaceDbComponent = getattr(serviceCenter, 'interfaceDbComponent', None) if serviceCenter else None
        self.interfaceDbChat = getattr(serviceCenter, 'interfaceDbChat', None) if serviceCenter else None
        self.workflow = getattr(serviceCenter, 'workflow', None) if serviceCenter else None

    def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
        """
        Process documents produced by AI actions and convert them to ChatDocument format.
        This function handles AI-generated document data, not document references.
        Returns a list of processed document dictionaries.
        """
        try:
            # Read documents from the standard documents field (not data.documents)
            documents = action_result.documents if action_result and hasattr(action_result, 'documents') else []

            if not documents:
                logger.info(f"No documents found in action_result.documents for {action.execMethod}.{action.execAction}")
                return []

            logger.info(f"Processing {len(documents)} documents from action_result.documents")

            # Process each document from the AI action result
            processed_documents = []
            for doc in documents:
                processed_doc = self.processSingleDocument(doc, action)
                if processed_doc:
                    processed_documents.append(processed_doc)

            logger.info(f"Successfully processed {len(processed_documents)} documents")
            return processed_documents
        except Exception as e:
            logger.error(f"Error processing action result documents: {str(e)}")
            return []

    def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
        """Process a single document from action result with simplified logic"""
        try:
            # ActionDocument objects have documentName, documentData, and mimeType
            mime_type = doc.mimeType
            if mime_type == "application/octet-stream":
                content = doc.documentData
                # Detect MIME without relying on a service center
                mime_type = detectMimeTypeFromContent(content, doc.documentName)

            return {
                'fileName': doc.documentName,
                'fileSize': len(str(doc.documentData)),
                'mimeType': mime_type,
                'content': doc.documentData,
                'document': doc
            }
        except Exception as e:
            logger.error(f"Error processing single document: {str(e)}")
            return None

    def createDocumentsFromActionResult(self, action_result, action, workflow, message_id=None) -> List[Any]:
        """
        Create actual document objects from action result and store them in the system.
        Returns a list of created document objects with proper workflow context.
        """
        try:
            logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
            logger.info(f"Action result documents count: {len(action_result.documents) if action_result.documents else 0}")

            processed_docs = self.processActionResultDocuments(action_result, action, workflow)
            logger.info(f"Processed {len(processed_docs)} documents")

            created_documents = []
            for i, doc_data in enumerate(processed_docs):
                try:
                    document_name = doc_data['fileName']
                    document_data = doc_data['content']
                    mime_type = doc_data['mimeType']

                    logger.info(f"Creating document {i+1}: {document_name} (mime: {mime_type}, content length: {len(str(document_data))})")

                    # Convert document data to string content
                    content = convertDocumentDataToString(document_data, getFileExtension(document_name))

                    # Skip empty or minimal content
                    minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
                    if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
                        logger.warning(f"Empty or minimal content for document {document_name}, skipping")
                        continue

                    logger.info(f"Document {document_name} has content: {len(content)} characters")

                    # Normalize file extension based on mime type if missing or incorrect
                    try:
                        mime_to_ext = {
                            "application/vnd.openxmlformats-officedocument.wordprocessingml.document": ".docx",
                            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": ".xlsx",
                            "application/vnd.openxmlformats-officedocument.presentationml.presentation": ".pptx",
                            "application/pdf": ".pdf",
                            "text/html": ".html",
                            "text/markdown": ".md",
                            "text/plain": ".txt",
                            "application/json": ".json",
                        }
                        expected_ext = mime_to_ext.get(mime_type)
                        if expected_ext:
                            if not document_name.lower().endswith(expected_ext):
                                # Append/replace extension to match mime type
                                if "." in document_name:
                                    document_name = document_name.rsplit(".", 1)[0] + expected_ext
                                else:
                                    document_name = document_name + expected_ext
                    except Exception:
                        pass

                    # Decide if content is base64-encoded binary (e.g., docx/pdf) or plain text
                    base64encoded = False
                    try:
                        binary_mime_types = {
                            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
                            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
                            "application/pdf",
                        }
                        if isinstance(document_data, str) and mime_type in binary_mime_types:
                            base64encoded = True
                    except Exception:
                        base64encoded = False

                    # Create document with file in one step using interfaces directly
                    document = self._createDocument(
                        fileName=document_name,
                        mimeType=mime_type,
                        content=content,
                        base64encoded=base64encoded,
                        messageId=message_id
                    )
                    if document:
                        # Set workflow context on the document if possible
                        self._setDocumentWorkflowContext(document, action, workflow)
                        created_documents.append(document)
                        logger.info(f"Successfully created ChatDocument: {document_name} (ID: {document.id if hasattr(document, 'id') else 'N/A'}, fileId: {document.fileId if hasattr(document, 'fileId') else 'N/A'})")
                    else:
                        logger.error(f"Failed to create ChatDocument object for {document_name}")
                except Exception as e:
                    logger.error(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")
                    continue

            logger.info(f"Successfully created {len(created_documents)} documents")
            return created_documents
        except Exception as e:
            logger.error(f"Error creating documents from action result: {str(e)}")
            return []

    def _setDocumentWorkflowContext(self, document, action, workflow):
        """Set workflow context on a document for proper routing and labeling"""
        try:
            # Get current workflow context directly from workflow object
            workflow_context = self._getWorkflowContext(workflow)
            workflow_stats = self._getWorkflowStats(workflow)

            current_round = workflow_context.get('currentRound', 0)
            current_task = workflow_context.get('currentTask', 0)
            current_action = workflow_context.get('currentAction', 0)

            # Try to set workflow context attributes if they exist
            if hasattr(document, 'roundNumber'):
                document.roundNumber = current_round
            if hasattr(document, 'taskNumber'):
                document.taskNumber = current_task
            if hasattr(document, 'actionNumber'):
                document.actionNumber = current_action
            if hasattr(document, 'actionId'):
                document.actionId = action.id if hasattr(action, 'id') else None

            # Set additional workflow metadata if available
            if hasattr(document, 'workflowId'):
                document.workflowId = workflow_stats.get('workflowId', workflow.id if hasattr(workflow, 'id') else None)
            if hasattr(document, 'workflowStatus'):
                document.workflowStatus = workflow_stats.get('workflowStatus', workflow.status if hasattr(workflow, 'status') else 'unknown')

            logger.debug(f"Set workflow context on document: Round {current_round}, Task {current_task}, Action {current_action}")
            logger.debug(f"Document workflow metadata: ID={document.workflowId if hasattr(document, 'workflowId') else 'N/A'}, Status={document.workflowStatus if hasattr(document, 'workflowStatus') else 'N/A'}")

        except Exception as e:
            logger.warning(f"Could not set workflow context on document: {str(e)}")

    def _createDocument(self, fileName: str, mimeType: str, content: str, base64encoded: bool = True, messageId: str = None) -> Optional[ChatDocument]:
        """Create file and ChatDocument using interfaces without service indirection."""
        try:
            if not self.interfaceDbComponent:
                logger.error("Component interface not available for document creation")
                return None
            # Convert content to bytes
            if base64encoded:
                import base64
                content_bytes = base64.b64decode(content)
            else:
                content_bytes = content.encode('utf-8')
            # Create file and store data
            file_item = self.interfaceDbComponent.createFile(
                name=fileName,
                mimeType=mimeType,
                content=content_bytes
            )
            self.interfaceDbComponent.createFileData(file_item.id, content_bytes)
            # Collect file info
            file_info = self._getFileInfo(file_item.id)
            if not file_info:
                logger.error(f"Could not get file info for fileId: {file_item.id}")
                return None
            # Build ChatDocument
            document = ChatDocument(
                id=str(uuid.uuid4()),
                messageId=messageId or "",
                fileId=file_item.id,
                fileName=file_info.get("fileName", fileName),
                fileSize=file_info.get("size", 0),
                mimeType=file_info.get("mimeType", mimeType)
            )
            # Ensure document can access component interface later
            if hasattr(document, 'setComponentInterface') and self.interfaceDbComponent:
                try:
                    document.setComponentInterface(self.interfaceDbComponent)
                except Exception:
                    pass
            return document
        except Exception as e:
            logger.error(f"Error creating document: {str(e)}")
            return None

    def _getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
        try:
            if not self.interfaceDbComponent:
                return None
            file_item = self.interfaceDbComponent.getFile(fileId)
            if file_item:
                return {
                    "id": file_item.id,
                    "fileName": file_item.fileName,
                    "size": file_item.fileSize,
                    "mimeType": file_item.mimeType,
                    "fileHash": getattr(file_item, 'fileHash', None),
                    "creationDate": getattr(file_item, 'creationDate', None)
                }
            return None
        except Exception as e:
            logger.error(f"Error getting file info for {fileId}: {str(e)}")
            return None

    def _getWorkflowContext(self, workflow) -> Dict[str, int]:
        try:
            return {
                'currentRound': getattr(workflow, 'currentRound', 0),
                'currentTask': getattr(workflow, 'currentTask', 0),
                'currentAction': getattr(workflow, 'currentAction', 0)
            }
        except Exception:
            return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}

    def _getWorkflowStats(self, workflow) -> Dict[str, Any]:
        try:
            context = self._getWorkflowContext(workflow)
            return {
                'currentRound': context['currentRound'],
                'currentTask': context['currentTask'],
                'currentAction': context['currentAction'],
                'totalTasks': getattr(workflow, 'totalTasks', 0),
                'totalActions': getattr(workflow, 'totalActions', 0),
                'workflowStatus': getattr(workflow, 'status', 'unknown'),
                'workflowId': getattr(workflow, 'id', 'unknown')
            }
        except Exception:
            return {
                'currentRound': 0,
                'currentTask': 0,
                'currentAction': 0,
                'totalTasks': 0,
                'totalActions': 0,
                'workflowStatus': 'unknown',
                'workflowId': 'unknown'
            }

    async def renderReport(self, extractedContent: Dict[str, Any], outputFormat: str, title: str, userPrompt: str = None, aiService=None) -> tuple[str, str]:
        """
        Render extracted JSON content to the specified output format.

        Args:
            extractedContent: Structured JSON document from AI extraction
            outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
            title: Report title
            userPrompt: User's original prompt for report generation
            aiService: AI service instance for generation prompt creation

        Returns:
            tuple: (rendered_content, mime_type)
        """
        try:
            # Validate JSON input
            if not isinstance(extractedContent, dict):
                raise ValueError("extractedContent must be a JSON dictionary")

            if "sections" not in extractedContent:
                raise ValueError("extractedContent must contain 'sections' field")

            # DEBUG: dump renderer input to diagnose JSON structure TODO REMOVE
            try:
                import os
                import json
                ts = datetime.now(UTC).strftime("%Y%m%d-%H%M%S")
                debug_root = "./test-chat/ai"
                debug_dir = os.path.join(debug_root, f"render_input_{ts}")
                os.makedirs(debug_dir, exist_ok=True)
                with open(os.path.join(debug_dir, "meta.txt"), "w", encoding="utf-8") as f:
                    f.write(f"title: {title}\nformat: {outputFormat}\ncontent_type: {type(extractedContent).__name__}\n")
                with open(os.path.join(debug_dir, "extracted_content.json"), "w", encoding="utf-8") as f:
                    json.dump(extractedContent, f, indent=2, ensure_ascii=False)
            except Exception:
                pass

            # Get the appropriate renderer for the format
            renderer = self._getFormatRenderer(outputFormat)
            if not renderer:
                raise ValueError(f"Unsupported output format: {outputFormat}")

            # Generate AI-based generation prompt if AI service is available
            generationPrompt = userPrompt  # Default to user prompt
            if aiService and userPrompt:
                try:
                    from .subPromptBuilder import buildGenerationPrompt
                    generationPrompt = await buildGenerationPrompt(
                        outputFormat=outputFormat,
                        userPrompt=userPrompt,
                        title=title,
                        aiService=aiService
                    )
                except Exception as e:
                    logger.warning(f"Failed to generate AI-based generation prompt: {str(e)}, using user prompt")
                    generationPrompt = userPrompt

            # Render the JSON content with AI-generated prompt
            renderedContent, mimeType = await renderer.render(extractedContent, title, generationPrompt, aiService)
            # DEBUG: dump rendered output
            try:
                import os
                with open(os.path.join(debug_dir, "rendered_output.txt"), "w", encoding="utf-8") as f:
                    f.write(renderedContent or "")
            except Exception:
                pass

            logger.info(f"Successfully rendered JSON report to {outputFormat} format: {len(renderedContent)} characters")
            return renderedContent, mimeType

        except Exception as e:
            logger.error(f"Error rendering JSON report to {outputFormat}: {str(e)}")
            raise

    async def getExtractionPrompt(self, outputFormat: str, userPrompt: str, title: str, aiService=None) -> str:
        """
        Get the format-specific extraction prompt for AI content extraction.

        Args:
            outputFormat: Target format (html, pdf, docx, txt, md, json, csv, xlsx)
            userPrompt: User's original prompt for report generation
            title: Report title
            aiService: AI service instance for intent extraction

        Returns:
            str: Format-specific prompt for AI extraction
        """
        try:
            # Get the appropriate renderer for the format
            renderer = self._getFormatRenderer(outputFormat)
            if not renderer:
                raise ValueError(f"Unsupported output format: {outputFormat}")

            # Build centralized prompt with generic rules + format-specific guidelines
            from .subPromptBuilder import buildExtractionPrompt
            extractionPrompt = await buildExtractionPrompt(
                outputFormat=outputFormat,
                renderer=renderer,
                userPrompt=userPrompt,
                title=title,
                aiService=aiService
            )

            logger.info(f"Generated {outputFormat}-specific extraction prompt: {len(extractionPrompt)} characters")
            return extractionPrompt

        except Exception as e:
            logger.error(f"Error getting extraction prompt for {outputFormat}: {str(e)}")
            raise

    def _getFormatRenderer(self, output_format: str):
        """Get the appropriate renderer for the specified format using auto-discovery."""
        try:
            from .renderers.registry import get_renderer
            renderer = get_renderer(output_format)

            if renderer:
                return renderer

            # Fallback to text renderer if no specific renderer found
            logger.warning(f"No renderer found for format {output_format}, falling back to text")
            fallback_renderer = get_renderer('text')
            if fallback_renderer:
                return fallback_renderer

            logger.error("Even text renderer fallback failed")
            return None

        except Exception as e:
            logger.error(f"Error getting renderer for {output_format}: {str(e)}")
            return None