# gateway/modules/methods/methodDocument.py

"""
Document processing method module.
Handles document operations using the document service.
"""

import logging
from typing import Dict, Any, List, Optional
from datetime import datetime

from modules.interfaces.serviceChatModel import (
    ChatDocument,
    TaskDocument,
    ExtractedContent,
    ContentItem
)
from modules.workflow.managerDocument import DocumentManager
from modules.methods.methodBase import MethodBase

logger = logging.getLogger(__name__)

class MethodDocument(MethodBase):
    """Document processing method implementation.

    Routes the "extract", "analyze" and "summarize" actions to the matching
    private coroutine. Documents are read through ``self.service.db`` and the
    actual extraction is delegated to a ``DocumentManager``.

    NOTE(review): ``self.service`` is not assigned in this class; it is
    presumably provided by ``MethodBase.__init__`` -- confirm.
    """

    def __init__(self, serviceContainer):
        """Initialize the document method.

        Args:
            serviceContainer: Container handed to ``MethodBase.__init__`` and
                to the ``DocumentManager`` created for this instance.
        """
        super().__init__(serviceContainer)
        self.documentManager = DocumentManager(serviceContainer)

    async def process(self, action: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process document operations

        Args:
            action: The action to perform ("extract", "analyze" or "summarize")
            parameters: Action parameters, passed unchanged to the handler

        Returns:
            Dictionary containing the operation result. The handlers report
            their own failures as ``{"success": False, "error": ...}``.

        Raises:
            ValueError: If action is not supported
        """
        try:
            if action == "extract":
                return await self._extractContent(parameters)
            elif action == "analyze":
                return await self._analyzeDocument(parameters)
            elif action == "summarize":
                return await self._summarizeDocument(parameters)
            else:
                raise ValueError(f"Unsupported action: {action}")
        except Exception as e:
            # Log here and re-raise so the caller still sees the original error.
            logger.error("Error processing document action %s: %s", action, e)
            raise

    async def _extractContent(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract content from a document

        Args:
            parameters: Dictionary containing:
                - documentId: ID of the document to process (required)
                - documentType: Type of document ('ChatDocument' or
                  'TaskDocument'); defaults to 'ChatDocument'

        Returns:
            On success: ``{"success": True, "content": <extracted content as
            dict>, "metadata": <result of getDocumentMetadata>}``.
            On any failure (missing documentId, unsupported documentType,
            document not found, extraction error):
            ``{"success": False, "error": <message>}``. Never raises.
        """
        try:
            documentId = parameters.get("documentId")
            documentType = parameters.get("documentType", "ChatDocument")

            if not documentId:
                raise ValueError("documentId is required")

            # Reject unknown types explicitly instead of silently treating
            # them as TaskDocument, which produced a misleading
            # "TaskDocument ... not found" error for typos.
            if documentType == "ChatDocument":
                document = await self._getChatDocument(documentId)
                extractor = self.documentManager.extractFromChatDocument
            elif documentType == "TaskDocument":
                document = await self._getTaskDocument(documentId)
                extractor = self.documentManager.extractFromTaskDocument
            else:
                raise ValueError(f"Unsupported document type: {documentType}")

            if not document:
                raise ValueError(f"{documentType} {documentId} not found")

            extracted = await extractor(document)

            return {
                "success": True,
                "content": extracted.dict(),
                "metadata": await self.documentManager.getDocumentMetadata(document)
            }

        except Exception as e:
            logger.error("Error extracting content: %s", e)
            return {
                "success": False,
                "error": str(e)
            }

    async def _analyzeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze document content

        Args:
            parameters: Dictionary containing:
                - documentId: ID of the document to analyze
                - documentType: Type of document
                - analysisType: Type of analysis to perform; only "basic"
                  (the default) is supported

        Returns:
            On success: ``{"success": True, "analysis": {"totalItems": int,
            "totalSize": <sum of item.metadata.size>, "itemTypes":
            {label: count}}}``.
            On failure: ``{"success": False, "error": <message>}``; a failed
            extraction result is returned unchanged. Never raises.
        """
        try:
            # Validate the requested analysis before doing the (potentially
            # expensive) extraction that would be thrown away anyway.
            analysisType = parameters.get("analysisType", "basic")
            if analysisType != "basic":
                raise ValueError(f"Unsupported analysis type: {analysisType}")

            contentResult = await self._extractContent(parameters)
            if not contentResult["success"]:
                return contentResult

            # _extractContent returns the content as a plain dict; rebuild
            # the model to get attribute access on the items.
            content = ExtractedContent(**contentResult["content"])

            # Basic analysis: count items, calculate statistics
            stats = {
                "totalItems": len(content.contents),
                "totalSize": sum(item.metadata.size for item in content.contents),
                "itemTypes": {}
            }

            # Tally how many items carry each label.
            itemTypes = stats["itemTypes"]
            for item in content.contents:
                itemTypes[item.label] = itemTypes.get(item.label, 0) + 1

            return {
                "success": True,
                "analysis": stats
            }

        except Exception as e:
            logger.error("Error analyzing document: %s", e)
            return {
                "success": False,
                "error": str(e)
            }

    async def _summarizeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate document summary

        Args:
            parameters: Dictionary containing:
                - documentId: ID of the document to summarize
                - documentType: Type of document
                - summaryType: Type of summary to generate; only "basic"
                  (the default) is supported

        Returns:
            On success: ``{"success": True, "summary": <str>}`` where the
            summary is the ``data`` of every item labelled "main", joined
            with newlines.
            On failure: ``{"success": False, "error": <message>}``; a failed
            extraction result is returned unchanged. Never raises.
        """
        try:
            # Validate the requested summary type before extracting.
            summaryType = parameters.get("summaryType", "basic")
            if summaryType != "basic":
                raise ValueError(f"Unsupported summary type: {summaryType}")

            contentResult = await self._extractContent(parameters)
            if not contentResult["success"]:
                return contentResult

            content = ExtractedContent(**contentResult["content"])

            # Basic summary: concatenate the data of all "main" items
            summary = "\n".join(
                item.data for item in content.contents
                if item.label == "main"
            )

            return {
                "success": True,
                "summary": summary
            }

        except Exception as e:
            logger.error("Error summarizing document: %s", e)
            return {
                "success": False,
                "error": str(e)
            }

    async def _loadDocumentRecord(self, table: str, model: type, documentId: str) -> Optional[Any]:
        """Fetch one record from *table* and wrap it in *model*.

        Shared implementation for the typed getters below.

        Args:
            table: Name passed to ``self.service.db.getRecord``
            model: Class instantiated with the record's fields as keyword
                arguments
            documentId: ID of the record to fetch

        Returns:
            A *model* instance, or None when the lookup returns a falsy value
            or when the lookup/instantiation raises (the error is logged).
        """
        try:
            documentData = self.service.db.getRecord(table, documentId)
            if documentData:
                return model(**documentData)
            return None
        except Exception as e:
            # Best-effort lookup: callers treat None as "not found", so keep
            # the traceback in the log to make real database errors visible.
            logger.exception("Error getting %s %s: %s", model.__name__, documentId, e)
            return None

    async def _getChatDocument(self, documentId: str) -> Optional[ChatDocument]:
        """Get ChatDocument from the "chatDocuments" table; None if missing or on error"""
        return await self._loadDocumentRecord("chatDocuments", ChatDocument, documentId)

    async def _getTaskDocument(self, documentId: str) -> Optional[TaskDocument]:
        """Get TaskDocument from the "taskDocuments" table; None if missing or on error"""
        return await self._loadDocumentRecord("taskDocuments", TaskDocument, documentId)