""" Document processing method module. Handles document operations using the document service. """ import logging from typing import Dict, Any, List, Optional from datetime import datetime from modules.interfaces.interfaceChatModel import ( ChatDocument, TaskDocument, ExtractedContent, ContentItem ) from modules.workflow.managerDocument import DocumentManager from modules.methods.methodBase import MethodBase, MethodResult, action logger = logging.getLogger(__name__) class MethodDocument(MethodBase): """Document processing method implementation""" def __init__(self, serviceContainer): """Initialize the document method""" super().__init__(serviceContainer) self.documentManager = DocumentManager(serviceContainer) @action async def extract(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: """ Extract content from document Args: parameters: documentId: ID of the document to extract from documentType: Type of document extractionType: Type of extraction to perform """ try: documentId = parameters["documentId"] documentType = parameters.get("documentType", "text") extractionType = parameters.get("extractionType", "full") # Get document from service document = await self.service.interfaceComponent.getDocument(documentId) if not document: return self._createResult( success=False, data={"error": f"Document not found: {documentId}"} ) # Extract content based on type if documentType == "text": content = await self.documentManager.extractTextContent(document, extractionType) elif documentType == "table": content = await self.documentManager.extractTableContent(document, extractionType) elif documentType == "image": content = await self.documentManager.extractImageContent(document, extractionType) else: return self._createResult( success=False, data={"error": f"Unsupported document type: {documentType}"} ) return self._createResult( success=True, data={ "documentId": documentId, "type": documentType, "content": content } ) except Exception as e: logger.error(f"Error extracting content: {str(e)}") return self._createResult( success=False, data={"error": str(e)} ) @action async def analyze(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: """ Analyze document content Args: parameters: documentId: ID of the document to analyze documentType: Type of document analysisType: Type of analysis to perform """ try: # Extract content first contentResult = await self.extract(parameters) if not contentResult.success: return contentResult # Perform analysis based on type analysisType = parameters.get("analysisType", "basic") content = ExtractedContent(**contentResult.data["content"]) if analysisType == "basic": # Basic analysis: count items, calculate statistics stats = { "totalItems": len(content.contents), "totalSize": sum(item.metadata.size for item in content.contents), "itemTypes": {} } for item in content.contents: itemType = item.label if itemType not in stats["itemTypes"]: stats["itemTypes"][itemType] = 0 stats["itemTypes"][itemType] += 1 return self._createResult( success=True, data={ "documentId": parameters["documentId"], "analysis": stats } ) else: return self._createResult( success=False, data={"error": f"Unsupported analysis type: {analysisType}"} ) except Exception as e: logger.error(f"Error analyzing document: {str(e)}") return self._createResult( success=False, data={"error": str(e)} ) @action async def summarize(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult: """ Summarize document content Args: parameters: documentId: ID of the document to summarize documentType: Type of document summaryType: Type of summary to generate """ try: # Extract content first contentResult = await self.extract(parameters) if not contentResult.success: return contentResult # Generate summary based on type summaryType = parameters.get("summaryType", "basic") content = ExtractedContent(**contentResult.data["content"]) if summaryType == "basic": # Basic summary: concatenate all text content summary = "\n".join(item.content for item in content.contents if item.content) return self._createResult( success=True, data={ "documentId": parameters["documentId"], "summary": summary } ) else: return self._createResult( success=False, data={"error": f"Unsupported summary type: {summaryType}"} ) except Exception as e: logger.error(f"Error summarizing document: {str(e)}") return self._createResult( success=False, data={"error": str(e)} ) async def _getChatDocument(self, documentId: str) -> Optional[ChatDocument]: """Get ChatDocument from database""" try: documentData = self.service.db.getRecord("chatDocuments", documentId) if documentData: return ChatDocument(**documentData) return None except Exception as e: logger.error(f"Error getting ChatDocument {documentId}: {str(e)}") return None async def _getTaskDocument(self, documentId: str) -> Optional[TaskDocument]: """Get TaskDocument from database""" try: documentData = self.service.db.getRecord("taskDocuments", documentId) if documentData: return TaskDocument(**documentData) return None except Exception as e: logger.error(f"Error getting TaskDocument {documentId}: {str(e)}") return None