""" Document processing method module. Handles document operations using the document service. """ import logging from typing import Dict, Any, List, Optional from datetime import datetime from modules.interfaces.serviceChatModel import ( ChatDocument, TaskDocument, ExtractedContent, ContentItem ) from modules.workflow.managerDocument import DocumentManager from modules.methods.methodBase import MethodBase logger = logging.getLogger(__name__) class MethodDocument(MethodBase): """Document processing method implementation""" def __init__(self, serviceContainer): """Initialize the document method""" super().__init__(serviceContainer) self.documentManager = DocumentManager(serviceContainer) async def process(self, action: str, parameters: Dict[str, Any]) -> Dict[str, Any]: """ Process document operations Args: action: The action to perform parameters: Action parameters Returns: Dictionary containing the operation result Raises: ValueError: If action is not supported """ try: if action == "extract": return await self._extractContent(parameters) elif action == "analyze": return await self._analyzeDocument(parameters) elif action == "summarize": return await self._summarizeDocument(parameters) else: raise ValueError(f"Unsupported action: {action}") except Exception as e: logger.error(f"Error processing document action {action}: {str(e)}") raise async def _extractContent(self, parameters: Dict[str, Any]) -> Dict[str, Any]: """ Extract content from a document Args: parameters: Dictionary containing: - documentId: ID of the document to process - documentType: Type of document ('ChatDocument' or 'TaskDocument') Returns: Dictionary containing extracted content """ try: documentId = parameters.get("documentId") documentType = parameters.get("documentType", "ChatDocument") if not documentId: raise ValueError("documentId is required") # Get document from database if documentType == "ChatDocument": document = await self._getChatDocument(documentId) if not document: raise ValueError(f"ChatDocument {documentId} not found") extracted = await self.documentManager.extractFromChatDocument(document) else: document = await self._getTaskDocument(documentId) if not document: raise ValueError(f"TaskDocument {documentId} not found") extracted = await self.documentManager.extractFromTaskDocument(document) return { "success": True, "content": extracted.dict(), "metadata": await self.documentManager.getDocumentMetadata(document) } except Exception as e: logger.error(f"Error extracting content: {str(e)}") return { "success": False, "error": str(e) } async def _analyzeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]: """ Analyze document content Args: parameters: Dictionary containing: - documentId: ID of the document to analyze - documentType: Type of document - analysisType: Type of analysis to perform Returns: Dictionary containing analysis results """ try: # Extract content first contentResult = await self._extractContent(parameters) if not contentResult["success"]: return contentResult # Perform analysis based on type analysisType = parameters.get("analysisType", "basic") content = ExtractedContent(**contentResult["content"]) if analysisType == "basic": # Basic analysis: count items, calculate statistics stats = { "totalItems": len(content.contents), "totalSize": sum(item.metadata.size for item in content.contents), "itemTypes": {} } for item in content.contents: itemType = item.label if itemType not in stats["itemTypes"]: stats["itemTypes"][itemType] = 0 stats["itemTypes"][itemType] += 1 return { "success": True, "analysis": stats } else: raise ValueError(f"Unsupported analysis type: {analysisType}") except Exception as e: logger.error(f"Error analyzing document: {str(e)}") return { "success": False, "error": str(e) } async def _summarizeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]: """ Generate document summary Args: parameters: Dictionary containing: - documentId: ID of the document to summarize - documentType: Type of document - summaryType: Type of summary to generate Returns: Dictionary containing summary """ try: # Extract content first contentResult = await self._extractContent(parameters) if not contentResult["success"]: return contentResult # Generate summary based on type summaryType = parameters.get("summaryType", "basic") content = ExtractedContent(**contentResult["content"]) if summaryType == "basic": # Basic summary: concatenate all text content summary = "\n".join( item.data for item in content.contents if item.label == "main" ) return { "success": True, "summary": summary } else: raise ValueError(f"Unsupported summary type: {summaryType}") except Exception as e: logger.error(f"Error summarizing document: {str(e)}") return { "success": False, "error": str(e) } async def _getChatDocument(self, documentId: str) -> Optional[ChatDocument]: """Get ChatDocument from database""" try: documentData = self.service.db.getRecord("chatDocuments", documentId) if documentData: return ChatDocument(**documentData) return None except Exception as e: logger.error(f"Error getting ChatDocument {documentId}: {str(e)}") return None async def _getTaskDocument(self, documentId: str) -> Optional[TaskDocument]: """Get TaskDocument from database""" try: documentData = self.service.db.getRecord("taskDocuments", documentId) if documentData: return TaskDocument(**documentData) return None except Exception as e: logger.error(f"Error getting TaskDocument {documentId}: {str(e)}") return None