gateway/modules/methods/methodDocument.py
2025-06-10 18:19:33 +02:00

215 lines
No EOL
7.7 KiB
Python

"""
Document processing method module.
Handles document operations using the document service.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from modules.interfaces.serviceChatModel import (
ChatDocument,
TaskDocument,
ExtractedContent,
ContentItem
)
from modules.workflow.managerDocument import DocumentManager
from modules.methods.methodBase import MethodBase
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class MethodDocument(MethodBase):
    """Document processing method implementation.

    Dispatches the "extract", "analyze" and "summarize" actions and delegates
    the actual content extraction to a DocumentManager instance.
    """

    def __init__(self, serviceContainer):
        """Initialize the document method.

        Args:
            serviceContainer: Forwarded to MethodBase.__init__ and to the
                DocumentManager constructor.
        """
        super().__init__(serviceContainer)
        self.documentManager = DocumentManager(serviceContainer)

    async def process(self, action: str, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process document operations

        Args:
            action: The action to perform ("extract", "analyze" or "summarize")
            parameters: Action parameters, passed unchanged to the handler

        Returns:
            Dictionary containing the operation result

        Raises:
            ValueError: If action is not supported
        """
        try:
            if action == "extract":
                return await self._extractContent(parameters)
            elif action == "analyze":
                return await self._analyzeDocument(parameters)
            elif action == "summarize":
                return await self._summarizeDocument(parameters)
            else:
                raise ValueError(f"Unsupported action: {action}")
        except Exception as e:
            # Log for diagnostics, then let the caller see the original error.
            logger.error(f"Error processing document action {action}: {str(e)}")
            raise

    async def _extractContent(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Extract content from a document

        Args:
            parameters: Dictionary containing:
                - documentId: ID of the document to process (required)
                - documentType: Type of document ('ChatDocument' or
                  'TaskDocument'); defaults to 'ChatDocument' when the key
                  is absent

        Returns:
            On success: {"success": True, "content": <extracted content as
            dict>, "metadata": <document metadata>}.
            On any failure (missing id, unsupported type, document not found,
            extraction error): {"success": False, "error": <message>}.
            Exceptions are caught and logged here rather than propagated.
        """
        try:
            documentId = parameters.get("documentId")
            documentType = parameters.get("documentType", "ChatDocument")
            if not documentId:
                raise ValueError("documentId is required")

            # Get document from database
            if documentType == "ChatDocument":
                document = await self._getChatDocument(documentId)
                if not document:
                    raise ValueError(f"ChatDocument {documentId} not found")
                extracted = await self.documentManager.extractFromChatDocument(document)
            elif documentType == "TaskDocument":
                document = await self._getTaskDocument(documentId)
                if not document:
                    raise ValueError(f"TaskDocument {documentId} not found")
                extracted = await self.documentManager.extractFromTaskDocument(document)
            else:
                # Reject unknown types explicitly instead of silently treating
                # them as TaskDocument, which would query the wrong table and
                # produce a misleading "TaskDocument ... not found" error.
                raise ValueError(f"Unsupported documentType: {documentType}")

            return {
                "success": True,
                "content": extracted.dict(),
                "metadata": await self.documentManager.getDocumentMetadata(document)
            }
        except Exception as e:
            logger.error(f"Error extracting content: {str(e)}")
            return {
                "success": False,
                "error": str(e)
            }

    async def _analyzeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Analyze document content

        Args:
            parameters: Dictionary containing:
                - documentId: ID of the document to analyze
                - documentType: Type of document
                - analysisType: Type of analysis to perform; only "basic"
                  is supported and it is the default

        Returns:
            On success: {"success": True, "analysis": {"totalItems": int,
            "totalSize": <sum of item.metadata.size>, "itemTypes":
            {label: count}}}.
            On failure: {"success": False, "error": <message>}; a failed
            extraction result is returned unchanged.
        """
        try:
            # Extract content first
            contentResult = await self._extractContent(parameters)
            if not contentResult["success"]:
                return contentResult

            # Perform analysis based on type
            analysisType = parameters.get("analysisType", "basic")
            # Rebuild the model from the dict produced by _extractContent.
            content = ExtractedContent(**contentResult["content"])

            if analysisType == "basic":
                # Basic analysis: count items, calculate statistics
                itemTypes: Dict[Any, int] = {}
                for item in content.contents:
                    itemTypes[item.label] = itemTypes.get(item.label, 0) + 1
                stats = {
                    "totalItems": len(content.contents),
                    "totalSize": sum(item.metadata.size for item in content.contents),
                    "itemTypes": itemTypes
                }
                return {
                    "success": True,
                    "analysis": stats
                }
            else:
                raise ValueError(f"Unsupported analysis type: {analysisType}")
        except Exception as e:
            logger.error(f"Error analyzing document: {str(e)}")
            return {
                "success": False,
                "error": str(e)
            }

    async def _summarizeDocument(self, parameters: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate document summary

        Args:
            parameters: Dictionary containing:
                - documentId: ID of the document to summarize
                - documentType: Type of document
                - summaryType: Type of summary to generate; only "basic"
                  is supported and it is the default

        Returns:
            On success: {"success": True, "summary": <str>}, where the basic
            summary is the newline-joined data of all items labelled "main".
            On failure: {"success": False, "error": <message>}; a failed
            extraction result is returned unchanged.
        """
        try:
            # Extract content first
            contentResult = await self._extractContent(parameters)
            if not contentResult["success"]:
                return contentResult

            # Generate summary based on type
            summaryType = parameters.get("summaryType", "basic")
            # Rebuild the model from the dict produced by _extractContent.
            content = ExtractedContent(**contentResult["content"])

            if summaryType == "basic":
                # Basic summary: concatenate all text content
                # NOTE(review): assumes item.data is a str for "main" items;
                # a non-str value makes join() raise and yields an error result.
                summary = "\n".join(
                    item.data for item in content.contents
                    if item.label == "main"
                )
                return {
                    "success": True,
                    "summary": summary
                }
            else:
                raise ValueError(f"Unsupported summary type: {summaryType}")
        except Exception as e:
            logger.error(f"Error summarizing document: {str(e)}")
            return {
                "success": False,
                "error": str(e)
            }

    async def _getChatDocument(self, documentId: str) -> Optional[ChatDocument]:
        """Get ChatDocument from database.

        Reads the record from the "chatDocuments" table via
        self.service.db.getRecord (self.service is presumably set up by
        MethodBase -- confirm).

        Returns:
            The ChatDocument built from the record, or None when no record is
            found or when the lookup/construction raises (the error is logged).
        """
        try:
            # NOTE(review): getRecord is called synchronously inside a
            # coroutine -- confirm it does not block the event loop.
            documentData = self.service.db.getRecord("chatDocuments", documentId)
            if documentData:
                return ChatDocument(**documentData)
            return None
        except Exception as e:
            logger.error(f"Error getting ChatDocument {documentId}: {str(e)}")
            return None

    async def _getTaskDocument(self, documentId: str) -> Optional[TaskDocument]:
        """Get TaskDocument from database.

        Reads the record from the "taskDocuments" table via
        self.service.db.getRecord (self.service is presumably set up by
        MethodBase -- confirm).

        Returns:
            The TaskDocument built from the record, or None when no record is
            found or when the lookup/construction raises (the error is logged).
        """
        try:
            # NOTE(review): getRecord is called synchronously inside a
            # coroutine -- confirm it does not block the event loop.
            documentData = self.service.db.getRecord("taskDocuments", documentId)
            if documentData:
                return TaskDocument(**documentData)
            return None
        except Exception as e:
            logger.error(f"Error getting TaskDocument {documentId}: {str(e)}")
            return None