gateway/modules/methods/methodDocument.py
2025-06-13 00:41:51 +02:00

202 lines
No EOL
7.4 KiB
Python

"""
Document processing method module.
Handles document operations using the document service.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from modules.interfaces.interfaceChatModel import (
ChatDocument,
TaskDocument,
ExtractedContent,
ContentItem
)
from modules.workflow.managerDocument import DocumentManager
from modules.methods.methodBase import MethodBase, MethodResult, action
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class MethodDocument(MethodBase):
    """Document processing method implementation.

    Exposes three actions -- ``extract``, ``analyze`` and ``summarize`` --
    that fetch a document through ``self.service.interfaceComponent`` and
    delegate content extraction to a ``DocumentManager``.

    Every action catches all exceptions and reports failures as a result
    built with ``success=False`` and a ``{"error": <message>}`` payload.

    NOTE(review): ``self.service`` and ``self._createResult`` are not defined
    in this class; presumably they are provided by ``MethodBase`` -- confirm.
    """

    def __init__(self, serviceContainer):
        """Initialize the document method.

        Args:
            serviceContainer: Forwarded to ``MethodBase.__init__`` and used to
                construct the ``DocumentManager`` held in
                ``self.documentManager``.
        """
        super().__init__(serviceContainer)
        self.documentManager = DocumentManager(serviceContainer)

    def _failedResult(self, message: str) -> MethodResult:
        """Build the failure result shape shared by all actions."""
        return self._createResult(success=False, data={"error": message})

    @staticmethod
    def _toExtractedContent(rawContent: Any) -> ExtractedContent:
        """Return ``rawContent`` as an ``ExtractedContent`` instance.

        Accepts either an existing ``ExtractedContent`` (returned unchanged)
        or a mapping of its constructor keyword arguments.

        NOTE(review): the return type of the DocumentManager extract* methods
        is not visible from this file, so both shapes are tolerated.
        """
        if isinstance(rawContent, ExtractedContent):
            return rawContent
        return ExtractedContent(**rawContent)

    @action
    async def extract(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
        """
        Extract content from document

        Args:
            parameters:
                documentId: ID of the document to extract from (required)
                documentType: Type of document: "text", "table" or "image"
                    (defaults to "text")
                extractionType: Type of extraction to perform, passed through
                    to the document manager (defaults to "full")
            authData: Optional auth data; not read by this method body.

        Returns:
            On success, a result whose data holds ``documentId``, ``type``
            and ``content``. On any failure, a result with ``success=False``
            and an ``error`` message.
        """
        try:
            documentId = parameters.get("documentId")
            if documentId is None:
                # Report the missing key explicitly instead of surfacing the
                # bare KeyError text ("'documentId'") to the caller.
                return self._failedResult("Missing required parameter: documentId")
            documentType = parameters.get("documentType", "text")
            extractionType = parameters.get("extractionType", "full")

            # Get document from service
            document = await self.service.interfaceComponent.getDocument(documentId)
            if not document:
                return self._failedResult(f"Document not found: {documentId}")

            # Extract content based on type
            if documentType == "text":
                content = await self.documentManager.extractTextContent(document, extractionType)
            elif documentType == "table":
                content = await self.documentManager.extractTableContent(document, extractionType)
            elif documentType == "image":
                content = await self.documentManager.extractImageContent(document, extractionType)
            else:
                return self._failedResult(f"Unsupported document type: {documentType}")

            return self._createResult(
                success=True,
                data={
                    "documentId": documentId,
                    "type": documentType,
                    "content": content,
                },
            )
        except Exception as e:
            # logger.exception keeps the traceback, which str(e) alone loses.
            logger.exception("Error extracting content: %s", e)
            return self._failedResult(str(e))

    @action
    async def analyze(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
        """
        Analyze document content

        Args:
            parameters:
                documentId: ID of the document to analyze
                documentType: Type of document
                analysisType: Type of analysis to perform; only "basic" is
                    supported (the default)
            authData: Optional auth data, forwarded to ``extract``.

        Returns:
            On success, a result whose data holds ``documentId`` and an
            ``analysis`` dict with ``totalItems``, ``totalSize`` and
            ``itemTypes`` (count of items per label). If extraction fails,
            the failed extraction result is returned unchanged.
        """
        try:
            # Extract content first, carrying the caller's auth data along.
            contentResult = await self.extract(parameters, authData)
            if not contentResult.success:
                return contentResult

            # Perform analysis based on type
            analysisType = parameters.get("analysisType", "basic")
            content = self._toExtractedContent(contentResult.data["content"])
            if analysisType != "basic":
                return self._failedResult(f"Unsupported analysis type: {analysisType}")

            # Basic analysis: count items, calculate statistics
            stats = {
                "totalItems": len(content.contents),
                "totalSize": sum(item.metadata.size for item in content.contents),
                "itemTypes": {},
            }
            itemTypes = stats["itemTypes"]
            for item in content.contents:
                itemTypes[item.label] = itemTypes.get(item.label, 0) + 1

            return self._createResult(
                success=True,
                data={
                    "documentId": parameters["documentId"],
                    "analysis": stats,
                },
            )
        except Exception as e:
            logger.exception("Error analyzing document: %s", e)
            return self._failedResult(str(e))

    @action
    async def summarize(self, parameters: Dict[str, Any], authData: Optional[Dict[str, Any]] = None) -> MethodResult:
        """
        Summarize document content

        Args:
            parameters:
                documentId: ID of the document to summarize
                documentType: Type of document
                summaryType: Type of summary to generate; only "basic" is
                    supported (the default)
            authData: Optional auth data, forwarded to ``extract``.

        Returns:
            On success, a result whose data holds ``documentId`` and
            ``summary``: the non-empty ``content`` of every extracted item
            joined with newlines. If extraction fails, the failed extraction
            result is returned unchanged.
        """
        try:
            # Extract content first, carrying the caller's auth data along.
            contentResult = await self.extract(parameters, authData)
            if not contentResult.success:
                return contentResult

            # Generate summary based on type
            summaryType = parameters.get("summaryType", "basic")
            content = self._toExtractedContent(contentResult.data["content"])
            if summaryType != "basic":
                return self._failedResult(f"Unsupported summary type: {summaryType}")

            # Basic summary: concatenate all text content
            summary = "\n".join(item.content for item in content.contents if item.content)
            return self._createResult(
                success=True,
                data={
                    "documentId": parameters["documentId"],
                    "summary": summary,
                },
            )
        except Exception as e:
            logger.exception("Error summarizing document: %s", e)
            return self._failedResult(str(e))

    def _loadRecord(self, table: str, modelClass: Any, documentId: str) -> Optional[Any]:
        """Read one record from ``table`` and wrap it in ``modelClass``.

        Returns ``None`` when the record is missing/empty or when the lookup
        or model construction raises; the error is logged with its traceback.
        """
        try:
            documentData = self.service.db.getRecord(table, documentId)
            if documentData:
                return modelClass(**documentData)
            return None
        except Exception as e:
            logger.exception("Error getting %s %s: %s", modelClass.__name__, documentId, e)
            return None

    async def _getChatDocument(self, documentId: str) -> Optional[ChatDocument]:
        """Get ChatDocument from the "chatDocuments" table, or None if unavailable."""
        return self._loadRecord("chatDocuments", ChatDocument, documentId)

    async def _getTaskDocument(self, documentId: str) -> Optional[TaskDocument]:
        """Get TaskDocument from the "taskDocuments" table, or None if unavailable."""
        return self._loadRecord("taskDocuments", TaskDocument, documentId)