gateway/modules/workflow/managerDocument.py
2025-06-11 00:38:26 +02:00

104 lines
3.4 KiB
Python

"""
Document Manager Module for handling document operations and content extraction.
"""
import base64
import logging
import uuid
from modules.interfaces.serviceChatModel import (
ChatDocument,
TaskDocument,
ExtractedContent,
ContentItem,
ContentMetadata
)
from modules.workflow.serviceContainer import ServiceContainer
from modules.workflow.processorDocument import DocumentProcessor
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class DocumentManager:
    """Manager for document operations and content extraction.

    Wraps a ``DocumentProcessor`` and uses the supplied service container to
    load file content when a ``ChatDocument`` has to be turned into a
    ``TaskDocument`` before processing.
    """

    def __init__(self, serviceContainer: ServiceContainer):
        """
        Store the service container and create the document processor.

        Args:
            serviceContainer: Container whose ``functions.getFileData`` is
                awaited to load the content of chat documents.
        """
        self.service = serviceContainer
        self._processor = DocumentProcessor()

    async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
        """
        Extract content from a ChatDocument with AI processing.

        The chat document is first converted to a TaskDocument, then processed
        through the same path as ``extractFromTaskDocument`` so that failures
        are logged identically for both entry points.

        Args:
            prompt: Prompt for AI content extraction
            document: The ChatDocument to process

        Returns:
            ExtractedContent whose ``objectId`` / ``objectType`` reference the
            original ChatDocument rather than the temporary TaskDocument.

        Raises:
            ValueError: If no content could be loaded for the document's file
            Exception: Any error raised during conversion or processing is
                logged and re-raised unchanged
        """
        taskDoc = await self._convertToTaskDocument(document)
        extractedContent = await self.extractFromTaskDocument(prompt, taskDoc)
        # The TaskDocument carries a freshly generated id; point the result
        # back at the ChatDocument the caller actually passed in.
        extractedContent.objectId = document.id
        extractedContent.objectType = "ChatDocument"
        return extractedContent

    async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
        """
        Extract content directly from a task document.

        Args:
            prompt: The prompt to use for content extraction
            document: The task document to extract content from

        Returns:
            The value returned by the processor's ``processDocument``

        Raises:
            Exception: Any error raised by the processor is logged (with
                traceback) and re-raised unchanged
        """
        try:
            return await self._processor.processDocument(document, prompt)
        except Exception as e:
            # logger.exception keeps the traceback that logger.error dropped.
            logger.exception("Error extracting from task document: %s", e)
            raise

    async def _convertToTaskDocument(self, chatDoc: ChatDocument) -> TaskDocument:
        """
        Convert a ChatDocument to a TaskDocument.

        Loads the file content referenced by ``chatDoc.fileId`` and embeds it
        base64-encoded in a new TaskDocument with a freshly generated UUID.

        Args:
            chatDoc: The chat document to convert

        Returns:
            TaskDocument carrying the chat document's filename, size and MIME
            type plus the base64-encoded file content

        Raises:
            ValueError: If the file content is missing or empty
            Exception: Any error raised while loading or encoding the content
                is logged (with traceback) and re-raised unchanged
        """
        try:
            fileContent = await self.service.functions.getFileData(chatDoc.fileId)
            # Falsy content (None or empty) is treated as "not retrievable".
            if not fileContent:
                raise ValueError(f"Could not get content for file {chatDoc.fileId}")

            # NOTE(review): b64encode needs a bytes-like object; assumes
            # getFileData returns bytes - confirm against its implementation.
            base64Data = base64.b64encode(fileContent).decode('utf-8')

            return TaskDocument(
                id=str(uuid.uuid4()),
                filename=chatDoc.filename,
                fileSize=chatDoc.fileSize,
                mimeType=chatDoc.mimeType,
                data=base64Data,
            )
        except Exception as e:
            logger.exception("Error converting chat document to task document: %s", e)
            raise