"""
Document Manager Module for handling document operations and content extraction.
"""

import base64
import logging
import uuid

from modules.interfaces.serviceChatModel import (
    ChatDocument,
    TaskDocument,
    ExtractedContent,
    ContentItem,
    ContentMetadata
)
from modules.workflow.serviceContainer import ServiceContainer
from modules.workflow.processorDocument import DocumentProcessor

logger = logging.getLogger(__name__)


class DocumentManager:
    """Manager for document operations and content extraction.

    Holds the service container (used to fetch file data) and a
    DocumentProcessor instance that performs the actual extraction.
    """

    def __init__(self, serviceContainer: ServiceContainer) -> None:
        """
        Store the service container and create the document processor.

        Args:
            serviceContainer: Container whose ``functions.getFileData`` is
                used to load the raw content of chat documents
        """
        self.service = serviceContainer
        self._processor = DocumentProcessor()

    async def extractFromChatDocument(self, prompt: str, document: ChatDocument) -> ExtractedContent:
        """
        Extract content from a ChatDocument with AI processing.

        The chat document is first converted to a TaskDocument, which is then
        handed to the processor together with the prompt.

        Args:
            prompt: Prompt for AI content extraction
            document: The ChatDocument to process

        Returns:
            ExtractedContent containing the processed content, with
            ``objectId`` set to ``document.id`` and ``objectType`` set to
            "ChatDocument"

        Raises:
            ValueError: If no file content could be obtained for the document
            Exception: Any error raised while loading the file data or by the
                processor is propagated to the caller
        """
        # Convert ChatDocument to TaskDocument
        taskDoc = await self._convertToTaskDocument(document)

        # Process document using processor
        extractedContent = await self._processor.processDocument(taskDoc, prompt)

        # The TaskDocument above carries a freshly generated id, so point the
        # result back at the original ChatDocument instead.
        extractedContent.objectId = document.id
        extractedContent.objectType = "ChatDocument"

        return extractedContent

    async def extractFromTaskDocument(self, prompt: str, document: TaskDocument) -> ExtractedContent:
        """
        Extract content directly from a task document.

        Args:
            prompt: The prompt to use for content extraction
            document: The task document to extract content from

        Returns:
            ExtractedContent containing the processed content

        Raises:
            Exception: Any error raised by the processor is logged (with
                traceback) and re-raised unchanged
        """
        try:
            # Await inside the try block so processor failures are caught here.
            return await self._processor.processDocument(document, prompt)
        except Exception as e:
            # logger.exception logs at ERROR level and keeps the traceback.
            logger.exception("Error extracting from task document: %s", e)
            raise

    async def _convertToTaskDocument(self, chatDoc: ChatDocument) -> TaskDocument:
        """
        Convert a ChatDocument to a TaskDocument.

        The file content is loaded through the service container, base64
        encoded, and wrapped in a new TaskDocument that gets its own random
        UUID as id while copying filename, size and MIME type from the chat
        document.

        Args:
            chatDoc: The chat document to convert

        Returns:
            TaskDocument containing the converted data

        Raises:
            ValueError: If no file content could be obtained for
                ``chatDoc.fileId``
            Exception: Any error raised while loading or encoding the file
                data is logged (with traceback) and re-raised unchanged
        """
        try:
            # Get file content
            fileContent = await self.service.functions.getFileData(chatDoc.fileId)
            # NOTE(review): this truthiness check also rejects empty content
            # (e.g. b""), not only a missing result - confirm that is intended.
            if not fileContent:
                raise ValueError(f"Could not get content for file {chatDoc.fileId}")

            # Convert to base64 text; b64encode needs a bytes-like object
            # (assumes getFileData returns bytes - TODO confirm).
            base64Data = base64.b64encode(fileContent).decode('utf-8')

            return TaskDocument(
                id=str(uuid.uuid4()),
                filename=chatDoc.filename,
                fileSize=chatDoc.fileSize,
                mimeType=chatDoc.mimeType,
                data=base64Data
            )
        except Exception as e:
            # logger.exception logs at ERROR level and keeps the traceback.
            logger.exception("Error converting chat document to task document: %s", e)
            raise