""" Document Manager Module for handling document operations and content extraction. """ import logging from typing import Dict, Any, List, Optional from datetime import datetime from modules.interfaces.serviceChatModel import ChatDocument, ChatContent from modules.workflow.documentProcessor import getDocumentContents logger = logging.getLogger(__name__) class DocumentManager: """Manager for document operations and content extraction.""" _instance = None @classmethod def getInstance(cls): """Return a singleton instance of the document manager.""" if cls._instance is None: cls._instance = cls() return cls._instance def __init__(self): """Initialize the document manager.""" if DocumentManager._instance is not None: raise RuntimeError("Singleton instance already exists - use getInstance()") self.service = None def initialize(self, service=None): """Initialize or update the manager with service references.""" if service: # Validate required interfaces required_interfaces = ['base', 'msft', 'google'] missing_interfaces = [] for interface in required_interfaces: if not hasattr(service, interface): missing_interfaces.append(interface) if missing_interfaces: logger.warning(f"Service container missing required interfaces: {', '.join(missing_interfaces)}") return False self.service = service return True async def extractContent(self, fileId: int) -> Optional[ChatDocument]: """ Extract content from a file. Args: fileId: ID of the file to process Returns: ChatDocument object with extracted content or None if processing failed """ try: # Get file metadata and content from service fileMetadata = await self.service.base.getFileMetadata(fileId) fileContent = await self.service.base.getFileContent(fileId) if not fileMetadata or not fileContent: logger.error(f"Could not retrieve file data for fileId {fileId}") return None # Extract content using documentProcessor contents = getDocumentContents(fileMetadata, fileContent) # Create ChatDocument return ChatDocument( id=str(fileId), # Using fileId as document id fileId=fileId, filename=fileMetadata.get("name", "unknown"), fileSize=fileMetadata.get("size", 0), mimeType=fileMetadata.get("mimeType", "application/octet-stream"), contents=contents ) except Exception as e: logger.error(f"Error extracting content from file {fileId}: {str(e)}", exc_info=True) return None async def processFileIds(self, fileIds: List[int]) -> List[ChatDocument]: """ Process multiple files and extract their contents. Args: fileIds: List of file IDs to process Returns: List of ChatDocument objects """ documents = [] for fileId in fileIds: try: document = await self.extractContent(fileId) if document: documents.append(document) except Exception as e: logger.error(f"Error processing file {fileId}: {str(e)}") continue return documents async def getFileContent(self, fileId: int) -> Optional[bytes]: """ Get raw file content. Args: fileId: ID of the file Returns: File content as bytes or None if not found """ try: return await self.service.base.getFileContent(fileId) except Exception as e: logger.error(f"Error getting file content for {fileId}: {str(e)}") return None async def getFileMetadata(self, fileId: int) -> Optional[Dict[str, Any]]: """ Get file metadata. Args: fileId: ID of the file Returns: File metadata dictionary or None if not found """ try: return await self.service.base.getFileMetadata(fileId) except Exception as e: logger.error(f"Error getting file metadata for {fileId}: {str(e)}") return None async def saveFile(self, filename: str, content: bytes, mimeType: str) -> Optional[int]: """ Save a new file. Args: filename: Name of the file content: File content as bytes mimeType: MIME type of the file Returns: File ID if successful, None otherwise """ try: return await self.service.base.saveFile(filename, content, mimeType) except Exception as e: logger.error(f"Error saving file {filename}: {str(e)}") return None async def deleteFile(self, fileId: int) -> bool: """ Delete a file. Args: fileId: ID of the file to delete Returns: True if successful, False otherwise """ try: return await self.service.base.deleteFile(fileId) except Exception as e: logger.error(f"Error deleting file {fileId}: {str(e)}") return False # Singleton factory for the document manager def getDocumentManager(): return DocumentManager.getInstance()