""" Document Manager Module for handling document operations and content extraction. """ import logging from typing import Dict, Any, List, Optional from datetime import datetime from modules.interfaces.serviceChatModel import ChatDocument, ChatContent from modules.workflow.documentProcessor import getDocumentContents import uuid logger = logging.getLogger(__name__) class DocumentManager: """Manager for document operations and content extraction.""" _instance = None @classmethod def getInstance(cls): """Return a singleton instance of the document manager.""" if cls._instance is None: cls._instance = cls() return cls._instance def __init__(self): """Initialize the document manager.""" if DocumentManager._instance is not None: raise RuntimeError("Singleton instance already exists - use getInstance()") self.service = None def initialize(self, service=None): """Initialize or update the manager with service references.""" if service: # Validate required interfaces required_interfaces = ['base', 'msft', 'google'] missing_interfaces = [] for interface in required_interfaces: if not hasattr(service, interface): missing_interfaces.append(interface) if missing_interfaces: logger.warning(f"Service container missing required interfaces: {', '.join(missing_interfaces)}") return False self.service = service return True async def extractContent(self, fileId: str) -> Optional[ChatDocument]: """Extracts content from a file and creates a chat document.""" try: # Get file content fileContent = await self.getFileContent(fileId) if not fileContent: return None # Get file metadata fileMetadata = await self.getFileMetadata(fileId) if not fileMetadata: return None # Create chat document return ChatDocument( id=str(uuid.uuid4()), fileId=fileId, filename=fileMetadata.get("name", "Unknown"), fileSize=fileMetadata.get("size", 0), content=fileContent.decode('utf-8', errors='ignore'), mimeType=fileMetadata.get("mimeType", "text/plain") ) except Exception as e: logger.error(f"Error extracting content from file {fileId}: {str(e)}") return None async def processFileIds(self, fileIds: List[str]) -> List[ChatDocument]: """ Process multiple files and extract their contents. Args: fileIds: List of file IDs to process Returns: List of ChatDocument objects """ documents = [] for fileId in fileIds: try: document = await self.extractContent(fileId) if document: documents.append(document) except Exception as e: logger.error(f"Error processing file {fileId}: {str(e)}") continue return documents async def getFileContent(self, fileId: str) -> Optional[bytes]: """Gets the content of a file.""" try: return self.service.functions.getFileData(fileId) except Exception as e: logger.error(f"Error getting file content for {fileId}: {str(e)}") return None async def getFileMetadata(self, fileId: str) -> Optional[Dict[str, Any]]: """Gets the metadata of a file.""" try: return self.service.functions.getFile(fileId) except Exception as e: logger.error(f"Error getting file metadata for {fileId}: {str(e)}") return None async def saveFile(self, filename: str, content: bytes, mimeType: str) -> Optional[int]: """ Save a new file. Args: filename: Name of the file content: File content as bytes mimeType: MIME type of the file Returns: File ID if successful, None otherwise """ try: return await self.service.base.saveFile(filename, content, mimeType) except Exception as e: logger.error(f"Error saving file {filename}: {str(e)}") return None async def deleteFile(self, fileId: str) -> bool: """Deletes a file.""" try: return self.service.functions.deleteFile(fileId) except Exception as e: logger.error(f"Error deleting file {fileId}: {str(e)}") return False # Singleton factory for the document manager def getDocumentManager(): return DocumentManager.getInstance()