gateway/modules/workflow/documentManager.py
2025-05-30 01:12:59 +02:00

141 lines
No EOL
4.9 KiB
Python

"""
Document Manager Module for handling document operations and content extraction.
"""
import logging
from typing import Dict, Any, List, Optional
from datetime import datetime
from modules.interfaces.serviceChatModel import ChatDocument, ChatContent
from modules.workflow.documentProcessor import getDocumentContents
import uuid
# Module-level logger, named after this module (__name__) so its records can be told apart per module.
logger = logging.getLogger(__name__)
class DocumentManager:
    """Manager for document operations and content extraction.

    The class is a lazily created singleton: obtain it through
    ``getInstance()`` and hand it a service container via ``initialize()``
    before using any file operation. File operations are best-effort:
    failures are logged and reported through the return value
    (``None`` / ``False`` / a shorter list) instead of being raised.
    """

    # The single shared instance; populated by the first getInstance() call.
    _instance = None

    # Attributes a service container must expose to be accepted by initialize().
    # NOTE(review): the read/delete operations below go through
    # ``service.functions``, which is not part of this check - confirm whether
    # it should be required as well.
    _REQUIRED_INTERFACES = ('base', 'msft', 'google')

    @classmethod
    def getInstance(cls):
        """Return the singleton instance of the document manager, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def __init__(self):
        """Initialize the document manager.

        Raises:
            RuntimeError: If the singleton instance already exists; callers
                must go through ``getInstance()`` instead.
        """
        if DocumentManager._instance is not None:
            raise RuntimeError("Singleton instance already exists - use getInstance()")
        # Service container; stays None until initialize() accepts one.
        self.service = None

    def initialize(self, service=None):
        """Initialize or update the manager with service references.

        Args:
            service: Service container. It is accepted only if it exposes the
                ``base``, ``msft`` and ``google`` attributes. A falsy value
                leaves the currently stored service untouched.

        Returns:
            False if ``service`` lacks a required interface (the previously
            stored service is kept), True otherwise.
        """
        if service:
            missingInterfaces = [
                name for name in self._REQUIRED_INTERFACES if not hasattr(service, name)
            ]
            if missingInterfaces:
                logger.warning(
                    "Service container missing required interfaces: %s",
                    ', '.join(missingInterfaces),
                )
                return False
            self.service = service
        return True

    def _requireService(self):
        """Return the service container, or raise RuntimeError if none has been set."""
        # Raising here (inside the callers' try blocks) yields a clear log
        # message instead of "'NoneType' object has no attribute ...".
        if self.service is None:
            raise RuntimeError("DocumentManager is not initialized - call initialize(service) first")
        return self.service

    @staticmethod
    def _decodeContent(content):
        """Return file content as text; bytes are decoded as UTF-8 with invalid sequences dropped."""
        if isinstance(content, str):
            # Already text: nothing to decode.
            return content
        return content.decode('utf-8', errors='ignore')

    async def extractContent(self, fileId: str) -> Optional["ChatDocument"]:
        """Extract content from a file and create a chat document.

        Args:
            fileId: ID of the file to read.

        Returns:
            A ChatDocument with a freshly generated UUID as its id, or None
            if the file content or metadata is missing/empty or any error
            occurs (errors are logged, not raised).
        """
        try:
            # Get file content; empty content is treated like a missing file.
            fileContent = await self.getFileContent(fileId)
            if not fileContent:
                return None
            # Get file metadata (read with .get(), so a dict-like object is expected).
            fileMetadata = await self.getFileMetadata(fileId)
            if not fileMetadata:
                return None
            # Create chat document, falling back to defaults for absent metadata keys.
            return ChatDocument(
                id=str(uuid.uuid4()),
                fileId=fileId,
                filename=fileMetadata.get("name", "Unknown"),
                fileSize=fileMetadata.get("size", 0),
                content=self._decodeContent(fileContent),
                mimeType=fileMetadata.get("mimeType", "text/plain"),
            )
        except Exception as e:
            logger.exception("Error extracting content from file %s: %s", fileId, e)
            return None

    async def processFileIds(self, fileIds: List[str]) -> List["ChatDocument"]:
        """
        Process multiple files and extract their contents.

        Args:
            fileIds: List of file IDs to process. None is treated as empty.

        Returns:
            List of ChatDocument objects, in input order; files that could
            not be extracted are skipped.
        """
        documents = []
        for fileId in fileIds or []:
            try:
                document = await self.extractContent(fileId)
                if document:
                    documents.append(document)
            except Exception as e:
                # One failing file must not abort the rest of the batch.
                logger.exception("Error processing file %s: %s", fileId, e)
                continue
        return documents

    async def getFileContent(self, fileId: str) -> Optional[bytes]:
        """Get the content of a file via ``service.functions.getFileData``.

        Returns:
            Whatever the service returns for ``fileId``, or None on any error
            (including an uninitialized manager); errors are logged.
        """
        try:
            return self._requireService().functions.getFileData(fileId)
        except Exception as e:
            logger.exception("Error getting file content for %s: %s", fileId, e)
            return None

    async def getFileMetadata(self, fileId: str) -> Optional[Dict[str, Any]]:
        """Get the metadata of a file via ``service.functions.getFile``.

        Returns:
            Whatever the service returns for ``fileId``, or None on any error
            (including an uninitialized manager); errors are logged.
        """
        try:
            return self._requireService().functions.getFile(fileId)
        except Exception as e:
            logger.exception("Error getting file metadata for %s: %s", fileId, e)
            return None

    async def saveFile(self, filename: str, content: bytes, mimeType: str) -> Optional[int]:
        """
        Save a new file by awaiting ``service.base.saveFile``.

        Args:
            filename: Name of the file
            content: File content as bytes
            mimeType: MIME type of the file

        Returns:
            File ID if successful, None otherwise (errors are logged)
        """
        try:
            return await self._requireService().base.saveFile(filename, content, mimeType)
        except Exception as e:
            logger.exception("Error saving file %s: %s", filename, e)
            return None

    async def deleteFile(self, fileId: str) -> bool:
        """Delete a file via ``service.functions.deleteFile``.

        Returns:
            The truthiness of the service's result, or False on any error
            (including an uninitialized manager); errors are logged.
        """
        try:
            # Coerce so the declared bool contract holds even if the service
            # returns None or another non-bool value.
            return bool(self._requireService().functions.deleteFile(fileId))
        except Exception as e:
            logger.exception("Error deleting file %s: %s", fileId, e)
            return False
# Module-level accessor for the shared DocumentManager
def getDocumentManager():
    """Return the DocumentManager obtained from DocumentManager.getInstance()."""
    manager = DocumentManager.getInstance()
    return manager