# gateway/modules/chat/documents/documentGeneration.py
# 2025-07-29 11:39:02 +02:00
#
# 134 lines
# 6.4 KiB
# Python

import logging
from typing import Any, Dict, List, Optional
from datetime import datetime, UTC
from .documentUtility import (
getFileExtension,
getMimeTypeFromExtension,
detectMimeTypeFromContent,
detectMimeTypeFromData,
convertDocumentDataToString
)
# Module-level logger named after this module; used by DocumentGenerator below.
logger = logging.getLogger(__name__)
class DocumentGenerator:
def __init__(self, service):
self.service = service
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
"""
Main function to process documents from an action result.
Returns a list of processed document dictionaries.
"""
try:
documents = action_result.data.get("documents", [])
logger.debug(f"Processing {len(documents)} documents from action result")
processed_documents = []
for doc in documents:
processed_doc = self.processSingleDocument(doc, action)
if processed_doc:
processed_documents.append(processed_doc)
logger.debug(f"Successfully processed {len(processed_documents)} documents")
return processed_documents
except Exception as e:
logger.error(f"Error processing action result documents: {str(e)}")
return []
def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
"""Process a single document from action result"""
try:
if hasattr(doc, 'filename') and doc.filename:
# Document object with filename attribute
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
if mime_type == "application/octet-stream":
content = getattr(doc, 'content', '')
mime_type = detectMimeTypeFromContent(content, doc.filename, self.service)
return {
'filename': doc.filename,
'fileSize': getattr(doc, 'fileSize', 0),
'mimeType': mime_type,
'content': getattr(doc, 'content', ''),
'document': doc
}
elif isinstance(doc, dict):
# Dictionary format document
filename = doc.get('documentName', doc.get('filename', \
f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
mimeType = doc.get('mimeType', 'application/octet-stream')
if mimeType == "application/octet-stream":
document_data = doc.get('documentData', '')
mimeType = detectMimeTypeFromContent(document_data, filename, self.service)
return {
'filename': filename,
'fileSize': fileSize,
'mimeType': mimeType,
'content': doc.get('documentData', ''),
'document': doc
}
else:
# Unknown document type
logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
mimeType = detectMimeTypeFromContent(doc, filename, self.service)
return {
'filename': filename,
'fileSize': 0,
'mimeType': mimeType,
'content': str(doc),
'document': doc
}
except Exception as e:
logger.error(f"Error processing single document: {str(e)}")
return None
def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]:
"""
Create actual document objects from action result and store them in the system.
Returns a list of created document objects.
"""
try:
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
created_documents = []
for doc_data in processed_docs:
try:
document_name = doc_data['filename']
document_data = doc_data['content']
mime_type = doc_data['mimeType']
# Convert document data to string content
content = convertDocumentDataToString(document_data, getFileExtension(document_name))
# Skip empty or minimal content
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
continue
# Create file in system
file_id = self.service.createFile(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False
)
if not file_id:
logger.error(f"Failed to create file for document {document_name}")
continue
# Create document object
document = self.service.createDocument(
fileName=document_name,
mimeType=mime_type,
content=content,
base64encoded=False
)
if document:
created_documents.append(document)
logger.debug(f"Created document: {document_name} ({len(content)} bytes, {mime_type})")
else:
logger.error(f"Failed to create ChatDocument object for {document_name}")
except Exception as e:
logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
continue
logger.info(f"Created {len(created_documents)} documents from action result")
return created_documents
except Exception as e:
logger.error(f"Error creating documents from action result: {str(e)}")
return []