163 lines
No EOL
7.5 KiB
Python
163 lines
No EOL
7.5 KiB
Python
import logging
|
|
from typing import Any, Dict, List, Optional
|
|
from datetime import datetime, UTC
|
|
from .documentUtility import (
|
|
getFileExtension,
|
|
getMimeTypeFromExtension,
|
|
detectMimeTypeFromContent,
|
|
detectMimeTypeFromData,
|
|
convertDocumentDataToString
|
|
)
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
class DocumentGenerator:
|
|
def __init__(self, service):
|
|
self.service = service
|
|
|
|
def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
|
|
"""
|
|
Main function to process documents from an action result.
|
|
Returns a list of processed document dictionaries.
|
|
"""
|
|
try:
|
|
documents = action_result.data.get("documents", [])
|
|
processed_documents = []
|
|
for doc in documents:
|
|
processed_doc = self.processSingleDocument(doc, action)
|
|
if processed_doc:
|
|
processed_documents.append(processed_doc)
|
|
return processed_documents
|
|
except Exception as e:
|
|
logger.error(f"Error processing action result documents: {str(e)}")
|
|
return []
|
|
|
|
def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
|
|
"""Process a single document from action result"""
|
|
try:
|
|
if hasattr(doc, 'filename') and doc.filename:
|
|
# Document object with filename attribute
|
|
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
|
if mime_type == "application/octet-stream":
|
|
content = getattr(doc, 'content', '')
|
|
mime_type = detectMimeTypeFromContent(content, doc.filename, self.service)
|
|
return {
|
|
'filename': doc.filename,
|
|
'fileSize': getattr(doc, 'fileSize', 0),
|
|
'mimeType': mime_type,
|
|
'content': getattr(doc, 'content', ''),
|
|
'document': doc
|
|
}
|
|
elif isinstance(doc, dict):
|
|
# Dictionary format document
|
|
filename = doc.get('documentName', doc.get('filename', \
|
|
f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"))
|
|
fileSize = doc.get('fileSize', len(str(doc.get('documentData', ''))))
|
|
mimeType = doc.get('mimeType', 'application/octet-stream')
|
|
if mimeType == "application/octet-stream":
|
|
document_data = doc.get('documentData', '')
|
|
mimeType = detectMimeTypeFromContent(document_data, filename, self.service)
|
|
return {
|
|
'filename': filename,
|
|
'fileSize': fileSize,
|
|
'mimeType': mimeType,
|
|
'content': doc.get('documentData', ''),
|
|
'document': doc
|
|
}
|
|
else:
|
|
# Unknown document type
|
|
logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")
|
|
filename = f"{action.execMethod}_{action.execAction}_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}"
|
|
mimeType = detectMimeTypeFromContent(doc, filename, self.service)
|
|
return {
|
|
'filename': filename,
|
|
'fileSize': 0,
|
|
'mimeType': mimeType,
|
|
'content': str(doc),
|
|
'document': doc
|
|
}
|
|
except Exception as e:
|
|
logger.error(f"Error processing single document: {str(e)}")
|
|
return None
|
|
|
|
def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]:
|
|
"""
|
|
Create actual document objects from action result and store them in the system.
|
|
Returns a list of created document objects.
|
|
"""
|
|
try:
|
|
processed_docs = self.processActionResultDocuments(action_result, action, workflow)
|
|
created_documents = []
|
|
for doc_data in processed_docs:
|
|
try:
|
|
document_name = doc_data['filename']
|
|
document_data = doc_data['content']
|
|
mime_type = doc_data['mimeType']
|
|
# Convert document data to string content
|
|
content = convertDocumentDataToString(document_data, getFileExtension(document_name))
|
|
# Skip empty or minimal content
|
|
minimal_content_patterns = ['{}', '[]', 'null', '""', "''"]
|
|
if not content or content.strip() == "" or content.strip() in minimal_content_patterns:
|
|
logger.warning(f"Empty or minimal content for document {document_name}, skipping")
|
|
continue
|
|
# Create file in system
|
|
file_id = self.service.createFile(
|
|
fileName=document_name,
|
|
mimeType=mime_type,
|
|
content=content,
|
|
base64encoded=False
|
|
)
|
|
if not file_id:
|
|
logger.error(f"Failed to create file for document {document_name}")
|
|
continue
|
|
# Create document object
|
|
document = self.service.createDocument(
|
|
fileName=document_name,
|
|
mimeType=mime_type,
|
|
content=content,
|
|
base64encoded=False
|
|
)
|
|
if document:
|
|
created_documents.append(document)
|
|
logger.info(f"Created document: {document_name} with file ID: {file_id} and MIME type: {mime_type}")
|
|
else:
|
|
logger.error(f"Failed to create ChatDocument object for {document_name}")
|
|
except Exception as e:
|
|
logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
|
|
continue
|
|
return created_documents
|
|
except Exception as e:
|
|
logger.error(f"Error creating documents from action result: {str(e)}")
|
|
return []
|
|
|
|
@staticmethod
|
|
def get_delivered_files_and_formats(documents):
|
|
delivered_files = []
|
|
delivered_formats = []
|
|
for doc in documents:
|
|
if hasattr(doc, 'filename'):
|
|
delivered_files.append(doc.filename)
|
|
file_extension = getFileExtension(doc.filename)
|
|
mime_type = getattr(doc, 'mimeType', 'application/octet-stream')
|
|
delivered_formats.append({
|
|
'filename': doc.filename,
|
|
'extension': file_extension,
|
|
'mimeType': mime_type
|
|
})
|
|
elif isinstance(doc, dict) and 'filename' in doc:
|
|
delivered_files.append(doc['filename'])
|
|
file_extension = getFileExtension(doc['filename'])
|
|
mime_type = doc.get('mimeType', 'application/octet-stream')
|
|
delivered_formats.append({
|
|
'filename': doc['filename'],
|
|
'extension': file_extension,
|
|
'mimeType': mime_type
|
|
})
|
|
else:
|
|
delivered_files.append(f"document_{len(delivered_files)}")
|
|
delivered_formats.append({
|
|
'filename': f"document_{len(delivered_files)}",
|
|
'extension': 'unknown',
|
|
'mimeType': 'application/octet-stream'
|
|
})
|
|
return delivered_files, delivered_formats |