157 lines
8 KiB
Python
157 lines
8 KiB
Python
import logging
|
|
from typing import Any, Dict, List, Optional
|
|
from datetime import datetime, UTC
|
|
import re
|
|
from modules.shared.timezoneUtils import get_utc_timestamp
|
|
from modules.services.serviceDocument.documentUtility import (
|
|
getFileExtension,
|
|
getMimeTypeFromExtension,
|
|
detectMimeTypeFromContent,
|
|
detectMimeTypeFromData,
|
|
convertDocumentDataToString
|
|
)
|
|
|
|
# Module-level logger named after this module; used by DocumentGenerator for
# progress and error reporting.
logger = logging.getLogger(__name__)
|
|
|
|
class DocumentGenerator:
    """Convert documents carried by an action result into stored documents.

    The generator reads ``action_result.documents``, normalises every entry
    into a plain dictionary, asks the injected service to create a document
    for each entry with real content, and copies workflow context onto the
    created document where the document exposes the matching attributes.
    """

    # MIME type that carries no information; triggers content-based detection.
    _GENERIC_MIME_TYPE = "application/octet-stream"

    # Serialised payloads that hold no real content and are therefore skipped.
    _MINIMAL_CONTENT_PATTERNS = ('{}', '[]', 'null', '""', "''")

    def __init__(self, service):
        """Store the service used for document creation and workflow lookups."""
        self.service = service

    @staticmethod
    def _computeDocumentSize(document_data: Any) -> int:
        """Return the size of a document payload.

        ``bytes``/``bytearray`` payloads are measured in bytes and ``None``
        counts as empty. Measuring ``len(str(data))`` for those would count
        the characters of the repr (``b'abc'`` -> 6, ``None`` -> 4) instead
        of the payload. Strings are measured in characters and any other
        object by the length of its ``str()`` form.
        """
        if document_data is None:
            return 0
        if isinstance(document_data, (bytes, bytearray, str)):
            return len(document_data)
        return len(str(document_data))

    def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
        """Process documents produced by AI actions into document dictionaries.

        This handles AI-generated document data, not document references.

        Args:
            action_result: Object whose ``documents`` attribute holds the
                produced documents; may be ``None`` or lack the attribute.
            action: The executed action; ``execMethod`` and ``execAction``
                are used for logging and it is passed on per document.
            workflow: Not used by this method; kept for interface
                compatibility.

        Returns:
            One dictionary per successfully processed document (see
            ``processSingleDocument``); an empty list when there are no
            documents or when processing fails.
        """
        try:
            # Read documents from the standard documents field (not data.documents)
            documents = action_result.documents if action_result and hasattr(action_result, 'documents') else []

            if not documents:
                logger.info(f"No documents found in action_result.documents for {action.execMethod}.{action.execAction}")
                return []

            logger.info(f"Processing {len(documents)} documents from action_result.documents")

            processed_documents = []
            for doc in documents:
                processed_doc = self.processSingleDocument(doc, action)
                # processSingleDocument returns None on failure; drop those entries.
                if processed_doc:
                    processed_documents.append(processed_doc)

            logger.info(f"Successfully processed {len(processed_documents)} documents")
            return processed_documents
        except Exception as e:
            # Best-effort boundary: report with traceback and return nothing.
            logger.exception(f"Error processing action result documents: {str(e)}")
            return []

    def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
        """Normalise one action-result document into a dictionary.

        Args:
            doc: Object exposing ``documentName``, ``documentData`` and
                ``mimeType``.
            action: Not used by this method; kept for interface
                compatibility.

        Returns:
            A dictionary with the keys ``fileName``, ``fileSize``,
            ``mimeType``, ``content`` and ``document`` (the original object),
            or ``None`` if the document could not be processed.
        """
        try:
            mime_type = doc.mimeType
            # A missing or generic MIME type says nothing about the payload,
            # so fall back to detection from the content and file name.
            if not mime_type or mime_type == self._GENERIC_MIME_TYPE:
                mime_type = detectMimeTypeFromContent(doc.documentData, doc.documentName, self.service)

            return {
                'fileName': doc.documentName,
                'fileSize': self._computeDocumentSize(doc.documentData),
                'mimeType': mime_type,
                'content': doc.documentData,
                'document': doc,
            }
        except Exception as e:
            logger.exception(f"Error processing single document: {str(e)}")
            return None

    def createDocumentsFromActionResult(self, action_result, action, workflow, message_id=None) -> List[Any]:
        """Create and store document objects for an action result.

        Each processed document is converted to string content, skipped when
        that content is empty or one of the minimal placeholder payloads, and
        otherwise created through ``self.service.createDocument``. Created
        documents are tagged via ``_setDocumentWorkflowContext``.

        Args:
            action_result: Object whose ``documents`` attribute holds the
                produced documents; may be ``None``.
            action: The executed action (used for logging and context).
            workflow: The workflow the documents belong to.
            message_id: Optional message id forwarded to ``createDocument``.

        Returns:
            The list of created document objects; an empty list on failure.
            A failure on one document does not prevent the others from being
            created.
        """
        try:
            logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
            # getattr keeps a None/attribute-less action_result from aborting
            # here; processActionResultDocuments handles that case itself.
            raw_documents = getattr(action_result, 'documents', None)
            logger.info(f"Action result documents count: {len(raw_documents) if raw_documents else 0}")

            processed_docs = self.processActionResultDocuments(action_result, action, workflow)
            logger.info(f"Processed {len(processed_docs)} documents")

            created_documents = []
            for i, doc_data in enumerate(processed_docs):
                try:
                    document_name = doc_data['fileName']
                    document_data = doc_data['content']
                    mime_type = doc_data['mimeType']

                    logger.info(f"Creating document {i+1}: {document_name} (mime: {mime_type}, content length: {self._computeDocumentSize(document_data)})")

                    # Convert document data to string content
                    content = convertDocumentDataToString(document_data, getFileExtension(document_name))

                    # Skip empty or minimal content
                    stripped = content.strip() if content else ""
                    if not stripped or stripped in self._MINIMAL_CONTENT_PATTERNS:
                        logger.warning(f"Empty or minimal content for document {document_name}, skipping")
                        continue

                    logger.info(f"Document {document_name} has content: {len(content)} characters")

                    # Create document with file in one step
                    document = self.service.createDocument(
                        fileName=document_name,
                        mimeType=mime_type,
                        content=content,
                        base64encoded=False,
                        messageId=message_id,
                    )
                    if not document:
                        logger.error(f"Failed to create ChatDocument object for {document_name}")
                        continue

                    # Set workflow context on the document if possible
                    self._setDocumentWorkflowContext(document, action, workflow)
                    created_documents.append(document)
                    logger.info(f"Successfully created ChatDocument: {document_name} (ID: {getattr(document, 'id', 'N/A')}, fileId: {getattr(document, 'fileId', 'N/A')})")
                except Exception as e:
                    # One bad document must not stop the remaining ones.
                    logger.exception(f"Error creating document {doc_data.get('fileName', 'unknown')}: {str(e)}")

            logger.info(f"Successfully created {len(created_documents)} documents")
            return created_documents
        except Exception as e:
            logger.exception(f"Error creating documents from action result: {str(e)}")
            return []

    def _setDocumentWorkflowContext(self, document, action, workflow):
        """Copy workflow context onto a document, attribute by attribute.

        Values come from ``self.service.getWorkflowContext()`` and
        ``self.service.getWorkflowStats()``. An attribute is only assigned if
        the document already exposes it. Any failure is logged as a warning
        and otherwise ignored, so document creation is never blocked by it.

        Args:
            document: The created document to annotate.
            action: The executed action; its ``id`` (if present) becomes
                ``actionId``.
            workflow: Fallback source for ``workflowId`` / ``workflowStatus``
                when the workflow stats do not provide them.
        """
        try:
            # Get current workflow context from service center
            workflow_context = self.service.getWorkflowContext()
            workflow_stats = self.service.getWorkflowStats()

            current_round = workflow_context.get('currentRound', 0)
            current_task = workflow_context.get('currentTask', 0)
            current_action = workflow_context.get('currentAction', 0)

            # Only assign attributes the document already exposes.
            if hasattr(document, 'roundNumber'):
                document.roundNumber = current_round
            if hasattr(document, 'taskNumber'):
                document.taskNumber = current_task
            if hasattr(document, 'actionNumber'):
                document.actionNumber = current_action
            if hasattr(document, 'actionId'):
                document.actionId = getattr(action, 'id', None)

            # Workflow stats take precedence; the workflow object is the fallback.
            if hasattr(document, 'workflowId'):
                document.workflowId = workflow_stats.get('workflowId', getattr(workflow, 'id', None))
            if hasattr(document, 'workflowStatus'):
                document.workflowStatus = workflow_stats.get('workflowStatus', getattr(workflow, 'status', 'unknown'))

            logger.debug(f"Set workflow context on document: Round {current_round}, Task {current_task}, Action {current_action}")
            logger.debug(f"Document workflow metadata: ID={getattr(document, 'workflowId', 'N/A')}, Status={getattr(document, 'workflowStatus', 'N/A')}")
        except Exception as e:
            logger.warning(f"Could not set workflow context on document: {str(e)}")
|