import logging
from typing import Any, Dict, List, Optional
from datetime import datetime, UTC

from .documentUtility import (
    getFileExtension,
    getMimeTypeFromExtension,
    detectMimeTypeFromContent,
    detectMimeTypeFromData,
    convertDocumentDataToString
)

logger = logging.getLogger(__name__)

# MIME type treated as "unknown"; seeing it triggers content-based detection.
_DEFAULT_MIME_TYPE = "application/octet-stream"

# Serialized payloads that carry no real content and are skipped on creation.
_MINIMAL_CONTENT_PATTERNS = ('{}', '[]', 'null', '""', "''")


class DocumentGenerator:
    """Convert action-result documents into dictionaries and stored documents."""

    def __init__(self, service):
        """Keep the service used for MIME detection and file/document creation."""
        self.service = service

    def processActionResultDocuments(self, action_result, action, workflow) -> List[Dict[str, Any]]:
        """
        Main function to process documents from an action result.

        Args:
            action_result: Object whose ``data`` mapping may hold a
                ``"documents"`` list.
            action: Action that produced the result; forwarded to
                ``processSingleDocument``.
            workflow: Accepted for interface compatibility; not used here.

        Returns:
            The processed document dictionaries. Documents that failed to
            process are dropped; any unexpected error yields an empty list.
        """
        try:
            # A missing key and an explicit None are both "no documents".
            documents = action_result.data.get("documents") or []
            return [
                processed
                for doc in documents
                if (processed := self.processSingleDocument(doc, action))
            ]
        except Exception as e:
            logger.error(f"Error processing action result documents: {str(e)}")
            return []

    def processSingleDocument(self, doc: Any, action) -> Optional[Dict[str, Any]]:
        """
        Process a single document from an action result.

        Three input shapes are handled: an object with a non-empty
        ``filename`` attribute, a dictionary, and anything else (stringified
        as a fallback).

        Returns:
            A dict with the keys ``filename``, ``fileSize``, ``mimeType``,
            ``content`` and ``document``, or ``None`` when processing raised.
        """
        try:
            if hasattr(doc, 'filename') and doc.filename:
                return self._processDocumentObject(doc, action)
            if isinstance(doc, dict):
                return self._processDocumentDict(doc, action)
            return self._processUnknownDocument(doc, action)
        except Exception as e:
            logger.error(f"Error processing single document: {str(e)}")
            return None

    def _processDocumentObject(self, doc: Any, action) -> Dict[str, Any]:
        """Build the result dict for an object exposing a ``filename`` attribute."""
        content = getattr(doc, 'content', '')

        # A missing, None or empty mimeType is treated like the generic type
        # so that content-based detection still runs.
        mime_type = getattr(doc, 'mimeType', None) or _DEFAULT_MIME_TYPE
        if mime_type == _DEFAULT_MIME_TYPE:
            mime_type = detectMimeTypeFromContent(content, doc.filename, self.service)

        filename = self._applyResultLabel(
            doc.filename, action, "document object filename", log_missing=False
        )
        return {
            'filename': filename,
            'fileSize': getattr(doc, 'fileSize', 0),
            'mimeType': mime_type,
            'content': content,
            'document': doc
        }

    def _processDocumentDict(self, doc: Dict[str, Any], action) -> Dict[str, Any]:
        """Build the result dict for a dictionary-format document."""
        # Accept both key spellings; an empty 'documentName' must not hide a
        # usable 'filename'.
        base_filename = doc.get('documentName') or doc.get('filename') or ''

        # Diagnostic logging for the result label.
        if hasattr(action, 'execResultLabel'):
            logger.info(f"Action {action.execMethod}.{action.execAction} has execResultLabel: '{action.execResultLabel}' (type: {type(action.execResultLabel)})")
        else:
            logger.info(f"Action {action.execMethod}.{action.execAction} has NO execResultLabel attribute")

        # No filename supplied: derive one from the action and a timestamp.
        if not base_filename:
            base_filename = self._generateFilename(action)

        # The label is applied to supplied and generated names alike so that
        # naming stays consistent for later document selection.
        filename = self._applyResultLabel(base_filename, action, "filename")

        mime_type = doc.get('mimeType') or _DEFAULT_MIME_TYPE

        # 'documentData' is either a dict carrying 'content' (optionally with
        # a 'data' alternative) or the content itself.
        document_data = doc.get('documentData', '')
        if isinstance(document_data, dict) and 'content' in document_data:
            content = document_data['content']
            if not content and 'data' in document_data:
                content = document_data['data']
        else:
            content = document_data

        file_size = self._contentLength(content)

        if mime_type == _DEFAULT_MIME_TYPE:
            mime_type = detectMimeTypeFromContent(content, filename, self.service)

        logger.info(f"Processed document: {filename}, content length: {file_size}, mimeType: {mime_type}")

        return {
            'filename': filename,
            'fileSize': file_size,
            'mimeType': mime_type,
            'content': content,
            'document': doc
        }

    def _processUnknownDocument(self, doc: Any, action) -> Dict[str, Any]:
        """Build the result dict for a document of an unrecognized type."""
        logger.warning(f"Unknown document type for action {action.execMethod}.{action.execAction}: {type(doc)}")

        filename = self._applyResultLabel(
            self._generateFilename(action), action, "fallback filename"
        )
        mime_type = detectMimeTypeFromContent(doc, filename, self.service)

        # The stored content is the string form of the object, so the size
        # is taken from that same string.
        content = str(doc)
        return {
            'filename': filename,
            'fileSize': len(content),
            'mimeType': mime_type,
            'content': content,
            'document': doc
        }

    @staticmethod
    def _generateFilename(action) -> str:
        """Return ``<execMethod>_<execAction>_<UTC timestamp>`` for *action*."""
        timestamp = datetime.now(UTC).strftime('%Y%m%d_%H%M%S')
        return f"{action.execMethod}_{action.execAction}_{timestamp}"

    @staticmethod
    def _contentLength(content: Any) -> int:
        """Return the length of *content*, or 0 when it is empty.

        Strings and byte sequences are measured directly; ``str()`` of a
        bytes object is its repr (``b'...'``), which would overstate the
        size. Any other value is measured by its string form.
        """
        if not content:
            return 0
        if isinstance(content, (str, bytes, bytearray)):
            return len(content)
        return len(str(content))

    @staticmethod
    def _applyResultLabel(base_filename: str, action, kind: str, log_missing: bool = True) -> str:
        """Prefix *base_filename* with the action's result label.

        Args:
            base_filename: Name to prefix.
            action: Object that may carry an ``execResultLabel`` attribute.
            kind: Wording used in log messages, e.g. ``"fallback filename"``.
            log_missing: Whether to log when no usable label exists. That
                log line reads ``action.execMethod``/``execAction``, so
                callers that must not touch those attributes pass False.

        Returns:
            ``"<label>-<base_filename>"``, or *base_filename* unchanged when
            there is no usable label or the prefix is already present.
        """
        result_label = (getattr(action, 'execResultLabel', None) or '').strip()
        if not result_label:
            if log_missing:
                logger.info(f"No resultLabel available for action {action.execMethod}.{action.execAction}")
            return base_filename

        prefix = f"{result_label}-"
        if base_filename.startswith(prefix):
            # Already labelled: avoid stacking the prefix twice.
            logger.info(f"{kind[0].upper()}{kind[1:]} already has resultLabel prefix: {base_filename}")
            return base_filename

        labelled = f"{prefix}{base_filename}"
        logger.info(f"Added resultLabel '{result_label}' as prefix to {kind}: {labelled}")
        return labelled

    def createDocumentsFromActionResult(self, action_result, action, workflow) -> List[Any]:
        """
        Create actual document objects from action result and store them in the system.

        Each processed document is converted to string content, written with
        ``service.createFile`` and then wrapped with ``service.createDocument``
        using the returned file ID. Documents with empty or placeholder
        content, or whose creation fails, are skipped.

        Returns:
            The created document objects; an empty list on unexpected error.
        """
        try:
            logger.info(f"Creating documents from action result for {action.execMethod}.{action.execAction}")
            logger.info(f"Action result data keys: {list(action_result.data.keys())}")

            processed_docs = self.processActionResultDocuments(action_result, action, workflow)
            logger.info(f"Processed {len(processed_docs)} documents")

            created_documents = []
            for position, doc_data in enumerate(processed_docs, start=1):
                try:
                    document_name = doc_data['filename']
                    document_data = doc_data['content']
                    mime_type = doc_data['mimeType']

                    logger.info(f"Creating document {position}: {document_name} (mime: {mime_type}, content length: {len(str(document_data))})")

                    # Convert document data to string content
                    content = convertDocumentDataToString(document_data, getFileExtension(document_name))

                    # Skip empty content and bare serialized placeholders.
                    stripped = content.strip() if content else ""
                    if not stripped or stripped in _MINIMAL_CONTENT_PATTERNS:
                        logger.warning(f"Empty or minimal content for document {document_name}, skipping")
                        continue

                    logger.info(f"Document {document_name} has content: {len(content)} characters")

                    # Create file in system; a falsy ID is treated as failure.
                    file_id = self.service.createFile(
                        fileName=document_name,
                        mimeType=mime_type,
                        content=content,
                        base64encoded=False
                    )
                    if not file_id:
                        logger.error(f"Failed to create file for document {document_name}")
                        continue

                    logger.info(f"Created file with ID: {file_id}")

                    # Create document object using existing file ID
                    document = self.service.createDocument(
                        fileName=document_name,
                        mimeType=mime_type,
                        content=content,
                        base64encoded=False,
                        existing_file_id=file_id
                    )
                    if document:
                        created_documents.append(document)
                        logger.info(f"Successfully created ChatDocument: {document_name} (ID: {getattr(document, 'id', 'N/A')}, fileId: {getattr(document, 'fileId', 'N/A')})")
                    else:
                        logger.error(f"Failed to create ChatDocument object for {document_name}")

                except Exception as e:
                    # One bad document must not abort the remaining ones.
                    logger.error(f"Error creating document {doc_data.get('filename', 'unknown')}: {str(e)}")
                    continue

            logger.info(f"Successfully created {len(created_documents)} documents")
            return created_documents

        except Exception as e:
            logger.error(f"Error creating documents from action result: {str(e)}")
            return []