diff --git a/modules/chat/documents/documentExtraction.py b/modules/chat/documents/documentExtraction.py index ea96289d..a304cbe3 100644 --- a/modules/chat/documents/documentExtraction.py +++ b/modules/chat/documents/documentExtraction.py @@ -341,7 +341,7 @@ class DocumentExtraction: # Use documentUtility for mime type - mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) + mime_type = getMimeTypeFromExtension(getFileExtension(fileName)) return [ContentItem( label="main", data=content, @@ -360,7 +360,7 @@ class DocumentExtraction: """Process CSV document with robust encoding detection""" try: content = self._robustTextDecode(fileData, fileName) - mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) + mime_type = getMimeTypeFromExtension(getFileExtension(fileName)) return [ContentItem( label="main", data=content, @@ -380,7 +380,7 @@ class DocumentExtraction: try: content = self._robustTextDecode(fileData, fileName) jsonData = json.loads(content) - mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) + mime_type = getMimeTypeFromExtension(getFileExtension(fileName)) return [ContentItem( label="main", data=content, @@ -399,7 +399,7 @@ class DocumentExtraction: """Process XML document with robust encoding detection""" try: content = self._robustTextDecode(fileData, fileName) - mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) + mime_type = getMimeTypeFromExtension(getFileExtension(fileName)) return [ContentItem( label="main", data=content, @@ -418,7 +418,7 @@ class DocumentExtraction: """Process HTML document with robust encoding detection""" try: content = self._robustTextDecode(fileData, fileName) - mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) + mime_type = getMimeTypeFromExtension(getFileExtension(fileName)) return [ContentItem( label="main", data=content, @@ -512,7 +512,7 @@ class DocumentExtraction: # Combine all meaningful content final_content = "\n".join(meaningful_content) - mime_type = getMimeTypeFromExtension(getFileExtension(fileName), self._serviceCenter) + mime_type = getMimeTypeFromExtension(getFileExtension(fileName)) return [ContentItem( label="svg_content", data=final_content, diff --git a/modules/chat/documents/documentGeneration.py b/modules/chat/documents/documentGeneration.py index 5534462a..dfe10918 100644 --- a/modules/chat/documents/documentGeneration.py +++ b/modules/chat/documents/documentGeneration.py @@ -98,26 +98,12 @@ class DocumentGenerator: logger.info(f"Document {document_name} has content: {len(content)} characters") - # Create file in system - file_id = self.service.createFile( - fileName=document_name, - mimeType=mime_type, - content=content, - base64encoded=False - ) - if not file_id: - logger.error(f"Failed to create file for document {document_name}") - continue - - logger.info(f"Created file with ID: {file_id}") - - # Create document object using existing file ID + # Create document with file in one step document = self.service.createDocument( fileName=document_name, mimeType=mime_type, content=content, - base64encoded=False, - existing_file_id=file_id + base64encoded=False ) if document: # Set workflow context on the document if possible diff --git a/modules/chat/documents/documentUtility.py b/modules/chat/documents/documentUtility.py index 3d674720..5b0a612c 100644 --- a/modules/chat/documents/documentUtility.py +++ b/modules/chat/documents/documentUtility.py @@ -1,51 +1,160 @@ import json import logging +import os from typing import Any, Dict logger = logging.getLogger(__name__) def getFileExtension(fileName: str) -> str: - """Extract file extension from fileName""" + """Extract file extension from fileName (without dot, lowercased).""" if '.' in fileName: return fileName.rsplit('.', 1)[-1].lower() return '' -def getMimeTypeFromExtension(extension: str, service=None) -> str: - """Get MIME type based on file extension. Optionally use a service for mapping.""" - if service: - return service.getMimeTypeFromExtension(extension) - # Fallback mapping - mapping = { +def getMimeTypeFromExtension(extension: str) -> str: + """ + Get MIME type based on file extension. + This method consolidates MIME type detection from extension. + + Args: + extension: File extension (with or without dot) + + Returns: + str: MIME type for the extension + """ + # Normalize extension (remove dot if present) + if extension.startswith('.'): + extension = extension[1:] + + # Map extensions to MIME types + mime_types = { 'txt': 'text/plain', - 'md': 'text/markdown', - 'html': 'text/html', - 'css': 'text/css', - 'js': 'application/javascript', 'json': 'application/json', - 'csv': 'text/csv', 'xml': 'application/xml', + 'csv': 'text/csv', + 'html': 'text/html', + 'htm': 'text/html', + 'md': 'text/markdown', 'py': 'text/x-python', + 'js': 'application/javascript', + 'css': 'text/css', 'pdf': 'application/pdf', + 'doc': 'application/msword', 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + 'xls': 'application/vnd.ms-excel', 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', - 'png': 'image/png', + 'ppt': 'application/vnd.ms-powerpoint', + 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + 'svg': 'image/svg+xml', 'jpg': 'image/jpeg', 'jpeg': 'image/jpeg', + 'png': 'image/png', 'gif': 'image/gif', - 'svg': 'image/svg+xml', + 'bmp': 'image/bmp', + 'webp': 'image/webp', + 'zip': 'application/zip', + 'rar': 'application/x-rar-compressed', + '7z': 'application/x-7z-compressed', + 'tar': 'application/x-tar', + 'gz': 'application/gzip' } - return mapping.get(extension.lower(), 'application/octet-stream') + return mime_types.get(extension.lower(), 'application/octet-stream') + +def detectContentTypeFromData(fileData: bytes, fileName: str) -> str: + """ + Detect content type from file data and fileName. + This method makes the MIME type detection function accessible through the service center. + + Args: + fileData: Raw file data as bytes + fileName: Name of the file + + Returns: + str: Detected MIME type + """ + try: + # Check file extension first + ext = os.path.splitext(fileName)[1].lower() + if ext: + # Map common extensions to MIME types + extToMime = { + '.txt': 'text/plain', + '.md': 'text/markdown', + '.csv': 'text/csv', + '.json': 'application/json', + '.xml': 'application/xml', + '.js': 'application/javascript', + '.py': 'application/x-python', + '.svg': 'image/svg+xml', + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.gif': 'image/gif', + '.bmp': 'image/bmp', + '.webp': 'image/webp', + '.pdf': 'application/pdf', + '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', + '.doc': 'application/msword', + '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + '.xls': 'application/vnd.ms-excel', + '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', + '.ppt': 'application/vnd.ms-powerpoint', + '.html': 'text/html', + '.htm': 'text/html', + '.css': 'text/css', + '.zip': 'application/zip', + '.rar': 'application/x-rar-compressed', + '.7z': 'application/x-7z-compressed', + '.tar': 'application/x-tar', + '.gz': 'application/gzip' + } + if ext in extToMime: + return extToMime[ext] + + # Try to detect from content + if fileData.startswith(b'%PDF'): + return 'application/pdf' + elif fileData.startswith(b'PK\x03\x04'): + # ZIP-based formats (docx, xlsx, pptx) + return 'application/zip' + elif fileData.startswith(b'<'): + # XML-based formats + try: + text = fileData.decode('utf-8', errors='ignore') + if ' str: """Detect MIME type from file bytes and fileName using a service if provided.""" try: - if service: + if service and hasattr(service, 'detectContentTypeFromData'): detected = service.detectContentTypeFromData(file_bytes, fileName) if detected and detected != 'application/octet-stream': return detected - # Fallback: guess from extension - ext = getFileExtension(fileName) - return getMimeTypeFromExtension(ext, service) + # Fallback: use our consolidated function + return detectContentTypeFromData(file_bytes, fileName) except Exception as e: logger.warning(f"Error in MIME type detection for {fileName}: {str(e)}") return 'application/octet-stream' diff --git a/modules/chat/handling/handlingTasks.py b/modules/chat/handling/handlingTasks.py index d99521f2..88465b0e 100644 --- a/modules/chat/handling/handlingTasks.py +++ b/modules/chat/handling/handlingTasks.py @@ -108,7 +108,7 @@ class HandlingTasks: # Log the full task planning prompt being sent to AI for debugging logger.info("=== TASK PLANNING PROMPT SENT TO AI ===") logger.info(f"User Input: {userInput}") - logger.info(f"Available Documents: {len(available_docs) if available_docs else 0}") + logger.info(f"Available Documents: {available_docs}") logger.info("=== FULL TASK PLANNING PROMPT ===") logger.info(task_planning_prompt) logger.info("=== END TASK PLANNING PROMPT ===") @@ -312,12 +312,8 @@ class HandlingTasks: # Log available resources for debugging logger.info("=== AVAILABLE RESOURCES FOR ACTION GENERATION ===") - logger.info(f"Available Documents: {len(available_docs) if available_docs else 0}") - if available_docs: - for i, doc in enumerate(available_docs[:5]): # Show first 5 - logger.info(f" Doc {i+1}: {doc}") - if len(available_docs) > 5: - logger.info(f" ... and {len(available_docs) - 5} more documents") + logger.info(f"Available Documents: {available_docs}") + # Note: available_docs is now a string description, not a list logger.info(f"Available Connections: {len(available_connections) if available_connections else 0}") if available_connections: for i, conn in enumerate(available_connections[:5]): # Show first 5 @@ -376,7 +372,7 @@ class HandlingTasks: logger.info(f"Task Step ID: {action_context.task_step.id if action_context.task_step else 'None'}") logger.info(f"Task Step Objective: {action_context.task_step.objective if action_context.task_step else 'None'}") logger.info(f"Workflow ID: {action_context.workflow_id}") - logger.info(f"Available Documents Count: {len(action_context.available_documents) if action_context.available_documents else 0}") + logger.info(f"Available Documents: {action_context.available_documents or 'No documents available'}") logger.info(f"Available Connections Count: {len(action_context.available_connections) if action_context.available_connections else 0}") logger.info(f"Previous Results Count: {len(action_context.previous_results) if action_context.previous_results else 0}") logger.info(f"Retry Count: {action_context.retry_count}") diff --git a/modules/chat/handling/methodOutlook.py b/modules/chat/handling/methodOutlook.py deleted file mode 100644 index b28b04f6..00000000 --- a/modules/chat/handling/methodOutlook.py +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/modules/chat/handling/promptFactory.py b/modules/chat/handling/promptFactory.py index 4587cce4..9faa06b3 100644 --- a/modules/chat/handling/promptFactory.py +++ b/modules/chat/handling/promptFactory.py @@ -20,13 +20,13 @@ def createTaskPlanningPrompt(context: TaskContext, service) -> str: user_request = context.task_step.objective if context.task_step else 'No request specified' # Extract available documents from context - use Pydantic model directly - available_documents = context.available_documents or [] + available_documents = context.available_documents or "No documents available" return f"""You are a task planning AI that analyzes user requests and creates structured task plans with user-friendly feedback messages. USER REQUEST: {user_request} -AVAILABLE DOCUMENTS: {', '.join(available_documents)} +AVAILABLE DOCUMENTS: {available_documents} INSTRUCTIONS: 1. Analyze the user request and available documents diff --git a/modules/chat/serviceCenter.py b/modules/chat/serviceCenter.py index 703531f0..cef1555b 100644 --- a/modules/chat/serviceCenter.py +++ b/modules/chat/serviceCenter.py @@ -14,6 +14,7 @@ from modules.interfaces.interfaceChatModel import ActionResult from modules.interfaces.interfaceComponentObjects import getInterface as getComponentObjects from modules.interfaces.interfaceAppObjects import getInterface as getAppObjects from modules.chat.documents.documentExtraction import DocumentExtraction +from modules.chat.documents.documentUtility import getFileExtension, getMimeTypeFromExtension, detectContentTypeFromData from modules.chat.methodBase import MethodBase from modules.shared.timezoneUtils import get_utc_timestamp import uuid @@ -111,165 +112,9 @@ class ServiceCenter: except Exception as e: logger.error(f"Error discovering methods: {str(e)}") - def detectContentTypeFromData(self, fileData: bytes, fileName: str) -> str: - """ - Detect content type from file data and fileName. - This method makes the MIME type detection function accessible through the service center. - - Args: - fileData: Raw file data as bytes - fileName: Name of the file - - Returns: - str: Detected MIME type - """ - try: - # Check file extension first - ext = os.path.splitext(fileName)[1].lower() - if ext: - # Map common extensions to MIME types - extToMime = { - '.txt': 'text/plain', - '.md': 'text/markdown', - '.csv': 'text/csv', - '.json': 'application/json', - '.xml': 'application/xml', - '.js': 'application/javascript', - '.py': 'application/x-python', - '.svg': 'image/svg+xml', - '.jpg': 'image/jpeg', - '.jpeg': 'image/jpeg', - '.png': 'image/png', - '.gif': 'image/gif', - '.bmp': 'image/bmp', - '.webp': 'image/webp', - '.pdf': 'application/pdf', - '.docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - '.doc': 'application/msword', - '.xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', - '.xls': 'application/vnd.ms-excel', - '.pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', - '.ppt': 'application/vnd.ms-powerpoint', - '.html': 'text/html', - '.htm': 'text/html', - '.css': 'text/css', - '.zip': 'application/zip', - '.rar': 'application/x-rar-compressed', - '.7z': 'application/x-7z-compressed', - '.tar': 'application/x-tar', - '.gz': 'application/gzip' - } - if ext in extToMime: - return extToMime[ext] - - # Try to detect from content - if fileData.startswith(b'%PDF'): - return 'application/pdf' - elif fileData.startswith(b'PK\x03\x04'): - # ZIP-based formats (docx, xlsx, pptx) - return 'application/zip' - elif fileData.startswith(b'<'): - # XML-based formats - try: - text = fileData.decode('utf-8', errors='ignore') - if ' str: - """ - Get MIME type based on file extension. - This method consolidates MIME type detection from extension. - - Args: - extension: File extension (with or without dot) - - Returns: - str: MIME type for the extension - """ - # Normalize extension (remove dot if present) - if extension.startswith('.'): - extension = extension[1:] - - # Map extensions to MIME types - mime_types = { - 'txt': 'text/plain', - 'json': 'application/json', - 'xml': 'application/xml', - 'csv': 'text/csv', - 'html': 'text/html', - 'htm': 'text/html', - 'md': 'text/markdown', - 'py': 'text/x-python', - 'js': 'application/javascript', - 'css': 'text/css', - 'pdf': 'application/pdf', - 'doc': 'application/msword', - 'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document', - 'xls': 'application/vnd.ms-excel', - 'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', - 'ppt': 'application/vnd.ms-powerpoint', - 'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation', - 'svg': 'image/svg+xml', - 'jpg': 'image/jpeg', - 'jpeg': 'image/jpeg', - 'png': 'image/png', - 'gif': 'image/gif', - 'bmp': 'image/bmp', - 'webp': 'image/webp', - 'zip': 'application/zip', - 'rar': 'application/x-rar-compressed', - '7z': 'application/x-7z-compressed', - 'tar': 'application/x-tar', - 'gz': 'application/gzip' - } - return mime_types.get(extension.lower(), 'application/octet-stream') - def getFileExtension(self, fileName: str) -> str: - """ - Extract file extension from fileName. - - Args: - fileName: Name of the file - - Returns: - str: File extension (without dot) - """ - if '.' in fileName: - return fileName.split('.')[-1].lower() - return "txt" # Default to text - - def getFileExtension(self, fileName): - """ - Extract file extension from fileName (without dot, lowercased). - Returns empty string if no extension is found. - """ - if '.' in fileName: - return fileName.rsplit('.', 1)[-1].lower() - return '' - - # ===== Functions ===== + # ===== Functions for Prompts: Context ===== def getMethodsList(self) -> List[str]: """Get list of available methods with their signatures in the required format""" @@ -283,157 +128,48 @@ class ServiceCenter: methodList.append(signature) return methodList - def generateDocumentLabel(self, document: ChatDocument, message: ChatMessage) -> str: - """Generate new document label: round+task+action+filename.extension""" + async def summarizeChat(self, messages: List[ChatMessage]) -> str: + """ + Summarize chat messages from last to first message with status="first" + + Args: + messages: List of chat messages to summarize + + Returns: + str: Summary of the chat in user's language + """ try: - # Get workflow context from message - round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1 - task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0 - action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0 + # Get messages from last to first, stopping at first message with status="first" + relevantMessages = [] + for msg in reversed(messages): + relevantMessages.append(msg) + if msg.status == "first": + break - # Get file extension from document's fileName property - try: - file_extension = self.getFileExtension(document.fileName) - filename = document.fileName - except Exception as e: - # Try to diagnose and recover the issue - diagnosis = self.diagnoseDocumentAccess(document) - logger.error(f"Critical error: Cannot access document fileName for document {document.id}. Diagnosis: {diagnosis}") - - # Attempt recovery - if self.recoverDocumentAccess(document): - try: - file_extension = self.getFileExtension(document.fileName) - filename = document.fileName - logger.info(f"Document access recovered for {document.id}") - except Exception as recovery_error: - logger.error(f"Recovery failed for document {document.id}: {str(recovery_error)}") - raise RuntimeError(f"Document {document.id} is permanently inaccessible after recovery attempt: {str(recovery_error)}") - else: - # Recovery failed - don't continue with invalid data - raise RuntimeError(f"Document {document.id} is inaccessible and recovery failed. Diagnosis: {diagnosis}") - - # Construct label: round1_task2_action3_filename.ext - if file_extension: - label = f"round{round_num}_task{task_num}_action{action_num}_{filename}" - else: - label = f"round{round_num}_task{task_num}_action{action_num}_{filename}" - - return label - except Exception as e: - logger.error(f"Critical error generating document label for document {document.id}: {str(e)}") - # Re-raise the error to prevent workflow from continuing with invalid data - raise + # Create prompt for AI + prompt = f"""You are an AI assistant providing a summary of a chat conversation. +Please respond in '{self.user.language}' language. - def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]: - """Get list of document exchanges with new labeling format, sorted by recency""" - # Collect all documents first and refresh their attributes - all_documents = [] - for message in self.workflow.messages: - if message.documents: - all_documents.extend(message.documents) - - # Refresh file attributes for all documents - if all_documents: - self.refreshDocumentFileAttributes(all_documents) - - chat_exchanges = [] - history_exchanges = [] - - # Process messages in reverse order; "first" marks boundary - in_current_round = True - for message in reversed(self.workflow.messages): - is_first = message.status == "first" if hasattr(message, 'status') else False +Chat History: +{chr(10).join(f"- {msg.message}" for msg in reversed(relevantMessages))} + +Instructions: +1. Summarize the conversation's key points and outcomes +2. Be concise but informative +3. Use a professional but friendly tone +4. Focus on important decisions and next steps if any + +Please provide a comprehensive summary of this conversation.""" - # Build a DocumentExchange if message has documents - doc_exchange = None - if message.documents: - if message.actionId and message.documentsLabel: - # Validate that we use the same label as in the message - validated_label = self._validateDocumentLabelConsistency(message) - - # Use the message's actual documentsLabel - doc_refs = [] - for doc in message.documents: - doc_ref = self.getDocumentReferenceFromChatDocument(doc, message) - doc_refs.append(doc_ref) - - doc_exchange = DocumentExchange( - documentsLabel=validated_label, - documents=doc_refs - ) - else: - # Generate new labels for documents without explicit labels - doc_refs = [] - for doc in message.documents: - doc_ref = self.getDocumentReferenceFromChatDocument(doc, message) - doc_refs.append(doc_ref) - - if doc_refs: - # Create a label based on message context - round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1 - task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0 - action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0 - context_label = f"round{round_num}_task{task_num}_action{action_num}_context" - - doc_exchange = DocumentExchange( - documentsLabel=context_label, - documents=doc_refs - ) + # Get summary using AI + return await self.callAiTextBasic(prompt) - # Append to appropriate container based on boundary - if doc_exchange: - if in_current_round: - chat_exchanges.append(doc_exchange) - else: - history_exchanges.append(doc_exchange) - - # Flip boundary after including the "first" message in chat - if in_current_round and is_first: - in_current_round = False - - # Sort by recency: most recent first, then current round, then earlier rounds - # Sort chat exchanges by message sequence number (most recent first) - chat_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True) - # Sort history exchanges by message sequence number (most recent first) - history_exchanges.sort(key=lambda x: self._getMessageSequenceForExchange(x), reverse=True) - - return { - "chat": chat_exchanges, - "history": history_exchanges - } + except Exception as e: + logger.error(f"Error summarizing chat: {str(e)}") + return f"Error summarizing chat: {str(e)}" + + # ===== Functions for Prompts + Actions: Document References generation and resolution ===== - def _getMessageSequenceForExchange(self, exchange: DocumentExchange) -> int: - """Get message sequence number for sorting exchanges by recency""" - try: - # Extract message ID from the first document reference - if exchange.documents and len(exchange.documents) > 0: - first_doc_ref = exchange.documents[0] - if first_doc_ref.startswith("docItem:"): - # docItem::