diff --git a/modules/interfaces/interfaceChatModel.py b/modules/interfaces/interfaceChatModel.py index 53c5f4d6..410f676f 100644 --- a/modules/interfaces/interfaceChatModel.py +++ b/modules/interfaces/interfaceChatModel.py @@ -86,6 +86,8 @@ class ContentMetadata(BaseModel, ModelMixin): colorMode: Optional[str] = Field(None, description="Color mode (e.g., RGB, CMYK, grayscale)") fps: Optional[float] = Field(None, description="Frames per second for videos") durationSec: Optional[float] = Field(None, description="Duration in seconds for videos/audio") + mimeType: str = Field(description="MIME type of the content") + base64Encoded: bool = Field(description="Whether the data is base64 encoded") # Register labels for ContentMetadata register_model_labels( @@ -99,7 +101,9 @@ register_model_labels( "height": {"en": "Height", "fr": "Hauteur"}, "colorMode": {"en": "Color Mode", "fr": "Mode de couleur"}, "fps": {"en": "FPS", "fr": "IPS"}, - "durationSec": {"en": "Duration", "fr": "Durée"} + "durationSec": {"en": "Duration", "fr": "Durée"}, + "mimeType": {"en": "MIME Type", "fr": "Type MIME"}, + "base64Encoded": {"en": "Base64 Encoded", "fr": "Encodé en Base64"} } ) @@ -107,8 +111,6 @@ class ContentItem(BaseModel, ModelMixin): """Individual content item from a document""" label: str = Field(description="Content label (e.g., tab name, tag name)") data: str = Field(description="Extracted text content") - mimeType: str = Field(description="MIME type of the content") - base64Encoded: bool = Field(description="Whether the data is base64 encoded") metadata: ContentMetadata = Field(description="Content metadata") # Register labels for ContentItem @@ -143,6 +145,21 @@ register_model_labels( } ) +class DocumentExchange(BaseModel, ModelMixin): + """Data model for document exchange between AI actions""" + documentsLabel: str = Field(description="Label for the set of documents") + documents: List[str] = Field(default_factory=list, description="List of document references") + +# Register labels for DocumentExchange +register_model_labels( + "DocumentExchange", + {"en": "Document Exchange", "fr": "Échange de documents"}, + { + "documentsLabel": {"en": "Documents Label", "fr": "Label des documents"}, + "documents": {"en": "Documents", "fr": "Documents"} + } +) + class ExtractedContent(BaseModel, ModelMixin): """Data model for extracted content""" id: str = Field(description="Reference to source ChatDocument") diff --git a/modules/interfaces/interfaceComponentObjects.py b/modules/interfaces/interfaceComponentObjects.py index 272344c9..7e6f0c4d 100644 --- a/modules/interfaces/interfaceComponentObjects.py +++ b/modules/interfaces/interfaceComponentObjects.py @@ -504,20 +504,25 @@ class ComponentObjects: return newFilename counter += 1 - def createFile(self, name: str, mimeType: str, size: int = None, fileHash: str = None) -> FileItem: - """Creates a new file entry if user has permission.""" + def createFile(self, name: str, mimeType: str, content: bytes) -> FileItem: + """Creates a new file entry if user has permission. Computes fileHash and fileSize from content.""" + import hashlib if not self._canModify("files"): raise PermissionError("No permission to create files") - + # Ensure filename is unique uniqueName = self._generateUniqueFilename(name) - + + # Compute file size and hash + fileSize = len(content) + fileHash = hashlib.sha256(content).hexdigest() + # Create FileItem instance fileItem = FileItem( mandateId=self.currentUser.mandateId, filename=uniqueName, mimeType=mimeType, - fileSize=size, + fileSize=fileSize, fileHash=fileHash ) @@ -818,27 +823,15 @@ class ComponentObjects: logger.error(f"Invalid fileContent type: {type(fileContent)}") raise ValueError(f"fileContent must be bytes, got {type(fileContent)}") - # Calculate file hash for deduplication - fileHash = self.calculateFileHash(fileContent) - logger.debug(f"Calculated file hash: {fileHash}") - - # Check for duplicate within same user/mandate - existingFile = self.checkForDuplicateFile(fileHash) - if existingFile: - logger.debug(f"Duplicate found for {fileName}: {existingFile.id}") - return existingFile - - # Determine MIME type and size + # Determine MIME type mimeType = self.getMimeType(fileName) - fileSize = len(fileContent) - # Save metadata + # Save metadata and file (hash/size computed inside createFile) logger.debug(f"Saving file metadata to database for file: {fileName}") fileItem = self.createFile( name=fileName, mimeType=mimeType, - size=fileSize, - fileHash=fileHash + content=fileContent ) # Save binary data diff --git a/modules/methods/methodCoder.py b/modules/methods/methodCoder.py index ea6937a4..e0e09bb4 100644 --- a/modules/methods/methodCoder.py +++ b/modules/methods/methodCoder.py @@ -46,7 +46,8 @@ class MethodCoder(MethodBase): error="AI prompt is required" ) - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + # Handle new document list format (list of strings) + chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList) if not chatDocuments: return self._createResult( success=False, @@ -59,15 +60,15 @@ class MethodCoder(MethodBase): for chatDocument in chatDocuments: fileId = chatDocument.fileId - code = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) + code = self.service.getFileData(fileId) + file_info = self.service.getFileInfo(fileId) if not code: logger.warning(f"Code file is empty for fileId: {fileId}") continue # Use AI prompt to extract relevant code content - extracted_content = await self.serviceContainer.extractContentFromFileData( + extracted_content = await self.service.extractContentFromFileData( prompt=aiPrompt, fileData=code, filename=file_info.get('name', 'code'), @@ -107,7 +108,7 @@ class MethodCoder(MethodBase): """ # Use AI service for analysis - analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) + analysis_result = await self.service.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) # Create result data result_data = { @@ -121,8 +122,12 @@ class MethodCoder(MethodBase): return self._createResult( success=True, data={ - "documentName": f"code_analysis_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"code_analysis_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) @@ -173,7 +178,7 @@ class MethodCoder(MethodBase): """ # Use AI service for code generation - generated_code = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(generation_prompt) + generated_code = await self.service.interfaceAiCalls.callAiTextAdvanced(generation_prompt) # Create result data result_data = { @@ -228,7 +233,8 @@ class MethodCoder(MethodBase): error="AI improvement prompt is required" ) - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + # Handle new document list format (list of strings) + chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList) if not chatDocuments: return self._createResult( success=False, @@ -241,8 +247,8 @@ class MethodCoder(MethodBase): for chatDocument in chatDocuments: fileId = chatDocument.fileId - code = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) + code = self.service.getFileData(fileId) + file_info = self.service.getFileInfo(fileId) if not code: logger.warning(f"Code file is empty for fileId: {fileId}") @@ -266,7 +272,7 @@ class MethodCoder(MethodBase): """ # Use AI service for refactoring - refactored_code = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(refactor_prompt) + refactored_code = await self.service.interfaceAiCalls.callAiTextAdvanced(refactor_prompt) refactored_results.append({ "original_file": file_info.get('name', 'unknown'), diff --git a/modules/methods/methodDocument.py b/modules/methods/methodDocument.py index a8659037..dee445b0 100644 --- a/modules/methods/methodDocument.py +++ b/modules/methods/methodDocument.py @@ -54,7 +54,7 @@ class MethodDocument(MethodBase): error="AI prompt is required" ) - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList) if not chatDocuments: return self._createResult( success=False, @@ -68,19 +68,20 @@ class MethodDocument(MethodBase): for chatDocument in chatDocuments: fileId = chatDocument.fileId - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) + file_data = self.service.getFileData(fileId) + file_info = self.service.getFileInfo(fileId) if not file_data: logger.warning(f"File not found or empty for fileId: {fileId}") continue - extracted_content = await self.serviceContainer.extractContentFromFileData( + extracted_content = await self.service.extractContentFromFileData( prompt=aiPrompt, fileData=file_data, filename=file_info.get('name', 'document'), mimeType=file_info.get('mimeType', 'application/octet-stream'), - base64Encoded=False + base64Encoded=False, + documentId=chatDocument.id ) all_extracted_content.append(extracted_content) @@ -108,8 +109,12 @@ class MethodDocument(MethodBase): return self._createResult( success=True, data={ - "documentName": f"extracted_content_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt", - "documentData": result_data + "documents": [ + { + "documentName": f"extracted_content_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt", + "documentData": result_data + } + ] } ) except Exception as e: @@ -149,7 +154,7 @@ class MethodDocument(MethodBase): error="AI prompt is required" ) - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList) if not chatDocuments: return self._createResult( success=False, @@ -162,19 +167,20 @@ class MethodDocument(MethodBase): for chatDocument in chatDocuments: fileId = chatDocument.fileId - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) + file_data = self.service.getFileData(fileId) + file_info = self.service.getFileInfo(fileId) if not file_data: logger.warning(f"File not found or empty for fileId: {fileId}") continue - extracted_content = await self.serviceContainer.extractContentFromFileData( + extracted_content = await self.service.extractContentFromFileData( prompt=aiPrompt, fileData=file_data, filename=file_info.get('name', 'document'), mimeType=file_info.get('mimeType', 'application/octet-stream'), - base64Encoded=False + base64Encoded=False, + documentId=chatDocument.id ) all_extracted_content.append(extracted_content) @@ -205,7 +211,7 @@ class MethodDocument(MethodBase): 6. Document structure and organization """ - analysis_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) + analysis_result = await self.service.interfaceAiCalls.callAiTextAdvanced(analysis_prompt) result_data = { "documentCount": len(chatDocuments), @@ -218,8 +224,12 @@ class MethodDocument(MethodBase): return self._createResult( success=True, data={ - "documentName": f"document_analysis_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"document_analysis_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) except Exception as e: @@ -261,7 +271,7 @@ class MethodDocument(MethodBase): error="AI prompt is required" ) - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList) if not chatDocuments: return self._createResult( success=False, @@ -274,19 +284,20 @@ class MethodDocument(MethodBase): for chatDocument in chatDocuments: fileId = chatDocument.fileId - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) + file_data = self.service.getFileData(fileId) + file_info = self.service.getFileInfo(fileId) if not file_data: logger.warning(f"File not found or empty for fileId: {fileId}") continue - extracted_content = await self.serviceContainer.extractContentFromFileData( + extracted_content = await self.service.extractContentFromFileData( prompt=aiPrompt, fileData=file_data, filename=file_info.get('name', 'document'), mimeType=file_info.get('mimeType', 'application/octet-stream'), - base64Encoded=False + base64Encoded=False, + documentId=chatDocument.id ) all_extracted_content.append(extracted_content) @@ -316,7 +327,7 @@ class MethodDocument(MethodBase): - Highlight important insights and conclusions """ - summary = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(summary_prompt) + summary = await self.service.interfaceAiCalls.callAiTextAdvanced(summary_prompt) result_data = { "documentCount": len(chatDocuments), @@ -331,8 +342,12 @@ class MethodDocument(MethodBase): return self._createResult( success=True, data={ - "documentName": f"document_summary_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt", - "documentData": result_data + "documents": [ + { + "documentName": f"document_summary_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.txt", + "documentData": result_data + } + ] } ) except Exception as e: diff --git a/modules/methods/methodOperator.py b/modules/methods/methodOperator.py deleted file mode 100644 index 5ebc2cb8..00000000 --- a/modules/methods/methodOperator.py +++ /dev/null @@ -1,339 +0,0 @@ -"""Operator method implementation for handling collections and AI operations""" - -from typing import Dict, List, Any, Optional -from datetime import datetime, UTC -import logging -import uuid - -from modules.workflow.methodBase import MethodBase, ActionResult, action - -logger = logging.getLogger(__name__) - -class MethodOperator(MethodBase): - """Operator method implementation for data operations""" - - def __init__(self, serviceContainer: Any): - super().__init__(serviceContainer) - self.name = "operator" - self.description = "Handle data operations like filtering, sorting, and transformation" - - @action - async def filter(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Filter data based on criteria - - Parameters: - documentList (str): Reference to the document list to filter - criteria (Dict[str, Any]): Filter criteria - field (str, optional): Field to filter on - """ - try: - documentList = parameters.get("documentList") - criteria = parameters.get("criteria") - field = parameters.get("field") - - if not documentList or not criteria: - return self._createResult( - success=False, - data={}, - error="Document list reference and criteria are required" - ) - - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) - if not chatDocuments: - return self._createResult( - success=False, - data={}, - error="No documents found for the provided reference" - ) - - # Extract content from all documents - all_document_content = [] - - for chatDocument in chatDocuments: - fileId = chatDocument.fileId - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) - - if not file_data: - logger.warning(f"File data not found for fileId: {fileId}") - continue - - all_document_content.append({ - "fileId": fileId, - "fileName": file_info.get('name', 'unknown'), - "content": file_data - }) - - if not all_document_content: - return self._createResult( - success=False, - data={}, - error="No content could be extracted from any documents" - ) - - # Combine all document content for filtering - combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join([ - f"File: {doc['fileName']}\nContent: {doc['content']}" - for doc in all_document_content - ]) - - filter_prompt = f""" - Filter the following data based on the specified criteria. - - Data to filter: - {combined_content} - - Filter criteria: - {criteria} - - Field to filter on: {field or 'All fields'} - - Please provide: - 1. Filtered data that matches the criteria - 2. Summary of filtering results - 3. Number of items before and after filtering - 4. Any data quality insights - """ - - filtered_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(filter_prompt) - - result_data = { - "documentCount": len(chatDocuments), - "criteria": criteria, - "field": field, - "filteredData": filtered_result, - "originalCount": len(all_document_content), - "timestamp": datetime.now(UTC).isoformat() - } - - return self._createResult( - success=True, - data={ - "documentName": f"filtered_data_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data - } - ) - except Exception as e: - logger.error(f"Error filtering data: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def sort(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Sort data by specified field - - Parameters: - documentList (str): Reference to the document list to sort - field (str): Field to sort by - order (str, optional): Sort order (asc/desc, default: "asc") - """ - try: - documentList = parameters.get("documentList") - field = parameters.get("field") - order = parameters.get("order", "asc") - - if not documentList or not field: - return self._createResult( - success=False, - data={}, - error="Document list reference and field are required" - ) - - # Get documents from reference - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) - if not chatDocuments: - return self._createResult( - success=False, - data={}, - error="No documents found for the provided reference" - ) - - # Extract content from all documents - all_document_content = [] - - for chatDocument in chatDocuments: - fileId = chatDocument.fileId - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) - - if not file_data: - logger.warning(f"File data not found for fileId: {fileId}") - continue - - all_document_content.append({ - "fileId": fileId, - "fileName": file_info.get('name', 'unknown'), - "content": file_data - }) - - if not all_document_content: - return self._createResult( - success=False, - data={}, - error="No content could be extracted from any documents" - ) - - # Combine all document content for sorting - combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join([ - f"File: {doc['fileName']}\nContent: {doc['content']}" - for doc in all_document_content - ]) - - # Create sorting prompt - sort_prompt = f""" - Sort the following data by the specified field. - - Data to sort: - {combined_content} - - Sort field: {field} - Sort order: {order} - - Please provide: - 1. Sorted data in the specified order - 2. Summary of sorting results - 3. Any data insights from the sorting - 4. Validation of sort field existence - """ - - # Use AI to perform sorting - sorted_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(sort_prompt) - - # Create result data - result_data = { - "documentCount": len(chatDocuments), - "field": field, - "order": order, - "sortedData": sorted_result, - "timestamp": datetime.now(UTC).isoformat() - } - - return self._createResult( - success=True, - data={ - "documentName": f"sorted_data_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data - } - ) - - except Exception as e: - logger.error(f"Error sorting data: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) - - @action - async def transform(self, parameters: Dict[str, Any]) -> ActionResult: - """ - Transform data structure or format - - Parameters: - documentList (str): Reference to the document list to transform - transformation (Dict[str, Any]): Transformation rules - outputFormat (str, optional): Desired output format - """ - try: - documentList = parameters.get("documentList") - transformation = parameters.get("transformation") - outputFormat = parameters.get("outputFormat", "json") - - if not documentList or not transformation: - return self._createResult( - success=False, - data={}, - error="Document list reference and transformation rules are required" - ) - - # Get documents from reference - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) - if not chatDocuments: - return self._createResult( - success=False, - data={}, - error="No documents found for the provided reference" - ) - - # Extract content from all documents - all_document_content = [] - - for chatDocument in chatDocuments: - fileId = chatDocument.fileId - file_data = self.serviceContainer.getFileData(fileId) - file_info = self.serviceContainer.getFileInfo(fileId) - - if not file_data: - logger.warning(f"File data not found for fileId: {fileId}") - continue - - all_document_content.append({ - "fileId": fileId, - "fileName": file_info.get('name', 'unknown'), - "content": file_data - }) - - if not all_document_content: - return self._createResult( - success=False, - data={}, - error="No content could be extracted from any documents" - ) - - # Combine all document content for transformation - combined_content = "\n\n--- DOCUMENT SEPARATOR ---\n\n".join([ - f"File: {doc['fileName']}\nContent: {doc['content']}" - for doc in all_document_content - ]) - - # Create transformation prompt - transform_prompt = f""" - Transform the following data according to the specified rules. - - Data to transform: - {combined_content} - - Transformation rules: - {transformation} - - Output format: {outputFormat} - - Please provide: - 1. Transformed data in the specified format - 2. Summary of transformation results - 3. Validation of transformation rules - 4. Any data quality improvements - """ - - # Use AI to perform transformation - transformed_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(transform_prompt) - - # Create result data - result_data = { - "documentCount": len(chatDocuments), - "transformation": transformation, - "outputFormat": outputFormat, - "transformedData": transformed_result, - "timestamp": datetime.now(UTC).isoformat() - } - - return self._createResult( - success=True, - data={ - "documentName": f"transformed_data_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{outputFormat}", - "documentData": result_data - } - ) - - except Exception as e: - logger.error(f"Error transforming data: {str(e)}") - return self._createResult( - success=False, - data={}, - error=str(e) - ) diff --git a/modules/methods/methodOutlook.py b/modules/methods/methodOutlook.py index 2563ab06..f681931e 100644 --- a/modules/methods/methodOutlook.py +++ b/modules/methods/methodOutlook.py @@ -25,12 +25,12 @@ class MethodOutlook(MethodBase): def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: """Get Microsoft connection from connection reference""" try: - userConnection = self.serviceContainer.getUserConnectionFromConnectionReference(connectionReference) + userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference) if not userConnection or userConnection.authority != "msft" or userConnection.status != "active": return None # Get the corresponding token for this user and authority - token = self.serviceContainer.interfaceApp.getToken(userConnection.authority) + token = self.service.interfaceApp.getToken(userConnection.authority) if not token: logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}") return None @@ -95,7 +95,7 @@ class MethodOutlook(MethodBase): """ # Use AI to simulate email reading - email_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(email_prompt) + email_data = await self.service.interfaceAiCalls.callAiTextAdvanced(email_prompt) # Create result data result_data = { @@ -115,8 +115,12 @@ class MethodOutlook(MethodBase): return self._createResult( success=True, data={ - "documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"outlook_emails_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) @@ -184,7 +188,7 @@ class MethodOutlook(MethodBase): """ # Use AI to simulate email sending - send_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(send_prompt) + send_result = await self.service.interfaceAiCalls.callAiTextAdvanced(send_prompt) # Create result data result_data = { @@ -269,7 +273,7 @@ class MethodOutlook(MethodBase): """ # Use AI to simulate email search - search_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(search_prompt) + search_result = await self.service.interfaceAiCalls.callAiTextAdvanced(search_prompt) # Create result data result_data = { diff --git a/modules/methods/methodSharepoint.py b/modules/methods/methodSharepoint.py index bd2b7d54..cb36b57b 100644 --- a/modules/methods/methodSharepoint.py +++ b/modules/methods/methodSharepoint.py @@ -24,12 +24,12 @@ class MethodSharepoint(MethodBase): def _getMicrosoftConnection(self, connectionReference: str) -> Optional[Dict[str, Any]]: """Get Microsoft connection from connection reference""" try: - userConnection = self.serviceContainer.getUserConnectionFromConnectionReference(connectionReference) + userConnection = self.service.getUserConnectionFromConnectionReference(connectionReference) if not userConnection or userConnection.authority != "msft" or userConnection.status != "active": return None # Get the corresponding token for this user and authority - token = self.serviceContainer.interfaceApp.getToken(userConnection.authority) + token = self.service.interfaceApp.getToken(userConnection.authority) if not token: logger.warning(f"No token found for user {userConnection.userId} and authority {userConnection.authority}") return None @@ -92,7 +92,7 @@ class MethodSharepoint(MethodBase): 5. Search statistics and coverage """ - find_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(find_prompt) + find_result = await self.service.interfaceAiCalls.callAiTextAdvanced(find_prompt) result_data = { "connectionReference": connectionReference, @@ -111,8 +111,12 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, data={ - "documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"sharepoint_find_path_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) @@ -150,7 +154,8 @@ class MethodSharepoint(MethodBase): error="Document list reference, connection reference, site URL, and document paths are required" ) - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + # Get documents from reference + chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList) if not chatDocuments: return self._createResult( success=False, @@ -191,7 +196,7 @@ class MethodSharepoint(MethodBase): 5. Version history if available """ - document_data = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(sharepoint_prompt) + document_data = await self.service.interfaceAiCalls.callAiTextAdvanced(sharepoint_prompt) read_results.append({ "documentPath": documentPath, @@ -216,8 +221,12 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, data={ - "documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"sharepoint_documents_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) except Exception as e: @@ -264,7 +273,7 @@ class MethodSharepoint(MethodBase): ) # Get documents from reference - chatDocuments = self.serviceContainer.getChatDocumentsFromDocumentReference(documentList) + chatDocuments = self.service.getChatDocumentsFromDocumentList(documentList) if not chatDocuments: return self._createResult( success=False, @@ -279,7 +288,7 @@ class MethodSharepoint(MethodBase): if i < len(chatDocuments): chatDocument = chatDocuments[i] fileId = chatDocument.fileId - file_data = self.serviceContainer.getFileData(fileId) + file_data = self.service.getFileData(fileId) if not file_data: logger.warning(f"File data not found for fileId: {fileId}") @@ -305,7 +314,7 @@ class MethodSharepoint(MethodBase): """ # Use AI to simulate SharePoint upload - upload_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(upload_prompt) + upload_result = await self.service.interfaceAiCalls.callAiTextAdvanced(upload_prompt) upload_results.append({ "documentPath": documentPath, @@ -333,8 +342,12 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, data={ - "documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"sharepoint_upload_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) @@ -401,7 +414,7 @@ class MethodSharepoint(MethodBase): """ # Use AI to simulate SharePoint listing - list_result = await self.serviceContainer.interfaceAiCalls.callAiTextAdvanced(list_prompt) + list_result = await self.service.interfaceAiCalls.callAiTextAdvanced(list_prompt) list_results.append({ "folderPath": folderPath, @@ -426,8 +439,12 @@ class MethodSharepoint(MethodBase): return self._createResult( success=True, data={ - "documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"sharepoint_document_list_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) diff --git a/modules/methods/methodWeb.py b/modules/methods/methodWeb.py index 48b4b9ae..e993ab55 100644 --- a/modules/methods/methodWeb.py +++ b/modules/methods/methodWeb.py @@ -310,8 +310,12 @@ class MethodWeb(MethodBase): return self._createResult( success=True, data={ - "documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"web_crawl_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) @@ -399,8 +403,12 @@ class MethodWeb(MethodBase): return self._createResult( success=True, data={ - "documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{format}", - "documentData": result_data + "documents": [ + { + "documentName": f"web_scrape_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.{format}", + "documentData": result_data + } + ] } ) @@ -446,8 +454,8 @@ class MethodWeb(MethodBase): else: # Get user language from service container if available userLanguage = "en" # Default language - if hasattr(self.serviceContainer, 'user') and hasattr(self.serviceContainer.user, 'language'): - userLanguage = self.serviceContainer.user.language + if hasattr(self.service, 'user') and hasattr(self.service.user, 'language'): + userLanguage = self.service.user.language # Format the search request for SerpAPI params = { @@ -528,8 +536,12 @@ class MethodWeb(MethodBase): return self._createResult( success=True, data={ - "documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"web_search_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) @@ -600,8 +612,12 @@ class MethodWeb(MethodBase): return self._createResult( success=True, data={ - "documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", - "documentData": result_data + "documents": [ + { + "documentName": f"web_validation_{datetime.now(UTC).strftime('%Y%m%d_%H%M%S')}.json", + "documentData": result_data + } + ] } ) diff --git a/modules/workflow/managerChat.py b/modules/workflow/managerChat.py index f68c8888..a5e31b92 100644 --- a/modules/workflow/managerChat.py +++ b/modules/workflow/managerChat.py @@ -201,10 +201,23 @@ class ChatManager: if action.status == TaskStatus.PENDING: action.status = TaskStatus.COMPLETED if review_result.get('status') == 'success' else TaskStatus.FAILED + # Create serializable task actions + task_actions_serializable = [] + for action in task_actions: + action_dict = { + 'id': action.id, + 'execMethod': action.execMethod, + 'execAction': action.execAction, + 'execParameters': action.execParameters, + 'execResultLabel': action.execResultLabel, + 'status': action.status.value if hasattr(action.status, 'value') else str(action.status) + } + task_actions_serializable.append(action_dict) + # Create handover data handover_data = { 'task_step': task_step, - 'task_actions': task_actions, + 'task_actions': task_actions_serializable, 'review_result': review_result, 'next_task_ready': review_result.get('status') == 'success', 'available_results': self._getPreviousResultsFromActions(task_actions) @@ -215,9 +228,22 @@ class ChatManager: except Exception as e: logger.error(f"Error preparing task handover: {str(e)}") + # Create serializable task actions for exception case + task_actions_serializable = [] + for action in task_actions: + action_dict = { + 'id': action.id, + 'execMethod': action.execMethod, + 'execAction': action.execAction, + 'execParameters': action.execParameters, + 'execResultLabel': action.execResultLabel, + 'status': action.status.value if hasattr(action.status, 'value') else str(action.status) + } + task_actions_serializable.append(action_dict) + return { 'task_step': task_step, - 'task_actions': task_actions, + 'task_actions': task_actions_serializable, 'review_result': review_result, 'next_task_ready': False, 'available_results': [] @@ -407,8 +433,8 @@ class ChatManager: # Validate result label format result_label = action.get('resultLabel', '') - if not result_label.startswith('mdoc:'): - logger.error(f"Action {i} result label must start with 'mdoc:': {result_label}") + if not result_label.startswith('docList:'): + logger.error(f"Action {i} result label must start with 'docList:': {result_label}") return False # Validate parameters @@ -444,7 +470,7 @@ class ChatManager: "fileId": doc, "analysis": ["entities", "topics", "sentiment"] }, - "resultLabel": f"mdoc:fallback:{task_step.get('id', 'unknown')}:{i}:analysis", + "resultLabel": f"docList:fallback:{task_step.get('id', 'unknown')}:{i}:analysis", "description": f"Fallback document analysis for {doc}" }) @@ -525,7 +551,7 @@ AVAILABLE CONNECTIONS {chr(10).join(f"- {conn}" for conn in connRefs)} AVAILABLE DOCUMENTS -{chr(10).join(f"- {doc['documentReference']} ({doc['actionMethod']}.{doc['actionName']}, {doc['documentCount']} documents, {doc['datetime']})" for doc in docRefs.get('chat', []))} +{chr(10).join(f"- {doc.documentsLabel}: {', '.join(doc.documents)}" for doc in docRefs.get('chat', []))} PREVIOUS RESULTS: {', '.join(previous_results) if previous_results else 'None'} @@ -550,7 +576,7 @@ REQUIRED JSON STRUCTURE: "param1": "value1", "param2": "value2", }}, - "resultLabel": "mdoc:uuid:descriptiveLabel", + "resultLabel": "docList:uuid:descriptiveLabel", "description": "What this action does" }} ] @@ -560,7 +586,7 @@ FIELD REQUIREMENTS: - "method": Must be one of the available methods listed above - "action": Must be a valid action for that method - "parameters": Object with method-specific parameters -- "resultLabel": MUST start with "mdoc:" followed by unique identifier and descriptive label +- "resultLabel": MUST start with "docList:" followed by unique identifier and descriptive label - "description": Clear description of what the action accomplishes MANDATORY PARAMETER AND RETURN VALUE RULES: @@ -572,15 +598,16 @@ MANDATORY PARAMETER AND RETURN VALUE RULES: - Example: "connection:msft:testuser@example.com:1234" 2. DOCUMENT PARAMETERS: - - Parameter name: "documentReference" (NOT "document", "fileId", "documents", etc.) - - Value: Must be a document reference from "Documents" section or previous results - - Format: "mdoc:uuid:descriptiveLabel" + - Parameter name: "documentList" (NOT "documentReference", "document", "fileId", "documents", etc.) + - Value: MUST be a LIST of document references from "Documents" section or previous results + - Format: Use the exact format shown in "Documents" section (e.g., ["docItem:id:filename"] or ["docList:actionId:label"]) - Document references represent a LIST of documents, not single documents - - All document inputs expect documentList references + - All document inputs expect documentList as an ARRAY of strings + - IMPORTANT: Use the exact document reference format as shown in "Documents" section above 3. RETURN VALUES: - ALL actions must return documentList references in resultLabel - - Result labels must start with "mdoc:" + - Result labels must start with "docList:" - Each action creates a unique documentList for handover - Document lists can contain 0, 1, or multiple documents - No actions return single documents - always documentLists @@ -589,28 +616,24 @@ MANDATORY PARAMETER AND RETURN VALUE RULES: - Use only document references from "Documents" section above - Use only connection references from "Connections" section above - Use result labels from previous results in the sequence - - All parameter values must be strings - - Document references show: method.action - document count - timestamp + - All parameter values must be strings (except documentList which must be an array) + - Document references show: label - list of references 5. RESULT USAGE RULES: - - Previous results can be referenced as: "mdoc:uuid:label" + - Previous results can be referenced as: "docList:uuid:label" - Use result labels from previous actions in the sequence - - Example: If previous action created "mdoc:abc123:salesData", - reference it as "mdoc:abc123:salesData" in parameters + - Example: If previous action created "docList:abc123:salesData", + reference it as "docList:abc123:salesData" in parameters - Results are available in the PREVIOUS RESULTS section above - Each action should create a unique resultLabel for handover to next actions - Result labels should be descriptive and indicate the content type -METHOD-SPECIFIC PARAMETER REQUIREMENTS: - -- coder: Uses "code" (string), "language" (string), "requirements" (string) -- document: Uses "documentReference" (documentList), "fileId" (string for single files) -- excel: Uses "connectionReference" (connection), "fileId" (string) -- operator: Uses "items" (array), "prompt" (string), "documents" (array of documentReferences) -- outlook: Uses "connectionReference" (connection) -- powerpoint: Uses "connectionReference" (connection), "fileId" (string) -- sharepoint: Uses "connectionReference" (connection) -- web: Uses "query" (string), "url" (string) +6. DOCUMENT HANDLING RULES: + - ALWAYS pass documents as a LIST in documentList parameter + - Single documents: ["docItem:id:filename"] + - Multiple documents: ["docItem:id1:file1", "docItem:id2:file2"] + - Document lists: ["docList:actionId:label"] + - Mixed references: ["docItem:id:file", "docList:actionId:label"] EXAMPLE VALID ACTIONS: @@ -622,33 +645,45 @@ EXAMPLE VALID ACTIONS: "connectionReference": "connection:msft:testuser@example.com:1234", "query": "sales quarterly report" }}, - "resultLabel": "mdoc:abc123:salesDocuments", + "resultLabel": "docList:abc123:salesDocuments", "description": "Search SharePoint for sales documents" }} -2. Document Analysis using previous results: +2. Document Analysis using single document: {{ "method": "document", "action": "analyze", "parameters": {{ - "documentReference": "mdoc:def456:customerData", - "analysis": ["entities", "topics", "sentiment"] + "documentList": ["docItem:doc_57520394-6b6d-41c2-b641-bab3fc6d7f4b:candidate_1_profile.txt"], + "aiPrompt": "Analyze the candidate profile for key insights" }}, - "resultLabel": "mdoc:ghi789:customerAnalysis", - "description": "Analyze customer data for insights" + "resultLabel": "docList:ghi789:candidateAnalysis", + "description": "Analyze candidate profile for insights" }} -3. Excel Read: +3. Document Analysis using multiple documents: {{ - "method": "excel", - "action": "read", + "method": "document", + "action": "analyze", "parameters": {{ - "connectionReference": "connection:msft:testuser@example.com:1234", - "fileId": "excel_file_123", - "sheetName": "Sheet1" + "documentList": ["docItem:doc_123:profile.txt", "docItem:doc_456:resume.pdf"], + "aiPrompt": "Compare the profile and resume for consistency" }}, - "resultLabel": "mdoc:jkl012:excelData", - "description": "Read data from Excel file" + "resultLabel": "docList:jkl012:comparisonAnalysis", + "description": "Compare multiple documents for consistency" +}} + +4. Document Extraction using document list: +{{ + "method": "document", + "action": "extract", + "parameters": {{ + "documentList": ["docList:abc123:salesData"], + "aiPrompt": "Extract key information from all sales documents", + "format": "json" + }}, + "resultLabel": "docList:mno345:extractedData", + "description": "Extract key information from document list" }} NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" @@ -659,13 +694,37 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" task_step = review_context['task_step'] step_result = review_context['step_result'] + # Create serializable version of step_result + step_result_serializable = { + 'task_step': step_result.get('task_step', {}), + 'action_results': [], + 'successful_actions': step_result.get('successful_actions', 0), + 'total_actions': step_result.get('total_actions', 0), + 'results': step_result.get('results', []), + 'errors': step_result.get('errors', []) + } + + # Convert action_results to serializable format + for action_result in step_result.get('action_results', []): + serializable_action_result = { + 'status': action_result.get('status', ''), + 'result': action_result.get('result', ''), + 'error': action_result.get('error', ''), + 'resultLabel': action_result.get('resultLabel', ''), + 'documents': action_result.get('documents', []), + 'actionId': action_result.get('actionId', ''), + 'actionMethod': action_result.get('actionMethod', ''), + 'actionName': action_result.get('actionName', '') + } + step_result_serializable['action_results'].append(serializable_action_result) + return f"""You are a result review AI that evaluates task step completion and decides on next actions. TASK STEP: {task_step.get('description', 'Unknown')} EXPECTED OUTPUTS: {', '.join(task_step.get('expected_outputs', []))} SUCCESS CRITERIA: {', '.join(task_step.get('success_criteria', []))} -STEP RESULT: {json.dumps(step_result, indent=2)} +STEP RESULT: {json.dumps(step_result_serializable, indent=2)} INSTRUCTIONS: 1. Evaluate if the task step was completed successfully @@ -723,14 +782,17 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" parameters=action.execParameters ) + # Always use the execResultLabel from the action definition + result_label = action.execResultLabel + # Update action based on result if result.success: action.setSuccess() action.result = result.data.get("result", "") - action.execResultLabel = result.data.get("resultLabel", "") + action.execResultLabel = result_label # Create and store message in workflow for successful action - await self._createActionMessage(action, result, workflow) + await self._createActionMessage(action, result, workflow, result_label) else: action.setError(result.error or "Action execution failed") @@ -739,9 +801,11 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" "status": "completed" if result.success else "failed", "result": result.data.get("result", ""), "error": result.error or "", - "resultLabel": result.data.get("resultLabel", ""), + "resultLabel": result_label, "documents": result.data.get("documents", []), - "action": action + "actionId": action.id, + "actionMethod": action.execMethod, + "actionName": action.execAction } except Exception as e: @@ -750,16 +814,19 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" return { "status": "failed", "error": str(e), - "action": action + "actionId": action.id, + "actionMethod": action.execMethod, + "actionName": action.execAction } - async def _createActionMessage(self, action: TaskAction, result: Any, workflow: ChatWorkflow) -> None: + async def _createActionMessage(self, action: TaskAction, result: Any, workflow: ChatWorkflow, result_label: str = None) -> None: """Create and store a message for the action result in the workflow""" try: # Get result data result_data = result.data if hasattr(result, 'data') else {} - result_label = result_data.get("resultLabel", "") documents_data = result_data.get("documents", []) + if result_label is None: + result_label = action.execResultLabel # Create message data message_data = { @@ -772,7 +839,7 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" "actionId": action.id, "actionMethod": action.execMethod, "actionName": action.execAction, - "documentsLabel": result_label, # Use resultLabel as documentsLabel + "documentsLabel": result_label, # Always use execResultLabel "documents": [] } @@ -1128,8 +1195,18 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" logger.warning(f"No actions defined for task {i+1}, skipping") continue - # Log task actions - logger.debug(f"TASK {i+1} ACTIONS CREATED: {json.dumps(task_actions, indent=2, ensure_ascii=False)}") + # Log task actions (convert to serializable format) + task_actions_serializable = [] + for action in task_actions: + action_dict = { + 'execMethod': action.execMethod, + 'execAction': action.execAction, + 'execParameters': action.execParameters, + 'execResultLabel': action.execResultLabel, + 'status': action.status.value if hasattr(action.status, 'value') else str(action.status) + } + task_actions_serializable.append(action_dict) + logger.debug(f"TASK {i+1} ACTIONS CREATED: {json.dumps(task_actions_serializable, indent=2, ensure_ascii=False)}") # Phase 3: Execute Task Actions logger.info(f"--- PHASE 3: EXECUTING ACTIONS FOR TASK {i+1} ---") @@ -1173,11 +1250,35 @@ NOTE: Respond with ONLY the JSON object. Do not include any explanatory text.""" successful_tasks = sum(1 for result in workflow_results if result['review_result'].get('status') == 'success') total_tasks = len(workflow_results) + # Create serializable workflow results + workflow_results_serializable = [] + for result in workflow_results: + serializable_result = { + 'task_step': result['task_step'], + 'action_results': result['action_results'], + 'review_result': result['review_result'], + 'handover_data': result['handover_data'] + } + # Convert task_actions to serializable format + if 'task_actions' in result: + task_actions_serializable = [] + for action in result['task_actions']: + action_dict = { + 'execMethod': action.execMethod, + 'execAction': action.execAction, + 'execParameters': action.execParameters, + 'execResultLabel': action.execResultLabel, + 'status': action.status.value if hasattr(action.status, 'value') else str(action.status) + } + task_actions_serializable.append(action_dict) + serializable_result['task_actions'] = task_actions_serializable + workflow_results_serializable.append(serializable_result) + workflow_summary = { 'status': 'completed' if successful_tasks == total_tasks else 'partial', 'successful_tasks': successful_tasks, 'total_tasks': total_tasks, - 'workflow_results': workflow_results, + 'workflow_results': workflow_results_serializable, 'final_results': previous_results } diff --git a/modules/workflow/managerDocument.py b/modules/workflow/managerDocument.py index 2a0b7b7a..4c182eaf 100644 --- a/modules/workflow/managerDocument.py +++ b/modules/workflow/managerDocument.py @@ -57,7 +57,7 @@ class DocumentManager: logger.error(f"Error extracting from document: {str(e)}") raise - async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False) -> ExtractedContent: + async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> ExtractedContent: """Extract content from file data directly using prompt""" try: return await self._processor.processFileData( @@ -65,7 +65,8 @@ class DocumentManager: filename=filename, mimeType=mimeType, base64Encoded=base64Encoded, - prompt=prompt + prompt=prompt, + documentId=documentId ) except Exception as e: logger.error(f"Error extracting from file data: {str(e)}") diff --git a/modules/workflow/methodBase.py b/modules/workflow/methodBase.py index 9ceba134..fe109512 100644 --- a/modules/workflow/methodBase.py +++ b/modules/workflow/methodBase.py @@ -130,6 +130,18 @@ class MethodBase: descriptions[lastParam] += " " + line return descriptions, types + def _validateDocumentListParameter(self, parameters: Dict[str, Any], paramName: str = "documentList") -> bool: + """Validate that documentList parameter is a list of strings""" + if paramName not in parameters: + return False + + value = parameters[paramName] + if not isinstance(value, list): + return False + + # Check that all items in the list are strings + return all(isinstance(item, str) for item in value) + def _extractMainDescription(self, docstring: str) -> str: """Extract main description from docstring""" if not docstring: @@ -217,7 +229,18 @@ class MethodBase: actionDef = self.actions[action] requiredParams = {k for k, v in actionDef['parameters'].items() if v['required']} - return all(param in parameters for param in requiredParams) + + # Check required parameters + if not all(param in parameters for param in requiredParams): + return False + + # Validate documentList parameter if present + if "documentList" in parameters: + if not self._validateDocumentListParameter(parameters, "documentList"): + self.logger.error("documentList parameter must be a list of strings") + return False + + return True except Exception as e: self.logger.error(f"Error validating parameters: {str(e)}") diff --git a/modules/workflow/processorDocument.py b/modules/workflow/processorDocument.py index bb352a26..1ea5084c 100644 --- a/modules/workflow/processorDocument.py +++ b/modules/workflow/processorDocument.py @@ -109,7 +109,7 @@ class DocumentProcessor: except ImportError as e: logger.warning(f"Image processing libraries could not be loaded: {e}") - async def processFileData(self, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, prompt: str = None) -> ExtractedContent: + async def processFileData(self, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, prompt: str = None, documentId: str = None) -> ExtractedContent: """ Process file data directly and extract its contents with AI processing. @@ -154,8 +154,7 @@ class DocumentProcessor: logger.error(f"Error processing content with AI: {str(e)}") return ExtractedContent( - objectId=str(uuid.uuid4()), - objectType="FileData", + id=documentId if documentId else str(uuid.uuid4()), contents=contentItems ) @@ -550,11 +549,11 @@ class DocumentProcessor: # Chunk content based on type if mimeType.startswith('text/'): - chunks = await self._chunkText(item.data, mimeType) + chunks = self._chunkText(item.data, mimeType) elif mimeType.startswith('image/'): - chunks = await self._chunkImage(item.data) + chunks = self._chunkImage(item.data) elif mimeType.startswith('video/'): - chunks = await self._chunkVideo(item.data) + chunks = self._chunkVideo(item.data) else: # Binary data - no chunking chunks = [item.data] diff --git a/modules/workflow/serviceContainer.py b/modules/workflow/serviceContainer.py index 09bd9538..53261b7e 100644 --- a/modules/workflow/serviceContainer.py +++ b/modules/workflow/serviceContainer.py @@ -6,7 +6,8 @@ from typing import Dict, Any, List, Optional from modules.interfaces.interfaceAppModel import User, UserConnection from modules.interfaces.interfaceChatModel import ( TaskStatus, ChatDocument, TaskItem, TaskAction, TaskResult, - ChatStat, ChatLog, ChatMessage, ChatWorkflow + + ChatStat, ChatLog, ChatMessage, ChatWorkflow, DocumentExchange ) from modules.interfaces.interfaceAiCalls import AiCalls from modules.interfaces.interfaceChatObjects import getInterface as getChatObjects @@ -17,6 +18,7 @@ from modules.workflow.managerDocument import DocumentManager from modules.workflow.methodBase import MethodBase import uuid import base64 +import hashlib logger = logging.getLogger(__name__) @@ -115,9 +117,13 @@ class ServiceContainer: """Extract content from document using prompt""" return self.documentManager.extractContentFromDocument(prompt, document) - def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False) -> str: + async def extractContentFromFileData(self, prompt: str, fileData: bytes, filename: str, mimeType: str, base64Encoded: bool = False, documentId: str = None) -> str: """Extract content from file data directly using prompt""" - return self.documentManager.extractContentFromFileData(prompt, fileData, filename, mimeType, base64Encoded) + extracted_content = await self.documentManager.extractContentFromFileData(prompt, fileData, filename, mimeType, base64Encoded, documentId) + # Convert ExtractedContent to string for backward compatibility + if hasattr(extracted_content, 'contents'): + return "\n".join([item.data for item in extracted_content.contents]) + return str(extracted_content) def getMethodsCatalog(self) -> Dict[str, Any]: """Get catalog of available methods and their actions""" @@ -148,127 +154,120 @@ class ServiceContainer: return methodList - def getDocumentReferenceList(self) -> Dict[str, List[Dict[str, str]]]: - """Get list of document references sorted by datetime, categorized by chat round""" - chat_refs = [] - history_refs = [] + def getDocumentReferenceList(self) -> Dict[str, List[DocumentExchange]]: + """Get list of document exchanges sorted by datetime, categorized by chat round""" + chat_exchanges = [] + history_exchanges = [] # Process messages in reverse order to find current chat round for message in reversed(self.workflow.messages): # Get document references from message if message.documents: - # For messages with action context, use documentList reference + # For messages with action context, create DocumentExchange with docList reference if message.actionId and message.documentsLabel: doc_ref = self.getDocumentReferenceFromMessage(message) if doc_ref: - doc_info = { - "documentReference": doc_ref, - "datetime": message.publishedAt, - "actionMethod": message.actionMethod, - "actionName": message.actionName, - "documentCount": len(message.documents) - } + # Create DocumentExchange with single docList reference + doc_exchange = DocumentExchange( + documentsLabel=message.documentsLabel, + documents=[doc_ref] + ) # Add to appropriate list based on message status if message.status == "first": - chat_refs.append(doc_info) + chat_exchanges.append(doc_exchange) break # Stop after finding first message elif message.status == "step": - chat_refs.append(doc_info) + chat_exchanges.append(doc_exchange) else: - history_refs.append(doc_info) - # For regular messages, use individual document references + history_exchanges.append(doc_exchange) + # For regular messages, create DocumentExchange with individual docItem references else: + doc_refs = [] for doc in message.documents: doc_ref = self.getDocumentReferenceFromChatDocument(doc) - doc_info = { - "documentReference": doc_ref, - "datetime": message.publishedAt, - "actionMethod": None, - "actionName": None, - "documentCount": 1 - } + doc_refs.append(doc_ref) + + if doc_refs: + # Create DocumentExchange with individual document references + doc_exchange = DocumentExchange( + documentsLabel=f"{message.id}:documents", + documents=doc_refs + ) # Add to appropriate list based on message status if message.status == "first": - chat_refs.append(doc_info) + chat_exchanges.append(doc_exchange) break # Stop after finding first message elif message.status == "step": - chat_refs.append(doc_info) + chat_exchanges.append(doc_exchange) else: - history_refs.append(doc_info) + history_exchanges.append(doc_exchange) # Stop processing if we hit a first message if message.status == "first": break # Sort both lists by datetime in descending order - chat_refs.sort(key=lambda x: x["datetime"], reverse=True) - history_refs.sort(key=lambda x: x["datetime"], reverse=True) + chat_exchanges.sort(key=lambda x: x.documentsLabel, reverse=True) + history_exchanges.sort(key=lambda x: x.documentsLabel, reverse=True) return { - "chat": chat_refs, - "history": history_refs + "chat": chat_exchanges, + "history": history_exchanges } def getDocumentReferenceFromChatDocument(self, document: ChatDocument) -> str: """Get document reference from ChatDocument""" - return f"cdoc:{document.id}:{document.filename}" + return f"docItem:{document.id}:{document.filename}" def getDocumentReferenceFromMessage(self, message: ChatMessage) -> str: - """Get document reference from ChatMessage with action context""" - if not message.actionId or not message.documentsLabel: - return None - + """Get document reference from ChatMessage""" # If documentsLabel already contains the full reference format, return it - if message.documentsLabel.startswith("mdoc:"): + if message.documentsLabel.startswith("docList:"): return message.documentsLabel - # Otherwise construct the reference using the action ID and documents label - return f"mdoc:{message.actionId}:{message.documentsLabel}" + # Otherwise construct the reference using the message ID and documents label + return f"docList:{message.id}:{message.documentsLabel}" - def getChatDocumentsFromDocumentReference(self, documentReference: str) -> List[ChatDocument]: - """Get ChatDocuments from document reference""" + def getChatDocumentsFromDocumentList(self, documentList: List[str]) -> List[ChatDocument]: + """Get ChatDocuments from a list of document references""" try: - # Parse reference format - parts = documentReference.split(':', 2) # Split into max 3 parts - if len(parts) < 3: - return [] + all_documents = [] + for doc_ref in documentList: + # Parse reference format + parts = doc_ref.split(':', 2) # Split into max 3 parts + if len(parts) < 3: + continue + + ref_type = parts[0] + ref_id = parts[1] + ref_label = parts[2] # Keep the full label - ref_type = parts[0] - ref_id = parts[1] - ref_label = parts[2] # Keep the full label - - if ref_type == "cdoc": - # Handle ChatDocument reference: cdoc:: - # Find document in workflow messages - for message in self.workflow.messages: - if message.documents: - for doc in message.documents: - if doc.id == ref_id: - return [doc] + if ref_type == "docItem": + # Handle ChatDocument reference: docItem:: + # Find document in workflow messages + for message in self.workflow.messages: + if message.documents: + for doc in message.documents: + if doc.id == ref_id: + all_documents.append(doc) + break + if any(doc.id == ref_id for doc in message.documents): + break - elif ref_type == "mdoc": - # Handle document list reference: mdoc::