# Copyright (c) 2025 Patrick Motsch # All rights reserved. """Chat service for document processing, chat operations, and workflow management.""" import logging from typing import Dict, Any, List, Optional, Callable from modules.datamodels.datamodelUam import User, UserConnection from modules.datamodels.datamodelChat import ChatDocument, ChatMessage, ChatLog from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum from modules.shared.progressLogger import ProgressLogger logger = logging.getLogger(__name__) class ChatService: """Service class containing methods for document processing, chat operations, and workflow management.""" def __init__(self, context, get_service: Callable[[str], Any]): """Initialize with ServiceCenterContext and service resolver.""" self._context = context self._get_service = get_service self.user = context.user from modules.interfaces.interfaceDbApp import getInterface as getAppInterface from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface from modules.interfaces.interfaceDbChat import getInterface as getChatInterface self.interfaceDbApp = getAppInterface(context.user, mandateId=context.mandate_id) self.interfaceDbComponent = getComponentInterface(context.user, mandateId=context.mandate_id) self.interfaceDbChat = getChatInterface( context.user, mandateId=context.mandate_id, featureInstanceId=context.feature_instance_id, ) self._progressLogger = None @property def _workflow(self): """Workflow from context (stable during workflow execution).""" return self._context.workflow def getChatDocumentsFromDocumentList(self, documentList) -> List[ChatDocument]: """Get ChatDocuments from a DocumentReferenceList. Args: documentList: DocumentReferenceList (required) Returns: List[ChatDocument]: List of ChatDocument objects """ from modules.datamodels.datamodelDocref import DocumentReferenceList if not isinstance(documentList, DocumentReferenceList): logger.error(f"getChatDocumentsFromDocumentList: Invalid documentList type: {type(documentList)}. Expected DocumentReferenceList.") return [] # Convert to string list for processing stringRefs = documentList.to_string_list() try: # Use self._workflow which is the ChatWorkflow object (stable during workflow execution) workflow = self._workflow if not workflow: logger.error("getChatDocumentsFromDocumentList: No workflow available (self._workflow is not set)") return [] workflowId = workflow.id if hasattr(workflow, 'id') else 'NO_ID' workflowObjId = id(workflow) logger.debug(f"getChatDocumentsFromDocumentList: input documentList = {stringRefs}") logger.debug(f"getChatDocumentsFromDocumentList: using workflow.id = {workflowId}, workflow object id = {workflowObjId}") # Root cause analysis: Verify workflow.messages integrity and detect workflow changes self._verifyWorkflowMessagesIntegrity(workflow, workflowId) # Debug: list available messages with their labels and document names (filtered by workflowId) try: if workflow and hasattr(workflow, 'messages') and workflow.messages: msgLines = [] messagesFromOtherWorkflows = [] for message in workflow.messages: msgWorkflowId = getattr(message, 'workflowId', None) # Only include messages that belong to this workflow if msgWorkflowId and msgWorkflowId != workflowId: messagesFromOtherWorkflows.append(f"id={getattr(message, 'id', None)}, label={getattr(message, 'documentsLabel', None)}, workflowId={msgWorkflowId}") continue # Also skip messages without workflowId (shouldn't happen, but be safe) if not msgWorkflowId: messagesFromOtherWorkflows.append(f"id={getattr(message, 'id', None)}, label={getattr(message, 'documentsLabel', None)}, workflowId=Missing") continue label = getattr(message, 'documentsLabel', None) docNames = [] if getattr(message, 'documents', None): for doc in message.documents: name = getattr(doc, 'fileName', None) or getattr(doc, 'documentName', None) or 'Unnamed' docNames.append(name) msgLines.append( f"- id={getattr(message, 'id', None)}, label={label}, workflowId={msgWorkflowId}, docs={docNames}" ) if msgLines: logger.debug("getChatDocumentsFromDocumentList: available messages (filtered for workflow):\n" + "\n".join(msgLines)) if messagesFromOtherWorkflows: logger.warning(f"getChatDocumentsFromDocumentList: Found {len(messagesFromOtherWorkflows)} messages from other workflows in workflow.messages list:\n" + "\n".join(messagesFromOtherWorkflows)) else: logger.debug("getChatDocumentsFromDocumentList: no messages available on current workflow") except Exception as e: logger.debug(f"getChatDocumentsFromDocumentList: unable to enumerate messages for debug: {e}") allDocuments = [] for docRef in stringRefs: if docRef.startswith("docItem:"): # docItem:: or docItem: (filename is optional) # ALWAYS try to match by documentId first (parts[1] is always the documentId when format is correct) # Both formats are supported: docItem: and docItem:: parts = docRef.split(':') if len(parts) >= 2: docId = parts[1] # This should be the documentId (UUID) docFound = False # ALWAYS try to match by documentId first (regardless of number of parts) # This handles both formats: # - docItem: (without filename - still works) # - docItem:: (with filename - preferred) for message in workflow.messages: # Validate message belongs to this workflow msgWorkflowId = getattr(message, 'workflowId', None) if not msgWorkflowId or msgWorkflowId != workflowId: continue if message.documents: for doc in message.documents: if doc.id == docId: allDocuments.append(doc) docFound = True logger.debug(f"Matched document reference '{docRef}' to document {doc.id} (fileName: {getattr(doc, 'fileName', 'unknown')}) by documentId") break if docFound: break # Fallback: If not found by documentId and it looks like a filename (has file extension), try filename matching # This handles cases where AI incorrectly generates docItem:filename.docx if not docFound and '.' in docId and len(parts) == 2: # Format: docItem:filename (AI generated wrong format) - try to match by filename filename = parts[1] logger.warning(f"Document reference '{docRef}' not found by documentId, attempting to match by filename: {filename}") for message in workflow.messages: # Validate message belongs to this workflow msgWorkflowId = getattr(message, 'workflowId', None) if not msgWorkflowId or msgWorkflowId != workflowId: continue if message.documents: for doc in message.documents: docFileName = getattr(doc, 'fileName', '') # Match filename exactly or by base name (without path) if docFileName == filename or docFileName.endswith(filename): allDocuments.append(doc) docFound = True logger.info(f"Matched document reference '{docRef}' to document {doc.id} by filename {docFileName}") break if docFound: break if not docFound: logger.error(f"Could not resolve document reference '{docRef}' - no document found with filename '{filename}'") elif not docFound: logger.error(f"Could not resolve document reference '{docRef}' - no document found with documentId '{docId}'") elif docRef.startswith("docList:"): # docList::