gateway/modules/serviceCenter/services/serviceChat/mainServiceChat.py
2026-03-16 13:13:46 +01:00

1193 lines
59 KiB
Python

# Copyright (c) 2025 Patrick Motsch
# All rights reserved.
"""Chat service for document processing, chat operations, and workflow management."""
import logging
from typing import Dict, Any, List, Optional, Callable
from modules.datamodels.datamodelUam import User, UserConnection
from modules.datamodels.datamodelChat import ChatDocument, ChatMessage, ChatLog
from modules.datamodels.datamodelAi import AiCallOptions, OperationTypeEnum, PriorityEnum, ProcessingModeEnum
from modules.shared.progressLogger import ProgressLogger
logger = logging.getLogger(__name__)
class ChatService:
"""Service class containing methods for document processing, chat operations, and workflow management."""
def __init__(self, context, get_service: Callable[[str], Any]):
    """Bind the service to a ServiceCenterContext and open its DB interfaces.

    Args:
        context: ServiceCenterContext; ``user``, ``mandate_id`` and
            ``feature_instance_id`` are read from it here.
        get_service: Resolver that returns another service by its name.
    """
    # Interface factories are imported at call time, not at module import time.
    from modules.interfaces.interfaceDbApp import getInterface as getAppInterface
    from modules.interfaces.interfaceDbManagement import getInterface as getComponentInterface
    from modules.interfaces.interfaceDbChat import getInterface as getChatInterface

    self._context = context
    self._get_service = get_service
    self.user = context.user

    # All three interfaces are scoped to the same user and mandate.
    currentUser = context.user
    mandateId = context.mandate_id
    self.interfaceDbApp = getAppInterface(currentUser, mandateId=mandateId)
    self.interfaceDbComponent = getComponentInterface(currentUser, mandateId=mandateId)
    # The chat interface is additionally scoped to the feature instance.
    self.interfaceDbChat = getChatInterface(
        currentUser,
        mandateId=mandateId,
        featureInstanceId=context.feature_instance_id,
    )
    self._progressLogger = None
@property
def _workflow(self):
"""Workflow from context (stable during workflow execution)."""
return self._context.workflow
def getChatDocumentsFromDocumentList(self, documentList) -> List[ChatDocument]:
    """Resolve a DocumentReferenceList to ChatDocuments of the current workflow.

    Each entry of ``documentList.to_string_list()`` is resolved independently.
    Supported reference formats:
        - ``docItem:DOCUMENT_ID`` or ``docItem:DOCUMENT_ID:FILENAME``: a single
          document matched by id. ``docItem:FILENAME`` (a two-part reference
          whose second part contains a dot) falls back to filename matching.
        - ``docList:MESSAGE_ID:LABEL``: all documents of the message with that id.
        - ``docList:LABEL``: all documents of the first message with that label.
        - anything else: treated as a ``documentsLabel``; when several messages
          carry it, the one with the highest ``publishedAt`` is used.

    Only messages whose ``workflowId`` equals the current workflow's id are
    searched.

    Args:
        documentList: DocumentReferenceList (required)

    Returns:
        List[ChatDocument]: Resolved documents in reference order. An empty
        list is returned for a wrong argument type, a missing workflow, or any
        error raised during resolution. An unknown direct label reference
        raises ``ValueError`` internally and therefore also yields an empty list.
    """
    from modules.datamodels.datamodelDocref import DocumentReferenceList
    if not isinstance(documentList, DocumentReferenceList):
        logger.error(f"getChatDocumentsFromDocumentList: Invalid documentList type: {type(documentList)}. Expected DocumentReferenceList.")
        return []
    # Convert to string list for processing
    stringRefs = documentList.to_string_list()
    try:
        workflow = self._workflow
        if not workflow:
            logger.error("getChatDocumentsFromDocumentList: No workflow available (self._workflow is not set)")
            return []
        workflowId = workflow.id if hasattr(workflow, 'id') else 'NO_ID'
        workflowObjId = id(workflow)
        logger.debug(f"getChatDocumentsFromDocumentList: input documentList = {stringRefs}")
        logger.debug(f"getChatDocumentsFromDocumentList: using workflow.id = {workflowId}, workflow object id = {workflowObjId}")
        # Root cause analysis: verify workflow.messages integrity and detect workflow changes
        self._verifyWorkflowMessagesIntegrity(workflow, workflowId)
        self._docRefLogAvailableMessages(workflow, workflowId)

        allDocuments = []
        for docRef in stringRefs:
            if docRef.startswith("docItem:"):
                allDocuments.extend(self._docRefResolveItem(workflow, workflowId, docRef))
            elif docRef.startswith("docList:"):
                allDocuments.extend(self._docRefResolveList(workflow, workflowId, docRef))
            else:
                allDocuments.extend(self._docRefResolveLabel(workflow, workflowId, docRef))
        logger.debug(f"Resolved {len(allDocuments)} documents from document list: {documentList}")
        return allDocuments
    except Exception as e:
        logger.error(f"Error getting documents from document list: {str(e)}")
        return []

@staticmethod
def _docRefMessageBelongs(message, workflowId) -> bool:
    """Return True when the message carries a workflowId equal to ``workflowId``."""
    msgWorkflowId = getattr(message, 'workflowId', None)
    return bool(msgWorkflowId) and msgWorkflowId == workflowId

def _docRefLogAvailableMessages(self, workflow, workflowId) -> None:
    """Log the labels and document names of the workflow's messages (debug aid)."""
    try:
        if not (workflow and hasattr(workflow, 'messages') and workflow.messages):
            logger.debug("getChatDocumentsFromDocumentList: no messages available on current workflow")
            return
        msgLines = []
        messagesFromOtherWorkflows = []
        for message in workflow.messages:
            msgWorkflowId = getattr(message, 'workflowId', None)
            label = getattr(message, 'documentsLabel', None)
            if not self._docRefMessageBelongs(message, workflowId):
                # Foreign messages and messages without a workflowId are reported, not listed
                messagesFromOtherWorkflows.append(
                    f"id={getattr(message, 'id', None)}, label={label}, workflowId={msgWorkflowId or 'Missing'}"
                )
                continue
            docNames = [
                getattr(doc, 'fileName', None) or getattr(doc, 'documentName', None) or 'Unnamed'
                for doc in (getattr(message, 'documents', None) or [])
            ]
            msgLines.append(
                f"- id={getattr(message, 'id', None)}, label={label}, workflowId={msgWorkflowId}, docs={docNames}"
            )
        if msgLines:
            logger.debug("getChatDocumentsFromDocumentList: available messages (filtered for workflow):\n" + "\n".join(msgLines))
        if messagesFromOtherWorkflows:
            logger.warning(f"getChatDocumentsFromDocumentList: Found {len(messagesFromOtherWorkflows)} messages from other workflows in workflow.messages list:\n" + "\n".join(messagesFromOtherWorkflows))
    except Exception as e:
        logger.debug(f"getChatDocumentsFromDocumentList: unable to enumerate messages for debug: {e}")

def _docRefFindDocument(self, workflow, workflowId, predicate):
    """Return the first document of this workflow's messages satisfying ``predicate``, else None."""
    for message in workflow.messages:
        if not self._docRefMessageBelongs(message, workflowId):
            continue
        for doc in (message.documents or []):
            if predicate(doc):
                return doc
    return None

def _docRefResolveItem(self, workflow, workflowId, docRef: str) -> List[Any]:
    """Resolve a ``docItem:`` reference to a list holding zero or one document."""
    parts = docRef.split(':')
    # The "docItem:" prefix guarantees at least two parts; parts[1] should be the documentId
    docId = parts[1]
    doc = self._docRefFindDocument(workflow, workflowId, lambda candidate: candidate.id == docId)
    if doc is not None:
        logger.debug(f"Matched document reference '{docRef}' to document {doc.id} (fileName: {getattr(doc, 'fileName', 'unknown')}) by documentId")
        return [doc]

    # Fallback: a two-part reference containing a dot looks like docItem:filename.ext,
    # which the AI sometimes generates instead of a documentId
    if '.' in docId and len(parts) == 2:
        filename = docId
        logger.warning(f"Document reference '{docRef}' not found by documentId, attempting to match by filename: {filename}")

        def _nameMatches(candidate) -> bool:
            # A document whose fileName is None must not abort the whole resolution
            candidateName = getattr(candidate, 'fileName', '') or ''
            return candidateName == filename or candidateName.endswith(filename)

        doc = self._docRefFindDocument(workflow, workflowId, _nameMatches)
        if doc is not None:
            docFileName = getattr(doc, 'fileName', '')
            logger.info(f"Matched document reference '{docRef}' to document {doc.id} by filename {docFileName}")
            return [doc]
        logger.error(f"Could not resolve document reference '{docRef}' - no document found with filename '{filename}'")
        return []

    logger.error(f"Could not resolve document reference '{docRef}' - no document found with documentId '{docId}'")
    return []

def _docRefResolveList(self, workflow, workflowId, docRef: str) -> List[Any]:
    """Resolve a ``docList:`` reference to the documents of the referenced message."""
    parts = docRef.split(':')
    if len(parts) >= 3:
        # Format: docList:MESSAGE_ID:LABEL
        messageId = parts[1]
        label = parts[2]
        messageFound = None
        for message in workflow.messages:
            if not self._docRefMessageBelongs(message, workflowId):
                continue
            if str(message.id) == messageId:
                messageFound = message
                break
        if not messageFound:
            # Stale message id: do not fall back to the label, it could match the wrong message
            availableIds = [str(msg.id) for msg in workflow.messages]
            logger.warning(f"Document reference contains stale message ID {messageId} not found in current workflow {workflow.id}. Label: {label}. Available message IDs: {availableIds}")
            logger.warning("This indicates the document reference was created in a different workflow state. Returning empty list.")
            return []
        return list(messageFound.documents or [])

    # Format: docList:LABEL - the first message with a matching documentsLabel wins
    label = parts[1]
    for message in workflow.messages:
        if not self._docRefMessageBelongs(message, workflowId):
            msgWorkflowId = getattr(message, 'workflowId', None)
            if msgWorkflowId:
                logger.warning(f"Message {message.id} has workflowId {msgWorkflowId} which does not match current workflow {workflowId}. Skipping.")
            else:
                logger.warning(f"Message {message.id} has no workflowId. Skipping.")
            continue
        if getattr(message, 'documentsLabel', None) == label:
            return list(message.documents or [])
    return []

def _docRefResolveLabel(self, workflow, workflowId, docRef: str) -> List[Any]:
    """Resolve a bare label reference to the documents of the newest matching message.

    Raises:
        ValueError: If no message of this workflow carries the label.
    """
    matchingMessages = []
    for message in workflow.messages:
        if not self._docRefMessageBelongs(message, workflowId):
            msgWorkflowId = getattr(message, 'workflowId', None)
            if msgWorkflowId:
                logger.debug(f"Skipping message {message.id} with workflowId {msgWorkflowId} (expected {workflowId})")
            else:
                logger.debug(f"Skipping message {message.id} with no workflowId (expected {workflowId})")
            continue
        if getattr(message, 'documentsLabel', '') == docRef:
            matchingMessages.append(message)

    if not matchingMessages:
        logger.error(f"No messages found with documentsLabel: {docRef}")
        raise ValueError(f"Document reference not found: {docRef}")

    # Newest message wins; max() keeps the first of equal keys, and a None
    # publishedAt is treated as 0 so it cannot break the comparison.
    newestMessage = max(matchingMessages, key=lambda msg: getattr(msg, 'publishedAt', 0) or 0)
    if not newestMessage.documents:
        logger.debug(f"No documents found in newest message {newestMessage.id}")
        return []
    docNames = [doc.fileName for doc in newestMessage.documents if hasattr(doc, 'fileName')]
    logger.debug(f"Added {len(newestMessage.documents)} documents from newest message {newestMessage.id}: {docNames}")
    return list(newestMessage.documents)
def _verifyWorkflowMessagesIntegrity(self, workflow, expectedWorkflowId: str) -> None:
"""
Verify that all messages in workflow.messages belong to the expected workflow.
This helps detect when workflow objects are being mixed up or when messages from
other workflows are incorrectly included.
"""
try:
if not workflow or not hasattr(workflow, 'messages') or not workflow.messages:
return
messagesFromOtherWorkflows = []
messagesWithoutWorkflowId = []
totalMessages = len(workflow.messages)
for message in workflow.messages:
msgWorkflowId = getattr(message, 'workflowId', None)
if not msgWorkflowId:
messagesWithoutWorkflowId.append({
'id': getattr(message, 'id', 'unknown'),
'label': getattr(message, 'documentsLabel', None)
})
elif msgWorkflowId != expectedWorkflowId:
messagesFromOtherWorkflows.append({
'id': getattr(message, 'id', 'unknown'),
'label': getattr(message, 'documentsLabel', None),
'workflowId': msgWorkflowId,
'expectedWorkflowId': expectedWorkflowId
})
if messagesFromOtherWorkflows:
logger.error(
f"CRITICAL: Workflow integrity violation detected! "
f"Workflow {expectedWorkflowId} contains {len(messagesFromOtherWorkflows)} messages from other workflows. "
f"Total messages: {totalMessages}. "
f"Foreign messages: {messagesFromOtherWorkflows}"
)
if messagesWithoutWorkflowId:
logger.warning(
f"Workflow integrity issue: Workflow {expectedWorkflowId} contains {len(messagesWithoutWorkflowId)} messages without workflowId. "
f"Messages: {messagesWithoutWorkflowId}"
)
# Also check if self._workflow has changed (workflow object ID mismatch)
currentWorkflow = self._workflow
if currentWorkflow and hasattr(currentWorkflow, 'id'):
currentWorkflowId = currentWorkflow.id
if currentWorkflowId != expectedWorkflowId:
logger.error(
f"CRITICAL: Workflow object changed during execution! "
f"Expected workflow {expectedWorkflowId}, but self._workflow now points to {currentWorkflowId}. "
f"This indicates the workflow object was swapped mid-execution."
)
except Exception as e:
logger.debug(f"Error during workflow integrity verification: {e}")
def getConnectionReferenceFromUserConnection(self, connection: UserConnection) -> str:
    """Build a connection reference string annotated with connection and token state.

    The result has the form
    ``connection:AUTHORITY:USERNAME [status:STATUS, token:TOKEN_STATUS]``.
    TOKEN_STATUS is one of ``no_token``, ``no_expiration``, ``expired``,
    ``valid``, ``valid (refreshed)`` or ``error: ...`` when the token lookup
    raised.
    """
    try:
        # Ask the security service for a fresh token of this connection
        logger.debug(f"Getting fresh token for connection {connection.id}")
        token = self._get_service("security").getFreshToken(connection.id)
        if not token:
            token_status = "no_token"
        elif not (hasattr(token, 'expiresAt') and token.expiresAt):
            token_status = "no_expiration"
        else:
            now = self._get_service("utils").timestampGetUtc()
            if now > token.expiresAt:
                token_status = "expired"
            else:
                # A token created less than 300 seconds ago counts as just refreshed;
                # a token without createdAt gets age 0 and lands in the same bucket.
                tokenAge = now - token.createdAt if hasattr(token, 'createdAt') else 0
                token_status = "valid (refreshed)" if tokenAge < 300 else "valid"
    except Exception as e:
        token_status = f"error: {str(e)}"

    # Reference itself carries authority and username only (no UUID)
    base_ref = f"connection:{connection.authority.value}:{connection.externalUsername}"
    state_info = f" [status:{connection.status.value}, token:{token_status}]"
    reference = base_ref + state_info
    logger.debug(f"getConnectionReferenceFromUserConnection: Built reference: {reference}")
    return reference
def getUserConnectionByExternalUsername(self, authority: str, externalUsername: str) -> Optional[UserConnection]:
    """Return the current user's connection matching authority and external username.

    The authority is compared case-insensitively and may be stored on the
    connection either as an enum (its ``value`` is used) or as a plain string.
    Returns None when either argument is empty, nothing matches, or the lookup
    raises (the error is logged).
    """
    try:
        if not (authority and externalUsername):
            return None

        def _authorityText(conn) -> str:
            # Normalize enum-or-string authority to its string form
            raw = conn.authority
            return raw.value if hasattr(raw, 'value') else str(raw)

        candidates = self.interfaceDbApp.getUserConnections(self.user.id)
        return next(
            (
                conn for conn in candidates
                if _authorityText(conn).lower() == authority.lower()
                and conn.externalUsername == externalUsername
            ),
            None,
        )
    except Exception as e:
        logger.error(f"Error getting connection by external username: {str(e)}")
        return None
def getUserConnectionFromConnectionReference(self, connectionReference: str) -> Optional[UserConnection]:
    """Resolve a connection reference string to the current user's UserConnection.

    Accepted format: ``connection:AUTHORITY:USERNAME`` optionally followed by
    state information in the form `` [status:..., token:...]``, which is ignored.
    The username may itself contain ':' characters. The authority is compared
    case-insensitively and may be stored on the connection as an enum or as a
    plain string, consistent with getUserConnectionByExternalUsername.

    Args:
        connectionReference: Reference string as described above.

    Returns:
        The matching UserConnection, or None when the reference is malformed,
        nothing matches, or the lookup raises (the error is logged).
    """
    try:
        # Drop the trailing state information if present
        base_reference = connectionReference.split(' [')[0]
        # Split at most twice so a ':' inside the username stays part of it
        parts = base_reference.split(':', 2)
        if len(parts) != 3 or parts[0] != "connection":
            return None
        wantedAuthority = parts[1].lower()
        username = parts[2]

        for conn in self.interfaceDbApp.getUserConnections(self.user.id):
            rawAuthority = conn.authority
            # Normalize authority for comparison (enum vs string)
            connAuthority = rawAuthority.value if hasattr(rawAuthority, 'value') else str(rawAuthority)
            if connAuthority.lower() == wantedAuthority and conn.externalUsername == username:
                return conn
        return None
    except Exception as e:
        logger.error(f"Error parsing connection reference: {str(e)}")
        return None
def getFreshConnectionToken(self, connectionId: str):
    """Fetch a fresh token for one connection from the security service.

    Args:
        connectionId: ID of the connection to get a token for

    Returns:
        Whatever the security service's getFreshToken returns, or None when
        resolving or calling the service raises (the error is logged).
    """
    try:
        securityService = self._get_service("security")
        freshToken = securityService.getFreshToken(connectionId)
    except Exception as e:
        logger.error(f"Error getting fresh token for connection {connectionId}: {str(e)}")
        return None
    return freshToken
def getFileInfo(self, fileId: str) -> Optional[Dict[str, Any]]:
    """Get file information including the newer optional fields.

    Args:
        fileId: ID of the file to look up through the component interface.

    Returns:
        Dict with id, fileName, size, mimeType, fileHash, creationDate and the
        optional fields tags, folderId, description, status (None when the file
        record lacks them), or None when the file is not found.
    """
    fileItem = self.interfaceDbComponent.getFile(fileId)
    if not fileItem:
        return None
    return {
        "id": fileItem.id,
        "fileName": fileItem.fileName,
        "size": fileItem.fileSize,
        "mimeType": fileItem.mimeType,
        "fileHash": fileItem.fileHash,
        "creationDate": fileItem.creationDate,
        # Newer fields may be absent on older records
        "tags": getattr(fileItem, "tags", None),
        "folderId": getattr(fileItem, "folderId", None),
        "description": getattr(fileItem, "description", None),
        "status": getattr(fileItem, "status", None),
    }
def getFileData(self, fileId: str) -> bytes:
    """Return the raw data of a file, as delivered by the component interface."""
    componentInterface = self.interfaceDbComponent
    return componentInterface.getFileData(fileId)
def getFileContent(self, fileId: str) -> Optional[Dict[str, Any]]:
    """Return a file's preview as a dict (via toDictWithBase64Encoding), or None if there is no preview."""
    filePreview = self.interfaceDbComponent.getFileContent(fileId)
    return filePreview.toDictWithBase64Encoding() if filePreview else None
def listFiles(
    self,
    folderId: Optional[str] = None,
    tags: Optional[List[str]] = None,
    search: Optional[str] = None,
) -> List[Dict[str, Any]]:
    """List the files returned by the component interface, with optional filters.

    All given filters must match for a file to be included.

    Args:
        folderId: Keep only files whose folderId equals this value
            (None = no folder filtering).
        tags: Keep only files carrying at least one of these tags.
        search: Case-insensitive substring searched in fileName and description.

    Returns:
        List of file info dicts (id, fileName, mimeType, fileSize, creationDate,
        tags, folderId, description, status).
    """
    # Lower-case the search term once instead of once per file
    searchLower = search.lower() if search else None
    results = []
    for fileItem in self.interfaceDbComponent.getAllFiles():
        itemFolderId = getattr(fileItem, "folderId", None)
        itemTags = getattr(fileItem, "tags", None)
        itemDescription = getattr(fileItem, "description", None)

        if folderId is not None and itemFolderId != folderId:
            continue
        if tags and not any(t in (itemTags or []) for t in tags):
            continue
        if searchLower is not None:
            nameMatch = searchLower in (fileItem.fileName or "").lower()
            descMatch = searchLower in (itemDescription or "").lower()
            if not nameMatch and not descMatch:
                continue

        results.append({
            "id": fileItem.id,
            "fileName": fileItem.fileName,
            "mimeType": fileItem.mimeType,
            "fileSize": fileItem.fileSize,
            "creationDate": fileItem.creationDate,
            "tags": itemTags,
            "folderId": itemFolderId,
            "description": itemDescription,
            "status": getattr(fileItem, "status", None),
        })
    return results
def listFolders(self, parentId: str = None) -> List[Dict[str, Any]]:
    """List FileFolder records created by the current user under one parent.

    Args:
        parentId: Parent folder ID (None = folders whose parentId is None,
            i.e. root folders).

    Returns:
        The recordset returned by the component interface's database.
    """
    from modules.datamodels.datamodelFileFolder import FileFolder

    ownerId = self.user.id if self.user else ""
    # parentId is always part of the filter, also when it is None
    recordFilter = {"_createdBy": ownerId, "parentId": parentId}
    database = self.interfaceDbComponent.db
    return database.getRecordset(FileFolder, recordFilter=recordFilter)
def createFolder(self, name: str, parentId: str = None) -> Dict[str, Any]:
    """Create a FileFolder record with the given name and parent and return the created record."""
    from modules.datamodels.datamodelFileFolder import FileFolder

    newFolder = FileFolder(name=name, parentId=parentId)
    database = self.interfaceDbComponent.db
    return database.recordCreate(FileFolder, newFolder)
# ---- DataSource CRUD ----
def createDataSource(
    self, connectionId: str, sourceType: str, path: str, label: str,
    featureInstanceId: str = None
) -> Dict[str, Any]:
    """Create a DataSource record referencing an external data source.

    featureInstanceId falls back to the context's feature_instance_id;
    featureInstanceId, mandateId and userId are stored as "" when unavailable.
    """
    from modules.datamodels.datamodelDataSource import DataSource

    resolvedFeatureInstanceId = featureInstanceId or self._context.feature_instance_id or ""
    resolvedMandateId = self._context.mandate_id or ""
    resolvedUserId = self.user.id if self.user else ""
    dataSource = DataSource(
        connectionId=connectionId,
        sourceType=sourceType,
        path=path,
        label=label,
        featureInstanceId=resolvedFeatureInstanceId,
        mandateId=resolvedMandateId,
        userId=resolvedUserId,
    )
    return self.interfaceDbComponent.db.recordCreate(DataSource, dataSource)
def listDataSources(self, featureInstanceId: str = None) -> List[Dict[str, Any]]:
    """List DataSource records; a non-empty featureInstanceId restricts the result to that instance."""
    from modules.datamodels.datamodelDataSource import DataSource

    # An empty filter is passed when no feature instance is given
    recordFilter = {"featureInstanceId": featureInstanceId} if featureInstanceId else {}
    database = self.interfaceDbComponent.db
    return database.getRecordset(DataSource, recordFilter=recordFilter)
def getDataSource(self, dataSourceId: str) -> Optional[Dict[str, Any]]:
    """Return the first DataSource record with the given ID, or None when there is none."""
    from modules.datamodels.datamodelDataSource import DataSource

    matches = self.interfaceDbComponent.db.getRecordset(DataSource, recordFilter={"id": dataSourceId})
    if not matches:
        return None
    return matches[0]
def deleteDataSource(self, dataSourceId: str) -> bool:
    """Delete a DataSource record.

    Returns:
        True when the delete call completed, False when it raised (the error
        is logged).
    """
    from modules.datamodels.datamodelDataSource import DataSource

    try:
        self.interfaceDbComponent.db.recordDelete(DataSource, dataSourceId)
    except Exception as e:
        logger.error(f"Failed to delete DataSource {dataSourceId}: {e}")
        return False
    return True
def getUserConnections(self) -> List[Dict[str, Any]]:
    """Return all UserConnections of the current user.

    Entries offering ``model_dump`` are converted with it; other entries are
    passed through unchanged. An empty list is returned when there is no app
    interface or user, when the lookup yields nothing, or when it raises (the
    error is logged).
    """
    try:
        if not (self.interfaceDbApp and self.user):
            return []
        found = self.interfaceDbApp.getUserConnections(self.user.id) or []
        dumped = []
        for entry in found:
            dumped.append(entry.model_dump() if hasattr(entry, "model_dump") else entry)
        return dumped
    except Exception as e:
        logger.error(f"Error getting user connections: {e}")
        return []
def _diagnoseDocumentAccess(self, document: ChatDocument) -> Dict[str, Any]:
"""
Diagnose document access issues and provide recovery information.
This method helps identify why document properties are inaccessible.
"""
try:
diagnosis = {
'document_id': document.id,
'file_id': document.fileId,
'has_component_interface': document._componentInterface is not None,
'component_interface_type': type(document._componentInterface).__name__ if document._componentInterface else None,
'file_exists': False,
'file_info': None,
'error_details': None
}
# Check if component interface is set
if not document._componentInterface:
diagnosis['error_details'] = "Component interface not set - document cannot access file system"
return diagnosis
# Try to access the file directly
try:
file_info = self.interfaceDbComponent.getFile(document.fileId)
if file_info:
diagnosis['file_exists'] = True
diagnosis['file_info'] = {
'fileName': file_info.fileName if hasattr(file_info, 'fileName') else 'N/A',
'fileSize': file_info.fileSize if hasattr(file_info, 'fileSize') else 'N/A',
'mimeType': file_info.mimeType if hasattr(file_info, 'mimeType') else 'N/A'
}
else:
diagnosis['error_details'] = f"File with ID {document.fileId} not found in component interface"
except Exception as e:
diagnosis['error_details'] = f"Error accessing file {document.fileId}: {str(e)}"
return diagnosis
except Exception as e:
return {
'document_id': document.id if hasattr(document, 'id') else 'unknown',
'file_id': document.fileId if hasattr(document, 'fileId') else 'unknown',
'error_details': f"Error during diagnosis: {str(e)}"
}
def _recoverDocumentAccess(self, document: ChatDocument) -> bool:
    """
    Try to restore document access by re-attaching this service's component interface.

    After re-attaching, the document's fileName is read as a probe.
    Returns True if that read succeeds, False if it or the re-attachment raises.
    """
    try:
        logger.info(f"Attempting to recover document access for document {document.id}")
        # Hand the document the service's component interface again
        document.setComponentInterface(self.interfaceDbComponent)
        # Probe: reading fileName must work once access is restored
        try:
            probedFileName = document.fileName
            logger.info(f"Document access recovered for {document.id} -> {probedFileName}")
            return True
        except Exception as probeError:
            logger.error(f"Document access recovery failed for {document.id}: {str(probeError)}")
            return False
    except Exception as recoveryError:
        logger.error(f"Error during document access recovery for {document.id}: {str(recoveryError)}")
        return False
def calculateObjectSize(self, obj: Any) -> int:
    """
    Measure an object as the UTF-8 byte length of its JSON serialization.

    Values json cannot serialize natively are converted with str().

    Args:
        obj: Object to calculate size for
    Returns:
        int: Size in bytes; 0 for None or when serialization fails (the error
        is logged)
    """
    try:
        import json
        if obj is None:
            return 0
        # Non-ASCII characters are kept as-is, so they count with their UTF-8 width
        serialized = json.dumps(obj, ensure_ascii=False, default=str)
        encoded = serialized.encode('utf-8')
        return len(encoded)
    except Exception as e:
        logger.error(f"Error calculating object size: {str(e)}")
        return 0
def getWorkflowContext(self) -> Dict[str, int]:
    """Return the workflow's currentRound, currentTask and currentAction.

    Each value defaults to 0 when the workflow lacks the attribute; all three
    are 0 when there is no workflow or reading it raises (the error is logged).
    """
    try:
        workflow = self._workflow
        if not workflow:
            return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
        contextFields = ('currentRound', 'currentTask', 'currentAction')
        return {field: getattr(workflow, field, 0) for field in contextFields}
    except Exception as e:
        logger.error(f"Error getting workflow context: {str(e)}")
        return {'currentRound': 0, 'currentTask': 0, 'currentAction': 0}
def setWorkflowContext(self, roundNumber: int = None, taskNumber: int = None, actionNumber: int = None):
    """Set the workflow's current round/task/action and persist the change.

    Only arguments that are not None are applied. They are written to the
    in-memory workflow and, if at least one was given, sent to the chat
    interface's updateWorkflow. Errors are logged, not raised.
    """
    try:
        workflow = self._workflow
        if not workflow:
            logger.error("setWorkflowContext: No workflow available")
            return

        requested = (
            ("currentRound", roundNumber),
            ("currentTask", taskNumber),
            ("currentAction", actionNumber),
        )
        # Keep only the fields the caller actually supplied
        update_data = {field: value for field, value in requested if value is not None}
        for field, value in update_data.items():
            setattr(workflow, field, value)

        # Persist changes to database if any updates were made
        if update_data:
            self.interfaceDbChat.updateWorkflow(workflow.id, update_data)

        roundText = getattr(workflow, 'currentRound', 'N/A')
        taskText = getattr(workflow, 'currentTask', 'N/A')
        actionText = getattr(workflow, 'currentAction', 'N/A')
        logger.debug(f"Updated workflow context: Round {roundText}, Task {taskText}, Action {actionText}")
    except Exception as e:
        logger.error(f"Error setting workflow context: {str(e)}")
def getWorkflowStats(self) -> Dict[str, Any]:
    """Return workflow statistics combined with the current round/task/action.

    totalTasks and totalActions default to 0, workflowStatus and workflowId to
    'unknown' when the workflow is missing or lacks the attribute. If anything
    raises, an all-default dict is returned and the error is logged.
    """
    try:
        workflow = self._workflow
        workflow_context = self.getWorkflowContext()
        stats = {
            key: workflow_context[key]
            for key in ('currentRound', 'currentTask', 'currentAction')
        }
        if not workflow:
            stats['totalTasks'] = 0
            stats['totalActions'] = 0
            stats['workflowStatus'] = 'unknown'
            stats['workflowId'] = 'unknown'
            return stats
        stats['totalTasks'] = getattr(workflow, 'totalTasks', 0)
        stats['totalActions'] = getattr(workflow, 'totalActions', 0)
        stats['workflowStatus'] = getattr(workflow, 'status', 'unknown')
        stats['workflowId'] = getattr(workflow, 'id', 'unknown')
        return stats
    except Exception as e:
        logger.error(f"Error getting workflow stats: {str(e)}")
        return {
            'currentRound': 0,
            'currentTask': 0,
            'currentAction': 0,
            'totalTasks': 0,
            'totalActions': 0,
            'workflowStatus': 'unknown',
            'workflowId': 'unknown'
        }
def createWorkflow(self, workflowData: Dict[str, Any]):
    """Create a workflow through the chat interface; errors are logged and re-raised."""
    try:
        createdWorkflow = self.interfaceDbChat.createWorkflow(workflowData)
    except Exception as e:
        logger.error(f"Error creating workflow: {str(e)}")
        raise
    return createdWorkflow
def updateWorkflow(self, workflowId: str, updateData: Dict[str, Any]):
    """Update a workflow through the chat interface; errors are logged and re-raised."""
    try:
        updateResult = self.interfaceDbChat.updateWorkflow(workflowId, updateData)
    except Exception as e:
        logger.error(f"Error updating workflow: {str(e)}")
        raise
    return updateResult
def getWorkflow(self, workflowId: str):
    """Fetch a workflow by ID through the chat interface.

    Returns whatever the chat interface returns (a warning is logged for a
    falsy result). Errors are logged and re-raised.
    """
    try:
        logger.debug(f"getWorkflow called with workflowId: {workflowId}")
        workflow = self.interfaceDbChat.getWorkflow(workflowId)
        if not workflow:
            logger.warning(f"getWorkflow returned None for workflowId: {workflowId}")
        else:
            logger.debug(f"getWorkflow returned workflow with ID: {workflow.id}")
        return workflow
    except Exception as e:
        logger.error(f"Error getting workflow: {str(e)}")
        raise
# === Service-level transactions (DB write-through + in-memory sync) ===
def storeMessageWithDocuments(self, workflow: Any, messageData: Dict[str, Any], documents: List[Any]) -> ChatMessage:
    """Persist a message with its documents and sync it into the in-memory workflow.

    The message is created through the chat interface with ``workflowId`` set
    to ``workflow.id`` and ``documents`` set to the given list (``[]`` when
    None). The caller's ``messageData`` dict is not modified. The created
    message then replaces an entry of ``workflow.messages`` with the same id,
    or is appended when there is none.

    Args:
        workflow: Workflow object providing ``id`` and ``messages``.
        messageData: Message fields; None is treated as an empty dict.
        documents: Documents to attach; None is treated as an empty list.

    Returns:
        The message returned by the chat interface.

    Raises:
        ValueError: If the chat interface returns no message.
    """
    # Work on a copy so the caller's dict stays untouched
    payload = dict(messageData or {})
    payload["workflowId"] = workflow.id
    payload["documents"] = documents or []

    chatMessage = self.interfaceDbChat.createMessage(payload)
    if not chatMessage:
        raise ValueError("Failed to create message with documents")

    # A workflow without a message list gets one, so the append below cannot fail
    if workflow.messages is None:
        workflow.messages = []

    # In-memory sync: replace-by-id if the message already exists, else append
    newId = getattr(chatMessage, 'id', None)
    for index, existing in enumerate(workflow.messages):
        if getattr(existing, 'id', None) == newId:
            workflow.messages[index] = chatMessage
            break
    else:
        workflow.messages.append(chatMessage)
    return chatMessage
def storeLog(self, workflow: Any, logData: Dict[str, Any]) -> ChatLog:
    """Persist a ChatLog and sync it into the in-memory workflow logs list.

    ``workflowId`` is set to ``workflow.id``. When ``roundNumber`` is missing
    or None it is taken from ``workflow.currentRound``, falling back to 1. The
    caller's ``logData`` dict is not modified. The created log then replaces an
    entry of ``workflow.logs`` with the same id, or is appended.

    Args:
        workflow: Workflow object providing ``id`` and ``logs``.
        logData: Log fields; None is treated as an empty dict.

    Returns:
        The log returned by the chat interface.

    Raises:
        ValueError: If the chat interface returns no log.
    """
    logData = dict(logData or {})
    logData["workflowId"] = workflow.id

    # Auto-populate roundNumber from workflow's currentRound if not explicitly set
    if logData.get("roundNumber") is None:
        currentRound = getattr(workflow, 'currentRound', None)
        if currentRound is None:
            currentRound = 1
            logger.warning("storeLog: workflow.currentRound is None, defaulting to 1")
        logData["roundNumber"] = currentRound
        # The message may be None or a non-string; never let the preview raise
        messagePreview = str(logData.get('message') or '')[:50]
        logger.debug(f"storeLog: Set roundNumber={currentRound} for log: {messagePreview}")

    chatLog = self.interfaceDbChat.createLog(logData)
    if not chatLog:
        raise ValueError("Failed to create log")

    # A workflow without a logs list gets one, mirroring the message path
    if workflow.logs is None:
        workflow.logs = []

    # replace-by-id if exists, else append
    newId = getattr(chatLog, 'id', None)
    for index, existing in enumerate(workflow.logs):
        if getattr(existing, 'id', None) == newId:
            workflow.logs[index] = chatLog
            break
    else:
        workflow.logs.append(chatLog)
    return chatLog
def updateMessage(self, messageId: str, messageData: Dict[str, Any]):
    """Update a message through the chat interface; errors are logged and re-raised."""
    try:
        updateResult = self.interfaceDbChat.updateMessage(messageId, messageData)
    except Exception as e:
        logger.error(f"Error updating message: {str(e)}")
        raise
    return updateResult
def getDocumentCount(self) -> str:
    """Describe how many documents the current workflow's messages hold (for task planning).

    Returns "N document(s) available", or "No documents available" when there
    is no workflow, no document, or counting raises (the error is logged).
    """
    nothingAvailable = "No documents available"
    try:
        workflow = self._workflow
        if not workflow:
            return nothingAvailable
        # Sum the documents over all messages of the workflow
        documentTotal = sum(
            len(msg.documents)
            for msg in workflow.messages
            if hasattr(msg, 'documents') and msg.documents
        )
        if documentTotal == 0:
            return nothingAvailable
        return f"{documentTotal} document(s) available"
    except Exception as e:
        logger.error(f"Error getting document count: {str(e)}")
        return nothingAvailable
def getWorkflowHistoryContext(self) -> str:
    """Describe the document lists available from earlier workflow rounds.

    Returns:
        A header line followed by one ``- docList:<label> (<n> documents)`` line
        per history exchange. Returns ``"No previous round context available"``
        when there is no workflow, no message with status ``"first"``, no
        history exchanges, or when an error occurs.
    """
    no_context = "No previous round context available"
    try:
        workflow = self._workflow
        if not workflow:
            return no_context

        # A message flagged "first" is the marker this method uses for prior rounds.
        if not any(getattr(msg, 'status', None) == "first" for msg in workflow.messages):
            return no_context

        history = self._getDocumentReferenceList(workflow)["history"]
        if not history:
            return no_context

        # Label-only references stay valid even if message ids change.
        lines = [
            f"- docList:{entry['documentsLabel']} ({len(entry['documents'])} documents)\n"
            for entry in history
        ]
        return "Previous workflow rounds contain documents:\n" + "".join(lines)
    except Exception as exc:
        logger.error(f"Error getting workflow history context: {str(exc)}")
        return no_context
def getAvailableDocuments(self, workflow) -> str:
    """Build the document index text that is embedded into AI prompts.

    Args:
        workflow: Workflow object whose messages carry the documents. It is used
            as passed in; nothing is reloaded here.

    Returns:
        A text block listing current-round document lists first and past-round
        lists after, each with its ``docItem:`` references. Returns
        ``"No documents available"`` when the workflow is missing or has no
        ``messages`` attribute, and an error notice when building the index fails.
    """
    try:
        if not workflow or not hasattr(workflow, 'messages'):
            return "No documents available"

        workflowId = getattr(workflow, 'id', 'NO_ID')
        workflowObjId = id(workflow)
        logger.debug(f"getAvailableDocuments: workflow.id = {workflowId}, workflow object id = {workflowObjId}")

        # Integrity check on workflow.messages before the index is built.
        self._verifyWorkflowMessagesIntegrity(workflow, workflowId)

        document_list = self._getDocumentReferenceList(workflow)

        def render_section(heading, exchanges) -> str:
            # One "- docList:<label> contains:" entry per exchange, followed by its items.
            pieces = [heading]
            for exchange in exchanges:
                # Label-only references stay valid even if message ids change.
                pieces.append(f"- docList:{exchange['documentsLabel']} contains:\n")
                for ref in exchange['documents']:
                    # Normalise bare references to the docItem: form.
                    item = ref if ref.startswith("docItem:") else f"docItem:{ref}"
                    pieces.append(f" - {item}\n")
                pieces.append("\n")
            return "".join(pieces)

        sections = []
        current_round = document_list["chat"]
        if current_round:
            sections.append(render_section("\nCurrent round documents:\n", current_round))
        past_rounds = document_list["history"]
        if past_rounds:
            sections.append(render_section("\nPast rounds documents:\n", past_rounds))
        if not current_round and not past_rounds:
            sections.append("\nNO DOCUMENTS AVAILABLE - This workflow has no documents to process.\n")
        return "".join(sections)
    except Exception as exc:
        logger.error(f"Error getting available documents: {str(exc)}")
        return "NO DOCUMENTS AVAILABLE - Error generating document context."
def _getDocumentReferenceList(self, workflow) -> Dict[str, List]:
"""Get list of document exchanges with new labeling format, sorted by recency (exact copy of old system)"""
# Collect all documents first and refresh their attributes
all_documents = []
for message in workflow.messages:
if message.documents:
all_documents.extend(message.documents)
# Refresh file attributes for all documents
if all_documents:
self._refreshDocumentFileAttributes(all_documents)
def _is_valid_document(doc) -> bool:
try:
size_ok = getattr(doc, 'fileSize', 0) and getattr(doc, 'fileSize', 0) > 0
id_ok = bool(getattr(doc, 'fileId', None))
mime_ok = bool(getattr(doc, 'mimeType', None))
return size_ok and id_ok and mime_ok
except Exception:
return False
# Simplified, deterministic logic:
# - Walk messages newest-first
# - For each document, assign it exactly once to a bucket based on the message round
# - Never allow the same doc to appear in both buckets
chat_exchanges = []
history_exchanges = []
seen_doc_ids = set()
current_round = getattr(workflow, 'currentRound', None)
for message in reversed(workflow.messages):
if not getattr(message, 'documents', None):
continue
label = getattr(message, 'documentsLabel', None)
if not label:
# Skip messages without a label to keep references consistent
continue
doc_refs = []
for doc in message.documents:
if not _is_valid_document(doc):
continue
# Avoid duplicates across chat/history
doc_id = getattr(doc, 'id', None)
if not doc_id or doc_id in seen_doc_ids:
continue
seen_doc_ids.add(doc_id)
doc_ref = self.getDocumentReferenceFromChatDocument(doc)
doc_refs.append(doc_ref)
if not doc_refs:
continue
entry = {
'documentsLabel': label,
'documents': doc_refs
}
msg_round = getattr(message, 'roundNumber', None)
if current_round is not None and msg_round == current_round:
chat_exchanges.append(entry)
else:
history_exchanges.append(entry)
return {
"chat": chat_exchanges,
"history": history_exchanges
}
def _refreshDocumentFileAttributes(self, documents) -> None:
"""Update file attributes (fileName, fileSize, mimeType) for documents"""
for doc in documents:
try:
original_filename = doc.fileName
# Skip invalid docs early if essential identifiers are missing
if not getattr(doc, 'fileId', None):
logger.debug(f"Skipping document {doc.id} due to missing fileId")
setattr(doc, 'fileSize', 0)
setattr(doc, 'mimeType', None)
continue
file_info = self.getFileInfo(doc.fileId)
if file_info:
db_filename = file_info.get("fileName", doc.fileName)
doc.fileName = file_info.get("fileName", doc.fileName)
doc.fileSize = file_info.get("size", doc.fileSize)
doc.mimeType = file_info.get("mimeType", doc.mimeType)
# Mark invalid if missing mimeType
if not doc.mimeType:
logger.debug(f"Document {doc.id} has missing mimeType; will be filtered from index")
setattr(doc, 'fileSize', 0)
else:
logger.warning(f"File not found for document {doc.id}, fileId: {doc.fileId}")
setattr(doc, 'fileSize', 0)
setattr(doc, 'mimeType', None)
except Exception as e:
logger.error(f"Error refreshing file attributes for document {doc.id}: {e}")
def _generateWorkflowContextPrefix(self, message) -> str:
"""Generate workflow context prefix: round{num}_task{num}_action{num}"""
round_num = message.roundNumber if hasattr(message, 'roundNumber') else 1
task_num = message.taskNumber if hasattr(message, 'taskNumber') else 0
action_num = message.actionNumber if hasattr(message, 'actionNumber') else 0
return f"round{round_num}_task{task_num}_action{action_num}"
def getDocumentReferenceFromChatDocument(self, document) -> str:
    """Return the ``docItem:<id>:<fileName>`` reference string for a document.

    Raises:
        Exception: Any error while building the reference is logged and
            re-raised, so callers never continue with an invalid reference.
    """
    try:
        doc_id = document.id
        file_name = document.fileName
        return f"docItem:{doc_id}:{file_name}"
    except Exception as exc:
        logger.error(f"Critical error creating document reference for document {document.id}: {str(exc)}")
        raise
def _getMessageSequenceForExchange(self, exchange, workflow) -> int:
"""Get message sequence number for sorting exchanges by recency"""
try:
# Extract message ID from the first document reference
if exchange['documents'] and len(exchange['documents']) > 0:
first_doc_ref = exchange['documents'][0]
if first_doc_ref.startswith("docItem:"):
# docItem:<id>:<label> - extract ID
parts = first_doc_ref.split(':')
if len(parts) >= 2:
doc_id = parts[1]
# Find the message containing this document
for message in workflow.messages:
if message.documents:
for doc in message.documents:
if doc.id == doc_id:
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
elif first_doc_ref.startswith("docList:"):
# docList:<message_id>:<label> - extract message ID
parts = first_doc_ref.split(':')
if len(parts) >= 2:
message_id = parts[1]
# Find the message by ID
for message in workflow.messages:
if str(message.id) == message_id:
return message.sequenceNr if hasattr(message, 'sequenceNr') else 0
return 0
except Exception as e:
logger.error(f"Error getting message sequence for exchange: {str(e)}")
return 0
def _validateDocumentLabelConsistency(self, message) -> str:
"""Validate that the document label used for references matches the message's actual label"""
if not hasattr(message, 'documentsLabel') or not message.documentsLabel:
return None
# Simply return the message's actual documentsLabel - no correction, just validation
return message.documentsLabel
def getConnectionReferenceList(self) -> List[str]:
    """List reference strings for all connections of the current user.

    Each connection returned by ``interfaceDbApp.getUserConnections`` is
    formatted with ``getConnectionReferenceFromUserConnection``.

    Returns:
        The reference strings, or an empty list when there is no app interface,
        no user, no connections, or when an error occurs.
    """
    try:
        if not (self.interfaceDbApp and self.user):
            return []

        connections = self.interfaceDbApp.getUserConnections(self.user.id)
        if not connections:
            return []

        return [
            self.getConnectionReferenceFromUserConnection(connection)
            for connection in connections
        ]
    except Exception as exc:
        logger.error(f"Error getting connection reference list: {str(exc)}")
        return []
def _getProgressLogger(self):
"""Get or create the progress logger instance."""
if self._progressLogger is None:
# Adapter for ProgressLogger: needs .workflow and .chat.storeLog
class _ProgressAdapter:
def __init__(self, svc):
self._svc = svc
@property
def workflow(self):
return self._svc._context.workflow
@property
def chat(self):
return self._svc
self._progressLogger = ProgressLogger(_ProgressAdapter(self))
return self._progressLogger
def createProgressLogger(self) -> ProgressLogger:
    """Public accessor for the service's shared progress logger.

    Returns:
        The instance provided by ``_getProgressLogger``.
    """
    progressLogger = self._getProgressLogger()
    return progressLogger
def progressLogStart(self, operationId: str, serviceName: str, actionName: str, context: str = "", parentOperationId: Optional[str] = None):
    """Start a tracked operation via ``ProgressLogger.startOperation``.

    Args:
        operationId: Unique identifier for the operation.
        serviceName: Name of the service.
        actionName: Name of the action.
        context: Additional context information.
        parentOperationId: Optional operationId of the parent operation,
            forwarded unchanged to the progress logger.

    Returns:
        Whatever ``startOperation`` returns.
    """
    return self._getProgressLogger().startOperation(
        operationId,
        serviceName,
        actionName,
        context,
        parentOperationId,
    )
def progressLogUpdate(self, operationId: str, progress: float, statusUpdate: str = ""):
    """Report progress for an operation via ``ProgressLogger.updateOperation``.

    Args:
        operationId: Identifier of the operation being updated.
        progress: Progress value passed through unchanged.
        statusUpdate: Optional status text.

    Returns:
        Whatever ``updateOperation`` returns.
    """
    return self._getProgressLogger().updateOperation(operationId, progress, statusUpdate)
def progressLogFinish(self, operationId: str, success: bool = True):
    """Close an operation via ``ProgressLogger.finishOperation``.

    Args:
        operationId: Identifier of the operation being finished.
        success: Outcome flag passed through unchanged.

    Returns:
        Whatever ``finishOperation`` returns.
    """
    return self._getProgressLogger().finishOperation(operationId, success)
def getOperationLogId(self, operationId: str) -> Optional[str]:
    """Look up the log entry ID (the start log entry) recorded for an operation.

    Args:
        operationId: Identifier of the operation.

    Returns:
        Whatever ``ProgressLogger.getOperationLogId`` returns for the operation.
    """
    return self._getProgressLogger().getOperationLogId(operationId)