# Copyright (c) 2025 Patrick Motsch
# All rights reserved.

"""
Interface to LucyDOM database and AI Connectors.
Uses the JSON connector for data access with added language support.
"""

import logging
import uuid
import math
from typing import Dict, Any, List, Optional, Union
import asyncio

from modules.security.rbac import RbacClass
from modules.datamodels.datamodelRbac import AccessRuleContext
from modules.datamodels.datamodelUam import AccessLevel
from modules.datamodels.datamodelChat import (
    ChatDocument, ChatLog, ChatMessage, ChatWorkflow,
    WorkflowModeEnum, UserInputRequest
)
import json
from modules.datamodels.datamodelUam import User

# DYNAMIC PART: Connectors to the Interface
from modules.connectors.connectorDbPostgre import DatabaseConnector
from modules.shared.timeUtils import getUtcTimestamp, parseTimestamp
from modules.datamodels.datamodelPagination import PaginationParams, PaginatedResult
from modules.interfaces.interfaceRbac import getRecordsetWithRBAC

# Basic Configurations
from modules.shared.configuration import APP_CONFIG

logger = logging.getLogger(__name__)

# Singleton factory for Chat instances
_chatInterfaces = {}


def storeDebugMessageAndDocuments(message, currentUser, mandateId=None, featureInstanceId=None) -> None:
    """
    Store message and documents (metadata and file bytes) for debugging purposes.

    Structure:
        {log_dir}/debug/messages/m_round_task_action_timestamp/documentlist_label/
            - message.json, message_text.txt
            - document_###_metadata.json
            - document_###_ (actual file bytes)

    Args:
        message: ChatMessage object to store
        currentUser: Current user for component interface access
        mandateId: Mandate ID for RBAC context (avoids overwriting singleton state)
        featureInstanceId: Feature instance ID for RBAC context
    """
    try:
        import os
        from datetime import datetime, UTC
        from modules.shared.debugLogger import _getBaseDebugDir, _ensureDir
        from modules.interfaces.interfaceDbManagement import getInterface

        # Create base debug directory (use base debug dir, not prompts subdirectory)
        baseDebugDir = _getBaseDebugDir()
        debug_root = os.path.join(baseDebugDir, 'messages')
        _ensureDir(debug_root)

        # Timestamp with millisecond precision (strip the last 3 microsecond digits)
        timestamp = datetime.now(UTC).strftime('%Y%m%d-%H%M%S-%f')[:-3]

        # Create message folder name: timestamp_m_round_task_action
        # Use actual values from the message, not defaults
        round_str = str(message.roundNumber) if message.roundNumber is not None else "0"
        task_str = str(message.taskNumber) if message.taskNumber is not None else "0"
        action_str = str(message.actionNumber) if message.actionNumber is not None else "0"
        message_folder = f"{timestamp}_m_{round_str}_{task_str}_{action_str}"
        message_path = os.path.join(debug_root, message_folder)
        os.makedirs(message_path, exist_ok=True)

        # Store message data - build the dict manually instead of model_dump()
        # for compatibility across Pydantic versions
        message_file = os.path.join(message_path, "message.json")
        with open(message_file, "w", encoding="utf-8") as f:
            message_dict = {
                "id": message.id,
                "workflowId": message.workflowId,
                "parentMessageId": message.parentMessageId,
                "message": message.message,
                "role": message.role,
                "status": message.status,
                "sequenceNr": message.sequenceNr,
                "publishedAt": message.publishedAt,
                "roundNumber": message.roundNumber,
                "taskNumber": message.taskNumber,
                "actionNumber": message.actionNumber,
                "documentsLabel": message.documentsLabel,
                "actionId": message.actionId,
                "actionMethod": message.actionMethod,
                "actionName": message.actionName,
                "success": message.success,
                "documents": []
            }
            json.dump(message_dict, f, indent=2, ensure_ascii=False, default=str)

        # Store message content as plain text for quick inspection
        if message.message:
            message_text_file = os.path.join(message_path, "message_text.txt")
            with open(message_text_file, "w", encoding="utf-8") as f:
                f.write(str(message.message))

        # Store documents if provided
        if message.documents and len(message.documents) > 0:
            # Group documents by documentsLabel.
            # NOTE(review): the label is read from the MESSAGE, not the document,
            # so all documents of one message end up in a single group — confirm
            # this is intended before grouping per-document.
            documents_by_label = {}
            for doc in message.documents:
                label = message.documentsLabel or 'default'
                documents_by_label.setdefault(label, []).append(doc)

            # Create a subfolder per document label
            for label, docs in documents_by_label.items():
                # Sanitize label for filesystem use
                safe_label = "".join(c for c in str(label) if c.isalnum() or c in (' ', '-', '_')).rstrip()
                safe_label = safe_label.replace(' ', '_')
                if not safe_label:
                    safe_label = "default"
                label_folder = os.path.join(message_path, safe_label)
                _ensureDir(label_folder)

                # Store each document: metadata JSON plus (best-effort) file bytes
                for i, doc in enumerate(docs):
                    doc_meta = {
                        "id": doc.id,
                        "messageId": doc.messageId,
                        "fileId": doc.fileId,
                        "fileName": doc.fileName,
                        "fileSize": doc.fileSize,
                        "mimeType": doc.mimeType,
                        "roundNumber": doc.roundNumber,
                        "taskNumber": doc.taskNumber,
                        "actionNumber": doc.actionNumber,
                        "actionId": doc.actionId
                    }
                    doc_meta_file = os.path.join(label_folder, f"document_{i+1:03d}_metadata.json")
                    with open(doc_meta_file, "w", encoding="utf-8") as f:
                        json.dump(doc_meta, f, indent=2, ensure_ascii=False, default=str)

                    # Also store the actual file bytes next to the metadata
                    try:
                        componentInterface = getInterface(
                            currentUser, mandateId=mandateId, featureInstanceId=featureInstanceId
                        )
                        file_bytes = componentInterface.getFileData(doc.fileId)
                        if file_bytes:
                            # Build a safe filename preserving the original name
                            safe_name = doc.fileName or f"document_{i+1:03d}"
                            # Avoid path traversal
                            safe_name = os.path.basename(safe_name)
                            doc_file_path = os.path.join(label_folder, f"document_{i+1:03d}_" + safe_name)
                            with open(doc_file_path, "wb") as df:
                                df.write(file_bytes)
                    except Exception as e:
                        # Best-effort: file bytes are optional in the debug dump,
                        # but leave a trace instead of swallowing silently
                        logger.debug(f"Could not store file bytes for document: {e}")
    except Exception as e:
        # Silent fail by design - debug storage must never break the main flow,
        # but log at debug level so failures are diagnosable
        logger.debug(f"storeDebugMessageAndDocuments failed: {e}")


class ChatObjects:
    """
    Interface to Chat database and AI Connectors.
    Uses the JSON connector for data access with added language support.
    """

    def __init__(self, currentUser: Optional[User] = None, mandateId: Optional[str] = None,
                 featureInstanceId: Optional[str] = None):
        """Initializes the Chat Interface.

        Args:
            currentUser: The authenticated user
            mandateId: The mandate ID from RequestContext (X-Mandate-Id header)
            featureInstanceId: The feature instance ID from RequestContext
                (X-Feature-Instance-Id header)
        """
        # Initialize variables
        self.currentUser = currentUser  # Store User object directly
        self.userId = currentUser.id if currentUser else None
        # Use mandateId from parameter (Request-Context), not from user object
        self.mandateId = mandateId
        self.featureInstanceId = featureInstanceId
        self.rbac = None  # RBAC interface

        # Initialize services
        self._initializeServices()
        # Initialize database
        self._initializeDatabase()

        # Set user context if provided
        if currentUser:
            self.setUserContext(currentUser, mandateId=mandateId, featureInstanceId=featureInstanceId)

    # ===== Generic Utility Methods =====

    def _isObjectField(self, fieldType) -> bool:
        """Check if a field type represents a complex object (not a simple scalar type)."""
        # Simple scalar types are not objects; everything else is
        if fieldType in (str, int, float, bool, type(None)):
            return False
        return True

    def _separateObjectFields(self, model_class, data: Dict[str, Any]) -> tuple[Dict[str, Any], Dict[str, Any]]:
        """Separate simple fields from object fields based on Pydantic model structure.

        Returns:
            (simpleFields, objectFields) - simpleFields go to the DB row (scalars
            and JSONB-typed values); objectFields are handled separately (stored
            in normalized tables or filtered out).
        """
        simpleFields = {}
        objectFields = {}
        # Get field information from the Pydantic model (Pydantic v2 only)
        modelFields = model_class.model_fields
        for fieldName, value in data.items():
            if fieldName in modelFields:
                fieldInfo = modelFields[fieldName]
                fieldType = fieldInfo.annotation
                # Always route relational/object fields to objectFields for separate
                # handling - these are stored in separate normalized tables, not as JSONB
                if fieldName in ['documents', 'stats', 'logs', 'messages']:
                    objectFields[fieldName] = value
                    continue
                # JSONB detection is purely type-based (Dict/List annotations),
                # no hardcoded field names
                if (fieldType == dict or fieldType == list or
                        (hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list))):
                    # Store as JSONB - include in simpleFields for database storage
                    simpleFields[fieldName] = value
                elif isinstance(value, (str, int, float, bool, type(None))):
                    # Simple scalar types
                    simpleFields[fieldName] = value
                else:
                    # Complex objects that should be filtered out
                    objectFields[fieldName] = value
            else:
                # Field not in model - metadata fields (_createdBy, _createdAt, ...)
                # pass through to the connector; scalars are kept; complex objects
                # are filtered out
                if fieldName.startswith("_"):
                    simpleFields[fieldName] = value
                elif isinstance(value, (str, int, float, bool, type(None))):
                    simpleFields[fieldName] = value
                else:
                    objectFields[fieldName] = value
        return simpleFields, objectFields
information from the Pydantic model modelFields = model_class.model_fields for fieldName, value in data.items(): # Check if this field should be stored as JSONB in the database if fieldName in modelFields: fieldInfo = modelFields[fieldName] # Pydantic v2 only fieldType = fieldInfo.annotation # Always route relational/object fields to object_fields for separate handling # These fields are stored in separate normalized tables, not as JSONB if fieldName in ['documents', 'stats', 'logs', 'messages']: objectFields[fieldName] = value continue # Check if this is a JSONB field (Dict, List, or complex types) # Purely type-based detection - no hardcoded field names if (fieldType == dict or fieldType == list or (hasattr(fieldType, '__origin__') and fieldType.__origin__ in (dict, list))): # Store as JSONB - include in simple_fields for database storage simpleFields[fieldName] = value elif isinstance(value, (str, int, float, bool, type(None))): # Simple scalar types simpleFields[fieldName] = value else: # Complex objects that should be filtered out objectFields[fieldName] = value else: # Field not in model - treat as scalar if simple, otherwise filter out # BUT: always include metadata fields (_createdBy, _createdAt, etc.) as they're handled by connector if fieldName.startswith("_"): # Metadata fields should be passed through to connector simpleFields[fieldName] = value elif isinstance(value, (str, int, float, bool, type(None))): simpleFields[fieldName] = value else: objectFields[fieldName] = value return simpleFields, objectFields def _initializeServices(self): pass def setUserContext(self, currentUser: User, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None): """Sets the user context for the interface. 
Args: currentUser: The authenticated user mandateId: The mandate ID from RequestContext (X-Mandate-Id header) featureInstanceId: The feature instance ID from RequestContext (X-Feature-Instance-Id header) """ self.currentUser = currentUser # Store User object directly self.userId = currentUser.id # Use mandateId from parameter (Request-Context), not from user object self.mandateId = mandateId self.featureInstanceId = featureInstanceId if not self.userId: raise ValueError("Invalid user context: id is required") # Note: mandateId is ALWAYS optional here - it comes from Request-Context, not from User. # Users are NOT assigned to mandates by design - they get mandate context from the request. # sysAdmin users can additionally perform cross-mandate operations. # Without mandateId, operations will be filtered to accessible mandates via RBAC. # Add language settings self.userLanguage = currentUser.language # Default user language # Initialize RBAC interface if not self.currentUser: raise ValueError("User context is required for RBAC") # Get DbApp connection for RBAC AccessRule queries from modules.security.rootAccess import getRootDbAppConnector dbApp = getRootDbAppConnector() self.rbac = RbacClass(self.db, dbApp=dbApp) # Update database context self.db.updateContext(self.userId) def __del__(self): """Cleanup method to close database connection.""" if hasattr(self, 'db') and self.db is not None: try: self.db.close() except Exception as e: logger.error(f"Error closing database connection: {e}") def _initializeDatabase(self): """Initializes the database connection directly.""" try: # Get configuration values with defaults dbHost = APP_CONFIG.get("DB_HOST", "_no_config_default_data") dbDatabase = "poweron_chat" dbUser = APP_CONFIG.get("DB_USER") dbPassword = APP_CONFIG.get("DB_PASSWORD_SECRET") dbPort = int(APP_CONFIG.get("DB_PORT", 5432)) # Create database connector directly self.db = DatabaseConnector( dbHost=dbHost, dbDatabase=dbDatabase, dbUser=dbUser, 
dbPassword=dbPassword, dbPort=dbPort, userId=self.userId ) logger.info("Database initialized successfully") except Exception as e: logger.error(f"Failed to initialize database: {str(e)}") raise def _initRecords(self): """Initializes standard records in the database if they don't exist.""" pass def _getRecordset(self, modelClass, recordFilter=None, **kwargs): """Wrapper for getRecordsetWithRBAC that automatically includes mandateId/featureInstanceId.""" return getRecordsetWithRBAC( self.db, modelClass, self.currentUser, recordFilter=recordFilter, mandateId=self.mandateId, featureInstanceId=self.featureInstanceId, **kwargs ) def checkRbacPermission( self, modelClass: type, operation: str, recordId: Optional[str] = None ) -> bool: """ Check RBAC permission for a specific operation on a table. Args: modelClass: Pydantic model class for the table operation: Operation to check ('create', 'update', 'delete', 'read') recordId: Optional record ID for specific record check Returns: Boolean indicating permission """ if not self.rbac or not self.currentUser: return False tableName = modelClass.__name__ # Use buildDataObjectKey for semantic namespace lookup from modules.interfaces.interfaceRbac import buildDataObjectKey objectKey = buildDataObjectKey(tableName) permissions = self.rbac.getUserPermissions( self.currentUser, AccessRuleContext.DATA, objectKey, mandateId=self.mandateId, featureInstanceId=self.featureInstanceId ) if operation == "create": return permissions.create != AccessLevel.NONE elif operation == "update": return permissions.update != AccessLevel.NONE elif operation == "delete": return permissions.delete != AccessLevel.NONE elif operation == "read": return permissions.read != AccessLevel.NONE else: return False def _applyFilters(self, records: List[Dict[str, Any]], filters: Dict[str, Any]) -> List[Dict[str, Any]]: """ Apply filter criteria to records. 
Supports: - General search: {"search": "text"} - searches across all text fields - Field-specific filters: - Simple: {"status": "running"} - equals match - With operator: {"status": {"operator": "equals", "value": "running"}} - Operators: equals, contains, gt, gte, lt, lte, in, notIn, startsWith, endsWith Args: records: List of record dictionaries to filter filters: Filter criteria dictionary Returns: Filtered list of records """ if not filters or not records: return records filtered = [] for record in records: matches = True # Handle general search across text fields if "search" in filters: search_term = str(filters["search"]).lower() if search_term: # Search in all string fields found = False for key, value in record.items(): if isinstance(value, str) and search_term in value.lower(): found = True break elif isinstance(value, (int, float)) and search_term in str(value): found = True break if not found: matches = False # Handle field-specific filters for field_name, filter_value in filters.items(): if field_name == "search": continue # Already handled above if field_name not in record: matches = False break record_value = record.get(field_name) # Handle simple value (equals operator) if not isinstance(filter_value, dict): if record_value != filter_value: matches = False break continue # Handle filter with operator operator = filter_value.get("operator", "equals") filter_val = filter_value.get("value") if operator in ["equals", "eq"]: if record_value != filter_val: matches = False break elif operator == "contains": record_str = str(record_value).lower() if record_value is not None else "" filter_str = str(filter_val).lower() if filter_val is not None else "" if filter_str not in record_str: matches = False break elif operator == "startsWith": record_str = str(record_value).lower() if record_value is not None else "" filter_str = str(filter_val).lower() if filter_val is not None else "" if not record_str.startswith(filter_str): matches = False break elif operator == 
"endsWith": record_str = str(record_value).lower() if record_value is not None else "" filter_str = str(filter_val).lower() if filter_val is not None else "" if not record_str.endswith(filter_str): matches = False break elif operator == "gt": try: record_num = float(record_value) if record_value is not None else float('-inf') filter_num = float(filter_val) if filter_val is not None else float('-inf') if record_num <= filter_num: matches = False break except (ValueError, TypeError): matches = False break elif operator == "gte": try: record_num = float(record_value) if record_value is not None else float('-inf') filter_num = float(filter_val) if filter_val is not None else float('-inf') if record_num < filter_num: matches = False break except (ValueError, TypeError): matches = False break elif operator == "lt": try: record_num = float(record_value) if record_value is not None else float('inf') filter_num = float(filter_val) if filter_val is not None else float('inf') if record_num >= filter_num: matches = False break except (ValueError, TypeError): matches = False break elif operator == "lte": try: record_num = float(record_value) if record_value is not None else float('inf') filter_num = float(filter_val) if filter_val is not None else float('inf') if record_num > filter_num: matches = False break except (ValueError, TypeError): matches = False break elif operator == "in": if not isinstance(filter_val, list): filter_val = [filter_val] if record_value not in filter_val: matches = False break elif operator == "notIn": if not isinstance(filter_val, list): filter_val = [filter_val] if record_value in filter_val: matches = False break else: # Unknown operator - default to equals if record_value != filter_val: matches = False break if matches: filtered.append(record) return filtered def _applySorting(self, records: List[Dict[str, Any]], sortFields: List[Any]) -> List[Dict[str, Any]]: """Apply multi-level sorting to records using stable sort (sorts from least to most 
significant field).""" if not sortFields: return records # Start with a copy to avoid modifying original sortedRecords = list(records) # Sort from least significant to most significant field (reverse order) # Python's sort is stable, so this creates proper multi-level sorting for sortField in reversed(sortFields): # Handle both dict and object formats if isinstance(sortField, dict): fieldName = sortField.get("field") direction = sortField.get("direction", "asc") else: fieldName = getattr(sortField, "field", None) direction = getattr(sortField, "direction", "asc") if not fieldName: continue isDesc = (direction == "desc") def sortKey(record): value = record.get(fieldName) # Handle None values - place them at the end for both directions if value is None: # Use a special value that sorts last return (1, "") # (is_none_flag, empty_value) - sorts after (0, ...) else: # Return tuple with type indicator for proper comparison if isinstance(value, (int, float)): return (0, value) elif isinstance(value, str): return (0, value) elif isinstance(value, bool): return (0, value) else: return (0, str(value)) # Sort with reverse parameter for descending sortedRecords.sort(key=sortKey, reverse=isDesc) return sortedRecords # Utilities def getInitialId(self, model_class: type) -> Optional[str]: """Returns the initial ID for a table.""" return self.db.getInitialId(model_class) # Workflow methods def getWorkflows(self, pagination: Optional[PaginationParams] = None) -> Union[List[Dict[str, Any]], PaginatedResult]: """ Returns workflows based on user access level. Supports optional pagination, sorting, and filtering. Args: pagination: Optional pagination parameters. If None, returns all items. 
Returns: If pagination is None: List[Dict[str, Any]] If pagination is provided: PaginatedResult with items and metadata """ # Use RBAC filtering with featureInstanceId for instance-level isolation filteredWorkflows = self._getRecordset(ChatWorkflow) # If no pagination requested, return all items (no sorting - frontend handles it) if pagination is None: return filteredWorkflows # Apply filtering (if filters provided) if pagination.filters: filteredWorkflows = self._applyFilters(filteredWorkflows, pagination.filters) # Apply sorting (in order of sortFields) - only if provided by frontend if pagination.sort: filteredWorkflows = self._applySorting(filteredWorkflows, pagination.sort) # Count total items after filters totalItems = len(filteredWorkflows) totalPages = math.ceil(totalItems / pagination.pageSize) if totalItems > 0 else 0 # Apply pagination (skip/limit) startIdx = (pagination.page - 1) * pagination.pageSize endIdx = startIdx + pagination.pageSize pagedWorkflows = filteredWorkflows[startIdx:endIdx] return PaginatedResult( items=pagedWorkflows, totalItems=totalItems, totalPages=totalPages ) def getWorkflow(self, workflowId: str) -> Optional[ChatWorkflow]: """Returns a workflow by ID if user has access.""" # Use RBAC filtering with featureInstanceId for instance-level isolation workflows = self._getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: return None workflow = workflows[0] try: logs = self.getLogs(workflowId) messages = self.getMessages(workflowId) # Validate workflow data against ChatWorkflow model # Explicit type coercion: DB may store numeric fields as TEXT on some platforms def _toInt(v, default=0): try: return int(v) if v is not None else default except (ValueError, TypeError): return default def _toFloat(v, default=None): try: return float(v) if v is not None else (default if default is not None else getUtcTimestamp()) except (ValueError, TypeError): return default if default is not None else getUtcTimestamp() return 
ChatWorkflow( id=workflow["id"], status=workflow.get("status", "running"), name=workflow.get("name"), currentRound=_toInt(workflow.get("currentRound")), currentTask=_toInt(workflow.get("currentTask")), currentAction=_toInt(workflow.get("currentAction")), totalTasks=_toInt(workflow.get("totalTasks")), totalActions=_toInt(workflow.get("totalActions")), lastActivity=_toFloat(workflow.get("lastActivity")), startedAt=_toFloat(workflow.get("startedAt")), logs=logs, messages=messages ) except Exception as e: logger.error(f"Error validating workflow data: {str(e)}") return None def createWorkflow(self, workflowData: Dict[str, Any]) -> ChatWorkflow: """Creates a new workflow if user has permission.""" if not self.checkRbacPermission(ChatWorkflow, "create"): raise PermissionError("No permission to create workflows") # Set timestamp if not present currentTime = getUtcTimestamp() if "startedAt" not in workflowData: workflowData["startedAt"] = currentTime if "lastActivity" not in workflowData: workflowData["lastActivity"] = currentTime # Set mandateId and featureInstanceId from context for proper data isolation if "mandateId" not in workflowData or not workflowData["mandateId"]: # Use request context mandateId, or fall back to Root mandate effectiveMandateId = self.mandateId if not effectiveMandateId: # Fall back to Root mandate (first mandate in system) try: from modules.datamodels.datamodelUam import Mandate from modules.security.rootAccess import getRootDbAppConnector dbAppConn = getRootDbAppConnector() allMandates = dbAppConn.getRecordset(Mandate) if allMandates: effectiveMandateId = allMandates[0].get("id") logger.debug(f"createWorkflow: Using Root mandate {effectiveMandateId}") except Exception as e: logger.warning(f"Could not get Root mandate: {e}") # Note: ChatWorkflow has featureInstanceId for multi-tenancy isolation. # Child tables (ChatMessage, ChatLog, ChatDocument) are user-owned # and do NOT store featureInstanceId - they inherit isolation from ChatWorkflow. 
# Ensure featureInstanceId is set from context if not already in workflowData if "featureInstanceId" not in workflowData or not workflowData.get("featureInstanceId"): if self.featureInstanceId: workflowData["featureInstanceId"] = self.featureInstanceId # Use generic field separation based on ChatWorkflow model simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData) # Create workflow in database created = self.db.recordCreate(ChatWorkflow, simpleFields) # Convert to ChatWorkflow model (empty related data for new workflow) return ChatWorkflow( id=created["id"], status=created.get("status", "running"), name=created.get("name"), currentRound=created.get("currentRound", 0) or 0, currentTask=created.get("currentTask", 0) or 0, currentAction=created.get("currentAction", 0) or 0, totalTasks=created.get("totalTasks", 0) or 0, totalActions=created.get("totalActions", 0) or 0, lastActivity=created.get("lastActivity", currentTime), startedAt=created.get("startedAt", currentTime), logs=[], messages=[], stats=[], workflowMode=created["workflowMode"], maxSteps=created.get("maxSteps", 1) ) def updateWorkflow(self, workflowId: str, workflowData: Dict[str, Any]) -> ChatWorkflow: """Updates a workflow if user has access.""" # Check if the workflow exists and user has access workflow = self.getWorkflow(workflowId) if not workflow: return None if not self.checkRbacPermission(ChatWorkflow, "update", workflowId): raise PermissionError(f"No permission to update workflow {workflowId}") # Use generic field separation based on ChatWorkflow model simpleFields, objectFields = self._separateObjectFields(ChatWorkflow, workflowData) # Set update time for main workflow simpleFields["lastActivity"] = getUtcTimestamp() # Update main workflow in database updated = self.db.recordModify(ChatWorkflow, workflowId, simpleFields) # Removed cascade writes for logs/messages/stats during workflow update. # CUD for child entities must be executed via dedicated service methods. 
# Load fresh data from normalized tables logs = self.getLogs(workflowId) messages = self.getMessages(workflowId) return ChatWorkflow( id=updated["id"], status=updated.get("status", workflow.status), name=updated.get("name", workflow.name), currentRound=updated.get("currentRound", workflow.currentRound), currentTask=updated.get("currentTask", workflow.currentTask), currentAction=updated.get("currentAction", workflow.currentAction), totalTasks=updated.get("totalTasks", workflow.totalTasks), totalActions=updated.get("totalActions", workflow.totalActions), lastActivity=updated.get("lastActivity", workflow.lastActivity), startedAt=updated.get("startedAt", workflow.startedAt), logs=logs, messages=messages ) def deleteWorkflow(self, workflowId: str) -> bool: """Deletes a workflow and all related data if user has access.""" try: # Check if the workflow exists and user has access workflow = self.getWorkflow(workflowId) if not workflow: return False if not self.checkRbacPermission(ChatWorkflow, "delete", workflowId): raise PermissionError(f"No permission to delete workflow {workflowId}") # CASCADE DELETE: Delete all related data first # 1. Delete all workflow messages and their related data messages = self.getMessages(workflowId) for message in messages: messageId = message.id if messageId: # Delete message documents (but NOT the files!) try: existing_docs = self._getRecordset(ChatDocument, recordFilter={"messageId": messageId}) for doc in existing_docs: self.db.recordDelete(ChatDocument, doc["id"]) except Exception as e: logger.warning(f"Error deleting documents for message {messageId}: {e}") # Delete the message itself self.db.recordDelete(ChatMessage, messageId) # 2. Delete workflow stats # 2. Delete workflow logs existing_logs = self._getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) for log in existing_logs: self.db.recordDelete(ChatLog, log["id"]) # 4. 
Finally delete the workflow itself success = self.db.recordDelete(ChatWorkflow, workflowId) return success except Exception as e: logger.error(f"Error deleting workflow {workflowId}: {str(e)}") return False # Message methods def getMessages(self, workflowId: str, pagination: Optional[PaginationParams] = None) -> Union[List[ChatMessage], PaginatedResult]: """ Returns messages for a workflow if user has access to the workflow. Supports optional pagination, sorting, and filtering. Args: workflowId: The workflow ID to get messages for pagination: Optional pagination parameters. If None, returns all items. Returns: If pagination is None: List[ChatMessage] If pagination is provided: PaginatedResult with items and metadata """ # Check workflow access first (without calling getWorkflow to avoid circular reference) # Use RBAC filtering workflows = self._getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: if pagination is None: return [] return PaginatedResult(items=[], totalItems=0, totalPages=0) # Get messages for this workflow from normalized table messages = self._getRecordset(ChatMessage, recordFilter={"workflowId": workflowId}) # Convert raw messages to dict format for sorting/filtering messageDicts = [] for msg in messages: messageDicts.append({ "id": msg.get("id"), "workflowId": msg.get("workflowId"), "parentMessageId": msg.get("parentMessageId"), "documentsLabel": msg.get("documentsLabel"), "message": msg.get("message"), "role": msg.get("role", "assistant"), "status": msg.get("status", "step"), "sequenceNr": msg.get("sequenceNr", 0), "publishedAt": msg.get("publishedAt", msg.get("timestamp", getUtcTimestamp())), "success": msg.get("success"), "actionId": msg.get("actionId"), "actionMethod": msg.get("actionMethod"), "actionName": msg.get("actionName"), "roundNumber": msg.get("roundNumber"), "taskNumber": msg.get("taskNumber"), "actionNumber": msg.get("actionNumber"), "taskProgress": msg.get("taskProgress"), "actionProgress": 
msg.get("actionProgress") }) # Apply default sorting by publishedAt if no sort specified. # Use parseTimestamp to tolerate mixed DB types (float/string) on INT. if pagination is None or not pagination.sort: messageDicts.sort(key=lambda x: parseTimestamp(x.get("publishedAt"), default=0)) # Apply filtering (if filters provided) if pagination and pagination.filters: messageDicts = self._applyFilters(messageDicts, pagination.filters) # Apply sorting (in order of sortFields) if pagination and pagination.sort: messageDicts = self._applySorting(messageDicts, pagination.sort) # If no pagination requested, return all items if pagination is None: # Convert messages to ChatMessage objects and load documents chat_messages = [] for msg in messageDicts: # Load documents from normalized documents table documents = self.getDocuments(msg["id"]) # Create ChatMessage object with loaded documents chat_message = ChatMessage( id=msg["id"], workflowId=msg["workflowId"], parentMessageId=msg.get("parentMessageId"), documents=documents, documentsLabel=msg.get("documentsLabel"), message=msg.get("message"), role=msg.get("role", "assistant"), status=msg.get("status", "step"), sequenceNr=msg.get("sequenceNr", 0), publishedAt=msg.get("publishedAt", getUtcTimestamp()), success=msg.get("success"), actionId=msg.get("actionId"), actionMethod=msg.get("actionMethod"), actionName=msg.get("actionName"), roundNumber=msg.get("roundNumber"), taskNumber=msg.get("taskNumber"), actionNumber=msg.get("actionNumber"), taskProgress=msg.get("taskProgress"), actionProgress=msg.get("actionProgress") ) chat_messages.append(chat_message) return chat_messages # Count total items after filters totalItems = len(messageDicts) totalPages = math.ceil(totalItems / pagination.pageSize) if totalItems > 0 else 0 # Apply pagination (skip/limit) startIdx = (pagination.page - 1) * pagination.pageSize endIdx = startIdx + pagination.pageSize pagedMessageDicts = messageDicts[startIdx:endIdx] # Convert messages to ChatMessage objects 
and load documents chat_messages = [] for msg in pagedMessageDicts: # Load documents from normalized documents table documents = self.getDocuments(msg["id"]) # Create ChatMessage object with loaded documents chat_message = ChatMessage( id=msg["id"], workflowId=msg["workflowId"], parentMessageId=msg.get("parentMessageId"), documents=documents, documentsLabel=msg.get("documentsLabel"), message=msg.get("message"), role=msg.get("role", "assistant"), status=msg.get("status", "step"), sequenceNr=msg.get("sequenceNr", 0), publishedAt=msg.get("publishedAt", getUtcTimestamp()), success=msg.get("success"), actionId=msg.get("actionId"), actionMethod=msg.get("actionMethod"), actionName=msg.get("actionName"), roundNumber=msg.get("roundNumber"), taskNumber=msg.get("taskNumber"), actionNumber=msg.get("actionNumber"), taskProgress=msg.get("taskProgress"), actionProgress=msg.get("actionProgress") ) chat_messages.append(chat_message) return PaginatedResult( items=chat_messages, totalItems=totalItems, totalPages=totalPages ) def createMessage(self, messageData: Dict[str, Any], event_manager=None) -> ChatMessage: """Creates a message for a workflow if user has access.""" try: # Ensure ID is present if "id" not in messageData or not messageData["id"]: messageData["id"] = f"msg_{uuid.uuid4()}" # Check required fields requiredFields = ["id", "workflowId"] for field in requiredFields: if field not in messageData: logger.error(f"Required field '{field}' missing in messageData") raise ValueError(f"Required field '{field}' missing in message data") # Check workflow access workflowId = messageData["workflowId"] workflow = self.getWorkflow(workflowId) if not workflow: raise PermissionError(f"No access to workflow {workflowId}") if not self.checkRbacPermission(ChatWorkflow, "update", workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") # Validate that ID is not None if messageData["id"] is None: messageData["id"] = f"msg_{uuid.uuid4()}" 
logger.warning(f"Automatically generated ID for workflow message: {messageData['id']}") # Set status if not present if "status" not in messageData: messageData["status"] = "step" # Default status for intermediate messages # Ensure role and agentName are present if "role" not in messageData: messageData["role"] = "assistant" if messageData.get("agentName") else "user" if "agentName" not in messageData: messageData["agentName"] = "" # CRITICAL FIX: Automatically set roundNumber, taskNumber, and actionNumber if not provided # This ensures messages have the correct progress context when workflows are continued if "roundNumber" not in messageData: messageData["roundNumber"] = workflow.currentRound if "taskNumber" not in messageData: messageData["taskNumber"] = workflow.currentTask if "actionNumber" not in messageData: messageData["actionNumber"] = workflow.currentAction # Note: Chat data is user-owned, no mandate/featureInstance context stored # mandateId/featureInstanceId removed from ChatMessage model # Use generic field separation based on ChatMessage model simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData) # Handle documents separately - they will be stored in normalized documents table documents_to_create = objectFields.get("documents", []) # Create message in normalized table using only simple fields createdMessage = self.db.recordCreate(ChatMessage, simpleFields) # Create documents in normalized documents table created_documents = [] logger.debug(f"Creating {len(documents_to_create)} document(s) for message {createdMessage['id']}") for idx, doc_data in enumerate(documents_to_create): try: # Normalize to plain dict before assignment if isinstance(doc_data, ChatDocument): doc_dict = doc_data.model_dump() elif isinstance(doc_data, dict): doc_dict = dict(doc_data) else: # Attempt to coerce to ChatDocument then dump try: doc_dict = ChatDocument(**doc_data).model_dump() except Exception as e: logger.error(f"Invalid document data type for 
message creation (document {idx + 1}/{len(documents_to_create)}): {e}") continue # Ensure messageId is set doc_dict["messageId"] = createdMessage["id"] logger.debug(f"Creating document {idx + 1}/{len(documents_to_create)}: fileName={doc_dict.get('fileName', 'unknown')}, fileId={doc_dict.get('fileId', 'unknown')}, messageId={doc_dict.get('messageId', 'unknown')}") created_doc = self.createDocument(doc_dict) if created_doc: created_documents.append(created_doc) logger.debug(f"Successfully created document {idx + 1}/{len(documents_to_create)}: {created_doc.fileName} (id: {created_doc.id})") else: logger.error(f"Failed to create document {idx + 1}/{len(documents_to_create)}: createDocument returned None for fileName={doc_dict.get('fileName', 'unknown')}") except Exception as e: logger.error(f"Error processing document {idx + 1}/{len(documents_to_create)}: {e}", exc_info=True) logger.info(f"Created {len(created_documents)}/{len(documents_to_create)} document(s) for message {createdMessage['id']}") # Convert to ChatMessage model chat_message = ChatMessage( id=createdMessage["id"], workflowId=createdMessage["workflowId"], parentMessageId=createdMessage.get("parentMessageId"), agentName=createdMessage.get("agentName"), documents=created_documents, documentsLabel=createdMessage.get("documentsLabel"), message=createdMessage.get("message"), role=createdMessage.get("role", "assistant"), status=createdMessage.get("status", "step"), sequenceNr=len(workflow.messages) + 1, # Use messages list length for sequence number publishedAt=createdMessage.get("publishedAt", getUtcTimestamp()), stats=objectFields.get("stats"), # Use stats from objectFields roundNumber=createdMessage.get("roundNumber"), taskNumber=createdMessage.get("taskNumber"), actionNumber=createdMessage.get("actionNumber"), success=createdMessage.get("success"), actionId=createdMessage.get("actionId"), actionMethod=createdMessage.get("actionMethod"), actionName=createdMessage.get("actionName") ) # Emit message event for 
streaming (if event manager is provided) if event_manager: try: message_timestamp = parseTimestamp(chat_message.publishedAt, default=getUtcTimestamp()) # Emit message event in exact chatData format: {type, createdAt, item} asyncio.create_task(event_manager.emit_event( context_id=workflowId, event_type="chatdata", data={ "type": "message", "createdAt": message_timestamp, "item": chat_message.dict() }, event_category="chat" )) except Exception as e: logger.debug(f"Could not emit message event: {e}") # Debug: Store message and documents for debugging - only if debug enabled storeDebugMessageAndDocuments(chat_message, self.currentUser, mandateId=self.mandateId, featureInstanceId=self.featureInstanceId) return chat_message except Exception as e: logger.error(f"Error creating workflow message: {str(e)}") return None def updateMessage(self, messageId: str, messageData: Dict[str, Any]) -> Optional[ChatMessage]: """Updates a workflow message if user has access to the workflow.""" try: # Ensure messageId is provided if not messageId: logger.error("No messageId provided for updateMessage") raise ValueError("messageId cannot be empty") # Check if message exists in database messages = self._getRecordset(ChatMessage, recordFilter={"id": messageId}) if not messages: logger.warning(f"Message with ID {messageId} does not exist in database") # If message doesn't exist but we have workflowId, create it if "workflowId" in messageData: workflowId = messageData.get("workflowId") # Check workflow access workflow = self.getWorkflow(workflowId) if not workflow: raise PermissionError(f"No access to workflow {workflowId}") if not self.checkRbacPermission(ChatWorkflow, "update", workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") logger.info(f"Creating new message with ID {messageId} for workflow {workflowId}") return self.db.recordCreate(ChatMessage, messageData) else: logger.error(f"Workflow ID missing for new message {messageId}") return None # Update 
existing message existingMessage = messages[0] # Check workflow access workflowId = existingMessage.get("workflowId") workflow = self.getWorkflow(workflowId) if not workflow: raise PermissionError(f"No access to workflow {workflowId}") if not self.checkRbacPermission(ChatWorkflow, "update", workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") # Use generic field separation based on ChatMessage model simpleFields, objectFields = self._separateObjectFields(ChatMessage, messageData) # Ensure required fields present for key in ["role", "agentName"]: if key not in simpleFields and key not in existingMessage: simpleFields[key] = "assistant" if key == "role" else "" # Ensure ID is in the dataset if 'id' not in simpleFields: simpleFields['id'] = messageId # Convert createdAt to startedAt if needed if "createdAt" in simpleFields and "startedAt" not in simpleFields: simpleFields["startedAt"] = simpleFields["createdAt"] del simpleFields["createdAt"] # Update the message with simple fields only updatedMessage = self.db.recordModify(ChatMessage, messageId, simpleFields) # Handle object field updates (documents, stats) inline if 'documents' in objectFields: documents_data = objectFields['documents'] try: for doc_data in documents_data: # Normalize to dict before mutation if isinstance(doc_data, ChatDocument): doc_dict = doc_data.model_dump() elif isinstance(doc_data, dict): doc_dict = dict(doc_data) else: try: doc_dict = ChatDocument(**doc_data).model_dump() except Exception: logger.error("Invalid document data type for message update") continue doc_dict["messageId"] = messageId self.createDocument(doc_dict) except Exception as e: logger.error(f"Error updating message documents: {str(e)}") if not updatedMessage: logger.warning(f"Failed to update message {messageId}") return None # Convert to ChatMessage model return ChatMessage(**updatedMessage) except Exception as e: logger.error(f"Error updating message {messageId}: {str(e)}", exc_info=True) 
raise ValueError(f"Error updating message {messageId}: {str(e)}") def deleteMessage(self, workflowId: str, messageId: str) -> bool: """Deletes a workflow message and all related data if user has access to the workflow.""" try: # Check workflow access workflow = self.getWorkflow(workflowId) if not workflow: logger.warning(f"No access to workflow {workflowId}") return False if not self.checkRbacPermission(ChatWorkflow, "update", workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") # Check if the message exists (bypass RBAC -- workflow access already verified above) matchingMessages = self.db.getRecordset(ChatMessage, recordFilter={"id": messageId, "workflowId": workflowId}) if not matchingMessages: logger.warning(f"Message {messageId} for workflow {workflowId} not found") return False # CASCADE DELETE: Delete all related data first # 1. Delete message documents (but NOT the files themselves) # Bypass RBAC -- workflow access already verified, child records may have different _createdBy existing_docs = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId}) for doc in existing_docs: self.db.recordDelete(ChatDocument, doc["id"]) # 2. 
Finally delete the message itself success = self.db.recordDelete(ChatMessage, messageId) return success except Exception as e: logger.error(f"Error deleting message {messageId}: {str(e)}") return False def deleteFileFromMessage(self, workflowId: str, messageId: str, fileId: str) -> bool: """Removes a file reference from a message if user has access.""" try: # Check workflow access workflow = self.getWorkflow(workflowId) if not workflow: logger.warning(f"No access to workflow {workflowId}") return False if not self.checkRbacPermission(ChatWorkflow, "update", workflowId): raise PermissionError(f"No permission to modify workflow {workflowId}") # Get documents for this message from normalized table # Bypass RBAC -- workflow access already verified, child records may have different _createdBy documents = self.db.getRecordset(ChatDocument, recordFilter={"messageId": messageId}) if not documents: # No ChatDocument records -- documents may be stored inline on the message (legacy). # The frontend handles removal optimistically, file itself is preserved. 
logger.debug(f"No ChatDocument records for message {messageId} -- inline/legacy data, nothing to delete") return True # Find and delete the specific document removed = False for doc in documents: docId = doc.get("id") fileIdValue = doc.get("fileId") # Flexible matching approach shouldRemove = ( (docId == fileId) or (fileIdValue == fileId) or (isinstance(docId, str) and str(fileId) in docId) or (isinstance(fileIdValue, str) and str(fileId) in fileIdValue) ) if shouldRemove: # Delete the document from normalized table success = self.db.recordDelete(ChatDocument, docId) if success: removed = True else: logger.warning(f"Failed to delete document {docId}") if not removed: logger.warning(f"No matching file {fileId} found in message {messageId}") return False return True except Exception as e: logger.error(f"Error removing file {fileId} from message {messageId}: {str(e)}") return False # Document methods def getDocuments(self, messageId: str) -> List[ChatDocument]: """Returns documents for a message from normalized table.""" try: documents = self._getRecordset(ChatDocument, recordFilter={"messageId": messageId}) return [ChatDocument(**doc) for doc in documents] except Exception as e: logger.error(f"Error getting message documents: {str(e)}") return [] def createDocument(self, documentData: Dict[str, Any]) -> ChatDocument: """Creates a document for a message in normalized table.""" try: # Note: Chat data is user-owned, no mandate/featureInstance context stored # mandateId/featureInstanceId removed from ChatDocument model # Validate and normalize document data to dict document = ChatDocument(**documentData) logger.debug(f"Creating document in database: fileName={document.fileName}, fileId={document.fileId}, messageId={document.messageId}") created = self.db.recordCreate(ChatDocument, document.model_dump()) if created: created_doc = ChatDocument(**created) logger.debug(f"Successfully created document in database: {created_doc.fileName} (id: {created_doc.id})") return 
created_doc else: logger.error(f"Failed to create document in database: recordCreate returned None for fileName={document.fileName}") return None except Exception as e: logger.error(f"Error creating message document: {str(e)}", exc_info=True) return None # Log methods def getLogs(self, workflowId: str, pagination: Optional[PaginationParams] = None) -> Union[List[ChatLog], PaginatedResult]: """ Returns logs for a workflow if user has access to the workflow. Supports optional pagination, sorting, and filtering. Args: workflowId: The workflow ID to get logs for pagination: Optional pagination parameters. If None, returns all items. Returns: If pagination is None: List[ChatLog] If pagination is provided: PaginatedResult with items and metadata """ # Check workflow access first (without calling getWorkflow to avoid circular reference) # Use RBAC filtering workflows = self._getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: if pagination is None: return [] return PaginatedResult(items=[], totalItems=0, totalPages=0) # Get logs for this workflow from normalized table logs = self._getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) # Convert raw logs to dict format for sorting/filtering logDicts = [] for log in logs: logDicts.append({ "id": log.get("id"), "workflowId": log.get("workflowId"), "message": log.get("message"), "type": log.get("type"), "timestamp": log.get("timestamp", getUtcTimestamp()), "agentName": log.get("agentName"), "status": log.get("status"), "progress": log.get("progress"), "mandateId": log.get("mandateId"), "userId": log.get("userId") }) # Apply default sorting by timestamp if no sort specified if pagination is None or not pagination.sort: logDicts.sort(key=lambda x: parseTimestamp(x.get("timestamp"), default=0)) # Apply filtering (if filters provided) if pagination and pagination.filters: logDicts = self._applyFilters(logDicts, pagination.filters) # Apply sorting (in order of sortFields) if pagination and 
    def createLog(self, logData: Dict[str, Any], event_manager=None) -> Optional[ChatLog]:
        """Creates a log entry for a workflow if user has access.

        Defaults are filled in before validation: timestamp (now), status
        (derived from "type": error -> "error", otherwise "running"), and
        progress (info/warning -> 0.5, error -> 1.0).

        Args:
            logData: Raw log fields; must contain "workflowId" and satisfy the
                ChatLog model after defaulting.
            event_manager: Optional; when provided, a "chatdata" event is
                emitted for streaming consumers (best-effort).

        Returns:
            The created ChatLog, or None when access is denied or the data is
            invalid. (Annotation widened to Optional to match the None paths.)
        """
        # Check workflow access
        workflowId = logData.get("workflowId")
        if not workflowId:
            logger.error("No workflowId provided for createLog")
            return None
        workflow = self.getWorkflow(workflowId)
        if not workflow:
            logger.warning(f"No access to workflow {workflowId}")
            return None
        if not self.checkRbacPermission(ChatWorkflow, "update", workflowId):
            logger.warning(f"No permission to modify workflow {workflowId}")
            return None

        # Make sure required fields are present
        if "timestamp" not in logData:
            logData["timestamp"] = getUtcTimestamp()

        # Note: Chat data is user-owned, no mandate/featureInstance context stored
        # mandateId/featureInstanceId removed from ChatLog model

        # Add status information if not present
        if "status" not in logData and "type" in logData:
            if logData["type"] == "error":
                logData["status"] = "error"
            else:
                logData["status"] = "running"

        # Add progress information if not present
        if "progress" not in logData:
            # Default progress values based on log type (0.0 to 1.0 format)
            if logData.get("type") == "info":
                logData["progress"] = 0.5  # Default middle progress
            elif logData.get("type") == "error":
                logData["progress"] = 1.0  # Error state - completed (failed)
            elif logData.get("type") == "warning":
                logData["progress"] = 0.5  # Default middle progress

        # Validate log data against ChatLog model
        try:
            log_model = ChatLog(**logData)
        except Exception as e:
            logger.error(f"Invalid log data: {str(e)}")
            return None

        # Create log in normalized table.
        # NOTE(review): every sibling call passes recordCreate a dict
        # (simpleFields, model_dump(), messageData) but here the ChatLog model
        # object itself is passed -- confirm recordCreate accepts models, or
        # this should be log_model.model_dump().
        createdLog = self.db.recordCreate(ChatLog, log_model)

        # Emit log event for streaming (if event manager is provided)
        if event_manager:
            try:
                log_timestamp = parseTimestamp(createdLog.get("timestamp"), default=getUtcTimestamp())
                # Emit log event in exact chatData format: {type, createdAt, item}
                asyncio.create_task(event_manager.emit_event(
                    context_id=workflowId,
                    event_type="chatdata",
                    data={
                        "type": "log",
                        "createdAt": log_timestamp,
                        "item": ChatLog(**createdLog).dict()
                    },
                    event_category="chat"
                ))
            except Exception as e:
                logger.debug(f"Could not emit log event: {e}")

        # Return validated ChatLog instance
        return ChatLog(**createdLog)
""" # Check workflow access first # Use RBAC filtering workflows = self._getRecordset(ChatWorkflow, recordFilter={"id": workflowId}) if not workflows: return {"items": []} # Get all data types and filter in Python (PostgreSQL connector doesn't support $gt operators) items = [] # Get messages messages = self._getRecordset(ChatMessage, recordFilter={"workflowId": workflowId}) for msg in messages: # Apply timestamp filtering in Python msgTimestamp = parseTimestamp(msg.get("publishedAt"), default=getUtcTimestamp()) if afterTimestamp is not None and msgTimestamp <= afterTimestamp: continue # Load documents for each message documents = self.getDocuments(msg["id"]) # Create ChatMessage object with loaded documents chatMessage = ChatMessage( id=msg["id"], workflowId=msg["workflowId"], parentMessageId=msg.get("parentMessageId"), documents=documents, documentsLabel=msg.get("documentsLabel"), message=msg.get("message"), role=msg.get("role", "assistant"), status=msg.get("status", "step"), sequenceNr=msg.get("sequenceNr", 0), publishedAt=msg.get("publishedAt", getUtcTimestamp()), success=msg.get("success"), actionId=msg.get("actionId"), actionMethod=msg.get("actionMethod"), actionName=msg.get("actionName"), roundNumber=msg.get("roundNumber"), taskNumber=msg.get("taskNumber"), actionNumber=msg.get("actionNumber"), taskProgress=msg.get("taskProgress"), actionProgress=msg.get("actionProgress") ) # Use publishedAt as the timestamp for chronological ordering items.append({ "type": "message", "createdAt": msgTimestamp, "item": chatMessage }) # Get logs - return all logs with roundNumber if available logs = self._getRecordset(ChatLog, recordFilter={"workflowId": workflowId}) for log in logs: # Apply timestamp filtering in Python logTimestamp = parseTimestamp(log.get("timestamp"), default=getUtcTimestamp()) if afterTimestamp is not None and logTimestamp <= afterTimestamp: continue chatLog = ChatLog(**log) items.append({ "type": "log", "createdAt": logTimestamp, "item": chatLog }) # 
Sort all items by createdAt timestamp for chronological order items.sort(key=lambda x: parseTimestamp(x.get("createdAt"), default=0)) return {"items": items, "workflowCost": workflowCost} def getInterface(currentUser: Optional[User] = None, mandateId: Optional[str] = None, featureInstanceId: Optional[str] = None) -> 'ChatObjects': """ Returns a ChatObjects instance for the current user. Handles initialization of database and records. Args: currentUser: The authenticated user mandateId: The mandate ID from RequestContext (X-Mandate-Id header). Required. featureInstanceId: The feature instance ID from RequestContext (X-Feature-Instance-Id header). """ if not currentUser: raise ValueError("Invalid user context: user is required") effectiveMandateId = str(mandateId) if mandateId else None effectiveFeatureInstanceId = str(featureInstanceId) if featureInstanceId else None # Create context key including featureInstanceId for proper isolation contextKey = f"{effectiveMandateId}_{effectiveFeatureInstanceId}_{currentUser.id}" # Create new instance if not exists if contextKey not in _chatInterfaces: _chatInterfaces[contextKey] = ChatObjects(currentUser, mandateId=effectiveMandateId, featureInstanceId=effectiveFeatureInstanceId) else: # Update user context if needed _chatInterfaces[contextKey].setUserContext(currentUser, mandateId=effectiveMandateId, featureInstanceId=effectiveFeatureInstanceId) return _chatInterfaces[contextKey]