""" Interface to LucyDOM database and AI Connectors. Uses the JSON connector for data access with added language support. """ import os import logging import uuid from datetime import datetime from typing import Dict, Any, List, Optional, Union import importlib import hashlib # DYNAMIC PART: Connectors to the Interface from connectors.connector_db_json import DatabaseConnector from connectors.connector_aichat_openai import ChatService # Basic Configurations from modules.configuration import APP_CONFIG logger = logging.getLogger(__name__) # Custom exceptions for file handling class FileError(Exception): """Base class for file handling exceptions.""" pass class FileNotFoundError(FileError): """Exception raised when a file is not found.""" pass class FileStorageError(FileError): """Exception raised when there's an error storing a file.""" pass class FilePermissionError(FileError): """Exception raised when there's a permission issue with a file.""" pass class FileDeletionError(FileError): """Exception raised when there's an error deleting a file.""" pass class LucyDOMInterface: """ Interface to the LucyDOM database. Uses the JSON connector for data access. """ def __init__(self, mandate_id: int, user_id: int): """ Initializes the LucyDOM Interface with mandate and user context. Args: mandate_id: ID of the current mandate user_id: ID of the current user """ self.mandate_id = mandate_id self.user_id = user_id # Add language settings self.user_language = "en" # Default user language self.ai_service = None # Will be set externally # Import data model module try: self.model_module = importlib.import_module("modules.lucydom_model") logger.info("lucydom_model successfully imported") except ImportError as e: logger.error(f"Error importing lucydom_model: {e}") raise # Initialize database if needed self._initialize_database() def _initialize_database(self): """ Initializes the database with minimal objects for the logged-in user in the mandate, if it doesn't exist yet. 
No initialization without a valid user. Creates an initial dataset for each table defined in the data model. """ effective_mandate_id = self.mandate_id effective_user_id = self.user_id if effective_mandate_id is None or effective_user_id is None: #data available return self.db = DatabaseConnector( db_host=APP_CONFIG.get("DB_LUCYDOM_HOST"), db_database=APP_CONFIG.get("DB_LUCYDOM_DATABASE"), db_user=APP_CONFIG.get("DB_LUCYDOM_USER"), db_password=APP_CONFIG.get("DB_LUCYDOM_PASSWORD_SECRET"), mandate_id=self.mandate_id, user_id=self.user_id ) # Initialize standard prompts for different areas prompts = self.db.get_recordset("prompts") if not prompts: logger.info("Creating standard prompts") # Define standard prompts standard_prompts = [ { "mandate_id": effective_mandate_id, "user_id": effective_user_id, "content": "Research the current market trends and developments in [TOPIC]. Collect information about leading companies, innovative products or services, and current challenges. Present the results in a structured overview with relevant data and sources.", "name": "Web Research: Market Research" }, { "mandate_id": effective_mandate_id, "user_id": effective_user_id, "content": "Analyze the attached dataset on [TOPIC] and identify the most important trends, patterns, and anomalies. Perform statistical calculations to support your findings. Present the results in a clearly structured analysis and draw relevant conclusions.", "name": "Analysis: Data Analysis" }, { "mandate_id": effective_mandate_id, "user_id": effective_user_id, "content": "Create a detailed protocol of our meeting on [TOPIC]. Capture all discussed points, decisions made, and agreed measures. Structure the protocol clearly with agenda items, participant list, and clear responsibilities for follow-up actions.", "name": "Protocol: Meeting Minutes" }, { "mandate_id": effective_mandate_id, "user_id": effective_user_id, "content": "Develop a UI/UX design concept for [APPLICATION/WEBSITE]. 
async def call_ai(self, messages: List[Dict[str, str]], produce_user_answer: bool = False, temperature: Optional[float] = None) -> str:
    """
    Call the configured AI service, optionally adding a language instruction.

    When the answer is meant for the end user and a user language is set,
    a "Please respond in '<lang>' language." instruction is placed in front
    of the conversation: it is prefixed to an existing leading system
    message (unless that message already mentions "language"), otherwise a
    new system message is inserted at position 0.

    The caller's ``messages`` list and its dictionaries are never modified;
    the instruction is applied to a copy. Mutating them in place would make
    a reused conversation keep a stale language instruction after the user
    language changes.

    Args:
        messages: List of message dictionaries ("role" / "content")
        produce_user_answer: Whether this response is for the end-user
        temperature: Optional temperature setting; only forwarded when given

    Returns:
        AI response text, or an error text if no AI service is configured
    """
    if not self.ai_service:
        logger.error("AI service not set in LucyDOMInterface")
        return "Error: AI service not available"

    outgoing = messages

    # Add language instruction for user-facing responses
    if produce_user_answer and self.user_language:
        ltext = f"Please respond in '{self.user_language}' language."

        # Shallow copy so the caller's conversation history stays untouched
        outgoing = list(messages) if messages else []
        first = outgoing[0] if outgoing else None

        if first is not None and first.get("role") == "system":
            content = first.get("content") or ""
            if "language" not in content.lower():
                # Replace the first entry with a modified copy, not in place
                outgoing[0] = {**first, "content": f"{ltext} {content}"}
        else:
            # No leading system message: insert one with the instruction
            outgoing.insert(0, {"role": "system", "content": ltext})

    # Only forward temperature when explicitly requested so the service
    # keeps its own default otherwise
    if temperature is not None:
        return await self.ai_service.call_api(outgoing, temperature=temperature)
    return await self.ai_service.call_api(outgoing)
def update_prompt(self, prompt_id: int, content: str = None, name: str = None) -> Dict[str, Any]:
    """
    Modifies the content and/or name of a stored prompt.

    Only arguments that are not None are written; passing neither still
    issues a modify call with an empty change set.

    Args:
        prompt_id: ID of the prompt to update
        content: New content for the prompt (None leaves it unchanged)
        name: New name for the prompt (None leaves it unchanged)

    Returns:
        The result of the database modify call, or None if no prompt with
        this ID exists
    """
    # Nothing to update when the prompt cannot be found
    if not self.get_prompt(prompt_id):
        return None

    # Collect only the fields the caller actually supplied
    candidates = (("content", content), ("name", name))
    changes = {field: value for field, value in candidates if value is not None}

    return self.db.record_modify("prompts", prompt_id, changes)
def get_mime_type(self, filename: str) -> str:
    """
    Maps a file name to a MIME type using its (case-insensitive) extension.

    Args:
        filename: Name of the file, with or without a path

    Returns:
        The MIME type registered for the extension, or
        "application/octet-stream" for unknown or missing extensions
    """
    import os

    known_types = {
        "pdf": "application/pdf",
        "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "doc": "application/msword",
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "xls": "application/vnd.ms-excel",
        "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "ppt": "application/vnd.ms-powerpoint",
        "csv": "text/csv",
        "txt": "text/plain",
        "json": "application/json",
        "xml": "application/xml",
        "html": "text/html",
        "htm": "text/html",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "webp": "image/webp",
        "svg": "image/svg+xml",
        "py": "text/x-python",
        "js": "application/javascript",
        "css": "text/css",
    }

    # splitext yields the suffix including its leading dot ("" if none)
    _, dotted_suffix = os.path.splitext(filename)
    suffix = dotted_suffix[1:].lower()

    try:
        return known_types[suffix]
    except KeyError:
        return "application/octet-stream"
def delete_file(self, file_id: int) -> bool:
    """
    Deletes a file from the database (metadata and content).

    The FileData row is removed unless another "files" record carries the
    same hash. Files stored without a hash cannot share content with any
    other record, so their FileData row is removed as well; skipping it
    would leave orphaned content behind.

    Failing to remove the FileData row is logged and does not prevent the
    metadata row from being deleted (best effort).

    Args:
        file_id: ID of the file

    Returns:
        The result of deleting the "files" record

    Raises:
        FileNotFoundError: No file with this ID exists (this is the
            module's own exception class, not the builtin of that name)
        FilePermissionError: The file belongs to a different mandate
        FileDeletionError: Any other error during deletion
    """
    try:
        # Find the file in the database
        file_meta = self.get_file(file_id)
        if not file_meta:
            raise FileNotFoundError(f"File with ID {file_id} not found")

        # Check if the file belongs to the current mandate
        if file_meta.get("mandate_id") != self.mandate_id:
            raise FilePermissionError(f"No permission to delete file {file_id}")

        # Check for other references to this file (by hash); without a
        # hash there is nothing another record could be sharing
        file_hash = file_meta.get("file_hash")
        other_references = []
        if file_hash:
            same_hash = self.db.get_recordset("files", record_filter={"file_hash": file_hash})
            other_references = [f for f in same_hash if f.get("id") != file_id]

        if other_references:
            # Other files reference this content: only delete the FileItem
            logger.info(f"Other references to the file content found, only FileItem will be deleted: {file_id}")
        else:
            # Also delete the file content in the FileData table
            try:
                file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
                if file_data_entries:
                    self.db.record_delete("file_data", file_id)
                    logger.info(f"FileData for file {file_id} deleted")
            except Exception as e:
                # Deliberately best effort: metadata deletion still proceeds
                logger.warning(f"Error deleting FileData for file {file_id}: {str(e)}")

        # Delete the FileItem entry
        return self.db.record_delete("files", file_id)

    except (FileNotFoundError, FilePermissionError):
        # Pass through the specific file errors unchanged
        raise
    except Exception as e:
        logger.error(f"Error deleting file {file_id}: {str(e)}")
        # Keep the original exception as the cause for debugging
        raise FileDeletionError(f"Error deleting file: {str(e)}") from e
def get_file_data(self, file_id: int) -> Optional[bytes]:
    """
    Loads the stored content of a file and returns it as bytes.

    String values from the "file_data" table are Base64-decoded; if a
    string cannot be decoded it is returned UTF-8 encoded instead. Values
    that are already bytes are returned unchanged.

    Args:
        file_id: ID of the file

    Returns:
        Binary data, or None if no record exists, the record has no "data"
        field, the value has an unexpected type, or processing fails
    """
    import base64

    rows = self.db.get_recordset("file_data", record_filter={"id": file_id})

    # Guard: no record, or a record without a payload
    if not rows or "data" not in rows[0]:
        logger.warning(f"No data found for file ID {file_id}")
        return None

    stored = rows[0]["data"]
    try:
        # Already binary (not expected after the switch to Base64 storage)
        if isinstance(stored, bytes):
            logger.warning(f"Data was already bytes, no conversion needed")
            return stored

        # Anything that is neither bytes nor str cannot be interpreted
        if not isinstance(stored, str):
            logger.error(f"Unexpected data type in database: {type(stored)}")
            return None

        # Regular case: a Base64 string
        try:
            decoded = base64.b64decode(stored)
            logger.debug(f"Successfully decoded base64 string to {len(decoded)} bytes")
            return decoded
        except Exception as e:
            logger.error(f"Failed to decode base64 data: {str(e)}")
            # Not valid Base64: hand back the raw text as bytes
            return stored.encode('utf-8')
    except Exception as e:
        logger.error(f"Error processing file data: {str(e)}")
        return None
def save_uploaded_file(self, file_content: bytes, file_name: str) -> Dict[str, Any]:
    """
    Saves an uploaded file in the database.

    Metadata is stored in the 'files' table, binary data in the
    'file_data' table as a Base64 string. If a file with the same SHA-256
    hash already exists, nothing is written and the existing metadata is
    returned.

    A failed content write is treated as an error and the freshly created
    metadata row is removed again (best effort). Otherwise the hash-based
    duplicate check would keep returning a file that has no content.

    The copy written to the static folder is a debugging aid only; if it
    fails, the failure is logged and the upload still succeeds.

    Args:
        file_content: Binary data of the file
        file_name: Name of the file

    Returns:
        Dictionary with metadata of the saved (or already existing) file

    Raises:
        FileStorageError: On any failure, including non-bytes content
    """
    try:
        logger.info(f"Starting upload process for file: {file_name}")

        # Reject anything that is not raw bytes
        if not isinstance(file_content, bytes):
            logger.error(f"Invalid file_content type: {type(file_content)}")
            raise ValueError(f"file_content must be bytes, got {type(file_content)}")

        # Calculate file hash for deduplication
        file_hash = self.calculate_file_hash(file_content)
        logger.debug(f"Calculated file hash: {file_hash}")

        # Check for duplicate
        existing_file = self.check_for_duplicate_file(file_hash)
        if existing_file:
            # Simply return the existing file metadata
            logger.info(f"Duplicate found for {file_name}: {existing_file['id']}")
            return existing_file

        mime_type = self.get_mime_type(file_name)
        file_size = len(file_content)

        # 1. Save metadata in the 'files' table
        logger.info(f"Saving file metadata to database for file: {file_name}")
        db_file = self.create_file(
            name=file_name,
            mime_type=mime_type,
            size=file_size,
            file_hash=file_hash
        )
        # Verify the record before its ID is used
        if not db_file:
            raise FileStorageError(f"Database record for file {file_name} was not created properly")
        logger.debug(f"Database record created for file {file_name}")

        # 2. Save binary data as Base64 string in the 'file_data' table
        logger.info(f"Saving file content to database for file: {file_name}")
        if not self.create_file_data(db_file["id"], file_content):
            # Roll back the metadata row so no content-less file remains
            try:
                self.db.record_delete("files", db_file["id"])
            except Exception as rollback_error:
                logger.warning(f"Could not roll back metadata for file {file_name}: {str(rollback_error)}")
            raise FileStorageError(f"Binary data for file {file_name} could not be stored")

        # Debug: Export file to static folder  # DEBUG TODO
        try:
            self._export_file_to_static(file_content, db_file["id"], file_name)
        except Exception as export_error:
            # The file is already persisted; a failed debug copy must not
            # turn the upload into an error
            logger.warning(f"Debug export of file {file_name} failed: {str(export_error)}")

        logger.info(f"File upload process completed for: {file_name}")
        return db_file

    except Exception as e:
        logger.error(f"Error in save_uploaded_file for {file_name}: {str(e)}", exc_info=True)
        raise FileStorageError(f"Error saving file: {str(e)}") from e
Get binary data from the 'file_data' table file_content = self.get_file_data(file_id) if file_content is None: raise FileNotFoundError(f"Binary data for file with ID {file_id} not found") return { "id": file_id, "name": file.get("name", f"file_{file_id}"), "content_type": file.get("mime_type", "application/octet-stream"), "size": file.get("size", len(file_content)), "content": file_content } except FileNotFoundError as e: # Re-raise FileNotFoundError as is raise except Exception as e: logger.error(f"Error downloading file {file_id}: {str(e)}") raise FileError(f"Error downloading file: {str(e)}") def _export_file_to_static(self, file_content: bytes, file_id: int, file_name: str): debug_filename = f"{file_id}_{file_name}" with open(f"./static/{debug_filename}", 'wb') as f: f.write(file_content) # Workflow methods def get_all_workflows(self) -> List[Dict[str, Any]]: """Returns all workflows for the current mandate""" return self.db.get_recordset("workflows") def get_workflows_by_user(self, user_id: int) -> List[Dict[str, Any]]: """Returns all workflows for a user""" return self.db.get_recordset("workflows", record_filter={"user_id": user_id}) def get_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]: """Returns a workflow by its ID""" workflows = self.db.get_recordset("workflows", record_filter={"id": workflow_id}) if workflows: return workflows[0] return None def create_workflow(self, workflow_data: Dict[str, Any]) -> Dict[str, Any]: """Creates a new workflow in the database""" # Make sure mandate_id and user_id are set if "mandate_id" not in workflow_data: workflow_data["mandate_id"] = self.mandate_id if "user_id" not in workflow_data: workflow_data["user_id"] = self.user_id # Set timestamp if not present current_time = self._get_current_timestamp() if "started_at" not in workflow_data: workflow_data["started_at"] = current_time if "last_activity" not in workflow_data: workflow_data["last_activity"] = current_time return self.db.record_create("workflows", 
def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Writes new data to an existing workflow and refreshes its activity time.

    The passed dictionary receives a "last_activity" entry with the
    current timestamp before it is handed to the database.

    Args:
        workflow_id: ID of the workflow to update
        workflow_data: New data for the workflow

    Returns:
        The result of the database modify call, or None if the workflow
        does not exist
    """
    # Only existing workflows can be updated
    if self.get_workflow(workflow_id):
        # Stamp the update time onto the outgoing data
        now = self._get_current_timestamp()
        workflow_data["last_activity"] = now
        return self.db.record_modify("workflows", workflow_id, workflow_data)

    return None
Args: message_data: The message data Returns: The created message or None on error """ try: # Check if required fields are present required_fields = ["id", "workflow_id"] for field in required_fields: if field not in message_data: logger.error(f"Required field '{field}' missing in message_data") raise ValueError(f"Required field '{field}' missing in message data") # Validate that ID is not None if message_data["id"] is None: message_data["id"] = f"msg_{uuid.uuid4()}" logger.warning(f"Automatically generated ID for workflow message: {message_data['id']}") # Ensure required fields are present if "started_at" not in message_data and "created_at" not in message_data: message_data["started_at"] = self._get_current_timestamp() if "created_at" in message_data and "started_at" not in message_data: message_data["started_at"] = message_data["created_at"] del message_data["created_at"] # Set status if not present if "status" not in message_data: message_data["status"] = "completed" # Set sequence number if not present if "sequence_no" not in message_data: # Get current messages to determine next sequence number existing_messages = self.get_workflow_messages(message_data["workflow_id"]) message_data["sequence_no"] = len(existing_messages) + 1 # Ensure role and agent_name are present if "role" not in message_data: message_data["role"] = "assistant" if message_data.get("agent_name") else "user" if "agent_name" not in message_data: message_data["agent_name"] = "" # Debug log for data to create logger.debug(f"Creating workflow message with data: {message_data}") # Create message in database created_message = self.db.record_create("workflow_messages", message_data) # Update workflow's message_ids if this is a new message if created_message: workflow_id = message_data["workflow_id"] workflow = self.get_workflow(workflow_id) if workflow: # Get current message_ids or initialize empty list message_ids = workflow.get("message_ids", []) # Add the new message ID if not already in the list 
def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """
    Updates an existing workflow message in the database.

    If no message with this ID exists and ``message_data`` contains a
    "workflow_id", the message is created instead. The created record
    always carries ``message_id`` as its "id" and has "created_at"
    converted to "started_at", exactly like an updated record, so the
    message can be found under the requested ID afterwards.

    The caller's ``message_data`` dictionary is not modified; all
    adjustments are made on a copy.

    Args:
        message_id: ID of the message
        message_data: Data to update

    Returns:
        The updated (or newly created) message object, or None if the
        message does not exist and no "workflow_id" was supplied

    Raises:
        ValueError: If message_id is empty or any error occurs while
            updating; the original exception is attached as the cause
    """
    try:
        logger.debug(f"Updating message {message_id} in database")

        # Ensure message_id is provided
        if not message_id:
            logger.error("No message_id provided for update_workflow_message")
            raise ValueError("message_id cannot be empty")

        # Work on a copy so the caller's dictionary stays untouched
        payload = dict(message_data)

        # Ensure ID is in the dataset (needed for create and update alike)
        if 'id' not in payload:
            payload['id'] = message_id

        # Convert created_at to started_at if needed
        if "created_at" in payload and "started_at" not in payload:
            payload["started_at"] = payload.pop("created_at")

        # Check if message exists in database
        messages = self.db.get_recordset("workflow_messages", record_filter={"id": message_id})
        if not messages:
            logger.warning(f"Message with ID {message_id} does not exist in database")

            # If message doesn't exist but we have workflow_id, create it
            if "workflow_id" in payload:
                logger.info(f"Creating new message with ID {message_id} for workflow {payload.get('workflow_id')}")
                return self.db.record_create("workflow_messages", payload)

            logger.error(f"Workflow ID missing for new message {message_id}")
            return None

        existing_message = messages[0]

        # Fill role / agent_name only when neither the update nor the
        # stored message provides them
        for key in ["role", "agent_name"]:
            if key not in payload and key not in existing_message:
                payload[key] = "assistant" if key == "role" else ""

        # Update the message
        updated_message = self.db.record_modify("workflow_messages", message_id, payload)
        if updated_message:
            logger.info(f"Message {message_id} updated successfully")
        else:
            logger.warning(f"Failed to update message {message_id}")

        return updated_message
    except Exception as e:
        logger.error(f"Error updating message {message_id}: {str(e)}", exc_info=True)
        # Re-raise with full information, keeping the original cause
        raise ValueError(f"Error updating message {message_id}: {str(e)}") from e
Args: workflow_id: ID of the associated workflow message_id: ID of the message file_id: ID of the file to remove Returns: True on success, False on error """ try: # Log operation logger.info(f"Removing file {file_id} from message {message_id} in workflow {workflow_id}") # Get all workflow messages all_messages = self.get_workflow_messages(workflow_id) logger.debug(f"Workflow {workflow_id} has {len(all_messages)} messages") # Try different approaches to find the message message = None # Exact match message = next((m for m in all_messages if m.get("id") == message_id), None) # Case-insensitive match if not message and isinstance(message_id, str): message = next((m for m in all_messages if isinstance(m.get("id"), str) and m.get("id").lower() == message_id.lower()), None) # Partial match (starts with) if not message and isinstance(message_id, str): message = next((m for m in all_messages if isinstance(m.get("id"), str) and m.get("id").startswith(message_id)), None) if not message: logger.warning(f"Message {message_id} not found in workflow {workflow_id}") return False # Log the found message logger.info(f"Found message: {message.get('id')}") # Check if message has documents if "documents" not in message or not message["documents"]: logger.warning(f"No documents in message {message_id}") return False # Log existing documents documents = message.get("documents", []) logger.debug(f"Message has {len(documents)} documents") for i, doc in enumerate(documents): doc_id = doc.get("id", "unknown") file_id_value = doc.get("file_id", "unknown") logger.debug(f"Document {i}: doc_id={doc_id}, file_id={file_id_value}") # Create a new list of documents without the one to delete updated_documents = [] removed = False for doc in documents: doc_id = doc.get("id") file_id_value = doc.get("file_id") # Flexible matching approach should_remove = ( (doc_id == file_id) or (file_id_value == file_id) or (isinstance(doc_id, str) and str(file_id) in doc_id) or (isinstance(file_id_value, str) and 
def create_workflow_log(self, log_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Stores a log entry for a workflow, filling in missing standard fields.

    Fields added to ``log_data`` when absent:
        timestamp: the current timestamp
        status: "error" for type "error", otherwise "running" (only when a
            "type" key is present)
        progress: 50 for types "info" and "warning", -1 for type "error";
            other types get no progress value

    Args:
        log_data: The log entry; it is completed in place

    Returns:
        The result of the database create call
    """
    # Timestamp
    if "timestamp" not in log_data:
        log_data["timestamp"] = self._get_current_timestamp()

    # Status is derived from the type, if there is one
    if "type" in log_data and "status" not in log_data:
        log_data["status"] = "error" if log_data["type"] == "error" else "running"

    # Default progress per log type (50 = middle, -1 = error state)
    if "progress" not in log_data:
        progress_defaults = (("info", 50), ("error", -1), ("warning", 50))
        log_type = log_data.get("type")
        for known_type, default_progress in progress_defaults:
            if log_type == known_type:
                log_data["progress"] = default_progress
                break

    return self.db.record_create("workflow_logs", log_data)
save_messages: bool = True, save_logs: bool = True) -> bool: """ Saves the state of a workflow to the database. Workflow data is updated, but messages are stored separately. Args: workflow: The workflow object save_messages: Flag to determine if messages should be saved save_logs: Flag to determine if logs should be saved Returns: True on success, False on failure """ try: workflow_id = workflow.get("id") if not workflow_id: return False # Extract only the database-relevant workflow fields # IMPORTANT: Don't store messages in the workflow table! workflow_db_data = { "id": workflow_id, "mandate_id": workflow.get("mandate_id", self.mandate_id), "user_id": workflow.get("user_id", self.user_id), "name": workflow.get("name", f"Workflow {workflow_id}"), "status": workflow.get("status", "completed"), "started_at": workflow.get("started_at", self._get_current_timestamp()), "last_activity": workflow.get("last_activity", self._get_current_timestamp()), "data_stats": workflow.get("data_stats", {}) } # Check if workflow already exists existing_workflow = self.get_workflow(workflow_id) if existing_workflow: self.update_workflow(workflow_id, workflow_db_data) else: self.create_workflow(workflow_db_data) # Save messages if save_messages and "messages" in workflow: for message in workflow["messages"]: message_id = message.get("id") if not message_id: continue # Since each message is already saved with create_workflow_message, # we only need to check if updates are necessary # First, get existing message from database existing_messages = self.get_workflow_messages(workflow_id) existing_message = next((m for m in existing_messages if m.get("id") == message_id), None) if existing_message: # Check if updates are needed has_changes = False for key in ["role", "agent_name", "content", "status", "documents"]: if key in message and message.get(key) != existing_message.get(key): has_changes = True break if has_changes: # Extract only relevant data for the database message_data = { "role": 
message.get("role", existing_message.get("role", "unknown")), "content": message.get("content", existing_message.get("content", "")), "agent_name": message.get("agent_name", existing_message.get("agent_name", "")), "status": message.get("status", existing_message.get("status", "completed")), "documents": message.get("documents", existing_message.get("documents", [])) } self.update_workflow_message(message_id, message_data) else: # Message doesn't exist in database yet # It should have been saved via create_workflow_message # If not, log a warning logger.warning(f"Message {message_id} in workflow {workflow_id} not found in database") # Save logs if save_logs and "logs" in workflow: # Get existing logs existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)} for log in workflow["logs"]: log_id = log.get("id") if not log_id: continue # Extract only relevant data for the database log_data = { "id": log_id, "workflow_id": workflow_id, "message": log.get("message", ""), "type": log.get("type", "info"), "timestamp": log.get("timestamp", self._get_current_timestamp()), "agent_name": log.get("agent_name", "(undefined)"), "status": log.get("status", "running"), "progress": log.get("progress", 50) } # Create or update log if log_id in existing_logs: self.db.record_modify("workflow_logs", log_id, log_data) else: self.db.record_create("workflow_logs", log_id, log_data) return True except Exception as e: logger.error(f"Error saving workflow state: {str(e)}") return False def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]: """ Loads the complete state of a workflow from the database. This includes the workflow itself, messages, and logs. 
def get_lucydom_interface(mandate_id: int = 0, user_id: int = 0) -> LucyDOMInterface:
    """
    Looks up (or lazily builds) the LucyDOMInterface for a mandate/user pair.

    Instances are kept in the module-level _lucydom_interfaces registry under
    the key "<mandate_id>_<user_id>", so repeated calls with the same context
    return the same object. A newly built instance gets a fresh ChatService
    assigned to its ai_service attribute before it is registered.

    Args:
        mandate_id: ID of the mandate context (defaults to 0)
        user_id: ID of the user context (defaults to 0)

    Returns:
        The LucyDOMInterface registered for this context
    """
    registry_key = f"{mandate_id}_{user_id}"

    # Fast path: an instance for this context was already built
    try:
        return _lucydom_interfaces[registry_key]
    except KeyError:
        pass

    # Slow path: build the instance, attach the AI service, then register it
    instance = LucyDOMInterface(mandate_id, user_id)
    instance.ai_service = ChatService()
    _lucydom_interfaces[registry_key] = instance
    return instance