# gateway/modules/lucydomInterface.py
# Last modified: 2025-04-30 00:39:37 +02:00
# (listing metadata: 1337 lines, no EOL, 53 KiB, Python)

"""
Interface to LucyDOM database and AI Connectors.
Uses the JSON connector for data access with added language support.
"""
import os
import logging
import uuid
from datetime import datetime
from typing import Dict, Any, List, Optional, Union
import importlib
import hashlib
from modules.mimeUtils import isTextMimeType, determineContentEncoding
# DYNAMIC PART: Connectors to the Interface
from connectors.connectorDbJson import DatabaseConnector
from connectors.connectorAiOpenai import ChatService
# Basic Configurations
from modules.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
# Custom exceptions for file handling
class FileError(Exception):
    """Root of the file-handling exception hierarchy for this module."""
class FileNotFoundError(FileError):
    """Raised when a requested file record is not found.

    NOTE: this shadows the builtin FileNotFoundError within this module,
    so `except FileNotFoundError` here catches the FileError-based variant.
    """
class FileStorageError(FileError):
    """Raised when persisting a file to the database fails."""
class FilePermissionError(FileError):
    """Raised when the current mandate/user may not access a file."""
class FileDeletionError(FileError):
    """Raised when removing a file from the database fails."""
class LucyDOMInterface:
"""
Interface to the LucyDOM database.
Uses the JSON connector for data access.
"""
def __init__(self, mandateId: int, userId: int):
"""
Initializes the LucyDOM Interface with mandate and user context.
Args:
mandateId: ID of the current mandate
userId: ID of the current user
"""
self.mandateId = mandateId
self.userId = userId
# Add language settings
self.userLanguage = "en" # Default user language
self.aiService = None # Will be set externally
# Import data model module
try:
self.modelModule = importlib.import_module("modules.lucydomModel")
logger.info("lucydomModel successfully imported")
except ImportError as e:
logger.error(f"Error importing lucydomModel: {e}")
raise
# Initialize database if needed
self._initializeDatabase()
    def _initializeDatabase(self):
        """
        Initializes the database with minimal objects for the logged-in user in the mandate, if it doesn't exist yet.
        No initialization without a valid user.
        Creates an initial dataset for each table defined in the data model.

        Side effects:
            Sets self.db to the JSON DatabaseConnector and seeds the "prompts"
            table with four standard prompts when it is empty.
            NOTE(review): when mandateId or userId is None, self.db is never
            assigned, so any later self.db access raises AttributeError --
            confirm callers always supply both IDs.
        """
        effectiveMandateId = self.mandateId
        effectiveUserId = self.userId
        if effectiveMandateId is None or effectiveUserId is None:
            # No valid mandate/user context -> skip all initialization.
            return
        # Connector credentials come from the application configuration.
        self.db = DatabaseConnector(
            dbHost=APP_CONFIG.get("DB_LUCYDOM_HOST"),
            dbDatabase=APP_CONFIG.get("DB_LUCYDOM_DATABASE"),
            dbUser=APP_CONFIG.get("DB_LUCYDOM_USER"),
            dbPassword=APP_CONFIG.get("DB_LUCYDOM_PASSWORD_SECRET"),
            mandateId=self.mandateId,
            userId=self.userId,
            skipInitialIdLookup=True
        )
        # Seed standard prompts only when the table is completely empty.
        prompts = self.db.getRecordset("prompts")
        if not prompts:
            logger.info("Creating standard prompts")
            # Define standard prompts
            standardPrompts = [
                {
                    "mandateId": effectiveMandateId,
                    "userId": effectiveUserId,
                    "content": "Research the current market trends and developments in [TOPIC]. Collect information about leading companies, innovative products or services, and current challenges. Present the results in a structured overview with relevant data and sources.",
                    "name": "Web Research: Market Research"
                },
                {
                    "mandateId": effectiveMandateId,
                    "userId": effectiveUserId,
                    "content": "Analyze the attached dataset on [TOPIC] and identify the most important trends, patterns, and anomalies. Perform statistical calculations to support your findings. Present the results in a clearly structured analysis and draw relevant conclusions.",
                    "name": "Analysis: Data Analysis"
                },
                {
                    "mandateId": effectiveMandateId,
                    "userId": effectiveUserId,
                    "content": "Create a detailed protocol of our meeting on [TOPIC]. Capture all discussed points, decisions made, and agreed measures. Structure the protocol clearly with agenda items, participant list, and clear responsibilities for follow-up actions.",
                    "name": "Protocol: Meeting Minutes"
                },
                {
                    "mandateId": effectiveMandateId,
                    "userId": effectiveUserId,
                    "content": "Develop a UI/UX design concept for [APPLICATION/WEBSITE]. Consider the target audience, main functions, and brand identity. Describe the visual design, navigation, interaction patterns, and information architecture. Explain how the design optimizes user-friendliness and user experience.",
                    "name": "Design: UI/UX Design"
                }
            ]
            # Create prompts; recordCreate is expected to return the stored record
            # including its "id" (a missing id would raise KeyError here).
            for promptData in standardPrompts:
                createdPrompt = self.db.recordCreate("prompts", promptData)
                logger.info(f"Prompt '{promptData.get('name', 'Standard')}' was created with ID {createdPrompt['id']}")
# Language support methods
def setUserLanguage(self, languageCode: str):
"""Set the user's preferred language"""
self.userLanguage = languageCode
logger.info(f"User language set to: {languageCode}")
async def callAi(self, messages: List[Dict[str, str]], produceUserAnswer: bool = False, temperature: float = None) -> str:
"""
Enhanced AI service call with language support
Args:
messages: List of message dictionaries
produceUserAnswer: Whether this response is for the end-user
temperature: Optional temperature setting
Returns:
AI response text
"""
if not self.aiService:
logger.error("AI service not set in LucyDOMInterface")
return "Error: AI service not available"
# Add language instruction for user-facing responses
if produceUserAnswer and self.userLanguage:
ltext= f"Please respond in '{self.userLanguage}' language."
if messages and messages[0]["role"] == "system":
if "language" not in messages[0]["content"].lower():
messages[0]["content"] = f"{ltext} {messages[0]['content']}"
else:
# Insert a system message with language instruction
messages.insert(0, {
"role": "system",
"content": ltext
})
# Call the AI service
if temperature is not None:
return await self.aiService.callApi(messages, temperature=temperature)
else:
return await self.aiService.callApi(messages)
# Utilities
def getInitialId(self, table: str) -> Optional[int]:
"""
Returns the initial ID for a table.
Args:
table: Name of the table
Returns:
The initial ID or None if not present
"""
return self.db.getInitialId(table)
def _getCurrentTimestamp(self) -> str:
"""Returns the current timestamp in ISO format"""
return datetime.now().isoformat()
# Prompt methods
def getAllPrompts(self) -> List[Dict[str, Any]]:
"""Returns all prompts for the current mandate"""
return self.db.getRecordset("prompts")
def getPrompt(self, promptId: int) -> Optional[Dict[str, Any]]:
"""Returns a prompt by its ID"""
prompts = self.db.getRecordset("prompts", recordFilter={"id": promptId})
if prompts:
return prompts[0]
return None
def createPrompt(self, content: str, name: str) -> Dict[str, Any]:
"""Creates a new prompt"""
promptData = {
"mandateId": self.mandateId,
"userId": self.userId,
"content": content,
"name": name,
"createdAt": self._getCurrentTimestamp()
}
return self.db.recordCreate("prompts", promptData)
def updatePrompt(self, promptId: int, content: str = None, name: str = None) -> Dict[str, Any]:
"""
Updates an existing prompt
Args:
promptId: ID of the prompt to update
content: New content for the prompt
Returns:
The updated prompt object
"""
# Check if the prompt exists
prompt = self.getPrompt(promptId)
if not prompt:
return None
# Prepare data for update
promptData = {}
if content is not None:
promptData["content"] = content
if name is not None:
promptData["name"] = name
# Update prompt
return self.db.recordModify("prompts", promptId, promptData)
def deletePrompt(self, promptId: int) -> bool:
"""
Deletes a prompt from the database
Args:
promptId: ID of the prompt to delete
Returns:
True if the prompt was successfully deleted, otherwise False
"""
return self.db.recordDelete("prompts", promptId)
# File Utilities
def calculateFileHash(self, fileContent: bytes) -> str:
"""Calculates a SHA-256 hash for the file content"""
return hashlib.sha256(fileContent).hexdigest()
def checkForDuplicateFile(self, fileHash: str) -> Optional[Dict[str, Any]]:
"""Checks if a file with the same hash already exists"""
files = self.db.getRecordset("files", recordFilter={"fileHash": fileHash})
if files:
return files[0]
return None
def getMimeType(self, filename: str) -> str:
"""Determines the MIME type based on the file extension"""
import os
ext = os.path.splitext(filename)[1].lower()[1:]
extensionToMime = {
"pdf": "application/pdf",
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"doc": "application/msword",
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
"xls": "application/vnd.ms-excel",
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
"ppt": "application/vnd.ms-powerpoint",
"csv": "text/csv",
"txt": "text/plain",
"json": "application/json",
"xml": "application/xml",
"html": "text/html",
"htm": "text/html",
"jpg": "image/jpeg",
"jpeg": "image/jpeg",
"png": "image/png",
"gif": "image/gif",
"webp": "image/webp",
"svg": "image/svg+xml",
"py": "text/x-python",
"js": "application/javascript",
"css": "text/css"
}
return extensionToMime.get(ext.lower(), "application/octet-stream")
# File methods - metadata-based operations
def getAllFiles(self) -> List[Dict[str, Any]]:
"""
Returns all files for the current mandate without binary data.
Returns:
List of FileItem objects without binary data
"""
files = self.db.getRecordset("files")
return files
def getFile(self, fileId: int) -> Optional[Dict[str, Any]]:
"""
Returns a file by its ID, without binary data.
Args:
fileId: ID of the file
Returns:
FileItem without binary data or None if not found
"""
files = self.db.getRecordset("files", recordFilter={"id": fileId})
if files:
return files[0]
return None
def createFile(self, name: str, mimeType: str, size: int = None, fileHash: str = None) -> Dict[str, Any]:
"""
Creates a new file entry in the database without content.
The actual file content is stored separately in the FileData table.
Args:
name: Name of the file
mimeType: MIME type of the file
size: Size of the file in bytes
fileHash: Hash value of the file for deduplication
Returns:
The created FileItem object
"""
fileData = {
"mandateId": self.mandateId,
"userId": self.userId,
"name": name,
"mimeType": mimeType,
"size": size,
"fileHash": fileHash,
"creationDate": self._getCurrentTimestamp()
}
return self.db.recordCreate("files", fileData)
def updateFile(self, fileId: int, updateData: Dict[str, Any]) -> Dict[str, Any]:
"""
Updates the metadata of an existing file without affecting the binary data.
Args:
fileId: ID of the file to update
updateData: Dictionary with fields to update
Returns:
The updated FileItem object
"""
# Check if the file exists
file = self.getFile(fileId)
if not file:
raise FileNotFoundError(f"File with ID {fileId} not found")
# Update file
return self.db.recordModify("files", fileId, updateData)
    def deleteFile(self, fileId: int) -> bool:
        """
        Deletes a file from the database (metadata and content).

        Content (fileData) is only removed when no other FileItem shares the
        same content hash; otherwise just this metadata record is deleted.

        Args:
            fileId: ID of the file
        Returns:
            True on success, False on error
        Raises:
            FileNotFoundError: when the file does not exist (module-level
                subclass of FileError -- shadows the builtin name).
            FilePermissionError: when the file belongs to another mandate.
            FileDeletionError: for any other failure.
        """
        try:
            # Find the file in the database
            file = self.getFile(fileId)
            if not file:
                raise FileNotFoundError(f"File with ID {fileId} not found")
            # Check if the file belongs to the current mandate
            if file.get("mandateId") != self.mandateId:
                raise FilePermissionError(f"No permission to delete file {fileId}")
            # Check for other references to this file (by hash)
            fileHash = file.get("fileHash")
            if fileHash:
                otherReferences = [f for f in self.db.getRecordset("files", recordFilter={"fileHash": fileHash})
                                   if f.get("id") != fileId]
                # If other files reference this content, only delete the database entry for FileItem
                if otherReferences:
                    logger.info(f"Other references to the file content found, only FileItem will be deleted: {fileId}")
                else:
                    # Also delete the file content in the FileData table
                    # (best effort: a failure here is logged but does not abort
                    # deletion of the metadata record below).
                    try:
                        fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId})
                        if fileDataEntries:
                            self.db.recordDelete("fileData", fileId)
                            logger.info(f"FileData for file {fileId} deleted")
                    except Exception as e:
                        logger.warning(f"Error deleting FileData for file {fileId}: {str(e)}")
            # Delete the FileItem entry
            return self.db.recordDelete("files", fileId)
        except FileNotFoundError as e:
            # Pass through FileNotFoundError
            raise
        except FilePermissionError as e:
            # Pass through FilePermissionError
            raise
        except Exception as e:
            logger.error(f"Error deleting file {fileId}: {str(e)}")
            raise FileDeletionError(f"Error deleting file: {str(e)}")
# FileData methods - data operations
"""
This contains the modified file handling methods for the LucyDOMInterface class
to implement consistent handling of base64 encoding flags.
"""
def createFileData(self, fileId: int, data: bytes) -> bool:
"""
Stores the binary data of a file in the database, using base64 encoding for binary files.
Always sets the base64Encoded flag appropriately.
Args:
fileId: ID of the associated file
data: Binary data
Returns:
True on success, False on error
"""
try:
import base64
# Check the file metadata to determine if this should be stored as text or base64
file = self.getFile(fileId)
if not file:
logger.error(f"File with ID {fileId} not found when storing data")
return False
# Determine if this is a text-based format that should be stored as text
mimeType = file.get("mimeType", "application/octet-stream")
isTextFormat = isTextMimeType(mimeType)
base64Encoded = False
fileData = None
if isTextFormat:
# Try to decode as text
try:
# Convert bytes to text
textContent = data.decode('utf-8')
fileData = textContent
base64Encoded = False
logger.debug(f"Stored file {fileId} as text")
except UnicodeDecodeError:
# Fallback to base64 if text decoding fails
encodedData = base64.b64encode(data).decode('utf-8')
fileData = encodedData
base64Encoded = True
logger.warning(f"Failed to decode text file {fileId}, falling back to base64")
else:
# Binary format - always use base64
encodedData = base64.b64encode(data).decode('utf-8')
fileData = encodedData
base64Encoded = True
logger.debug(f"Stored file {fileId} as base64")
# Create the fileData record with data and encoding flag
fileDataObj = {
"id": fileId,
"data": fileData,
"base64Encoded": base64Encoded
}
self.db.recordCreate("fileData", fileDataObj)
logger.info(f"Successfully stored data for file {fileId} (base64Encoded: {base64Encoded})")
return True
except Exception as e:
logger.error(f"Error storing data for file {fileId}: {str(e)}")
return False
def getFileData(self, fileId: int) -> Optional[bytes]:
"""
Returns the binary data of a file.
Uses the base64Encoded flag to determine if decoding is necessary.
Args:
fileId: ID of the file
Returns:
Binary data or None if not found
"""
import base64
fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId})
if not fileDataEntries:
logger.warning(f"No data found for file ID {fileId}")
return None
fileDataEntry = fileDataEntries[0]
if "data" not in fileDataEntry:
logger.warning(f"No data field in file data for ID {fileId}")
return None
data = fileDataEntry["data"]
base64Encoded = fileDataEntry.get("base64Encoded", False)
try:
if base64Encoded:
# Decode base64 to bytes
return base64.b64decode(data)
else:
# Convert text to bytes
return data.encode('utf-8')
except Exception as e:
logger.error(f"Error processing file data for {fileId}: {str(e)}")
return None
def updateFileData(self, fileId: int, data: Union[bytes, str]) -> bool:
"""
Updates the binary data of a file in the database.
Handles base64 encoding based on the file type.
Args:
fileId: ID of the file
data: New binary data or text content
Returns:
True on success, False on error
"""
try:
import base64
# Check file metadata to determine if this should be stored as text or base64
file = self.getFile(fileId)
if not file:
logger.error(f"File with ID {fileId} not found when updating data")
return False
# Determine if this is a text-based format that should be stored as text
mimeType = file.get("mimeType", "application/octet-stream")
isTextFormat = (
mimeType.startswith("text/") or
mimeType in [
"application/json",
"application/xml",
"application/javascript",
"application/x-python",
"image/svg+xml"
]
)
base64Encoded = False
fileData = None
# Convert input data to the right format based on its type and the file's format
if isinstance(data, bytes):
if isTextFormat:
try:
# Try to convert bytes to text
fileData = data.decode('utf-8')
base64Encoded = False
except UnicodeDecodeError:
# Fallback to base64 if text decoding fails
fileData = base64.b64encode(data).decode('utf-8')
base64Encoded = True
else:
# Binary format - use base64
fileData = base64.b64encode(data).decode('utf-8')
base64Encoded = True
elif isinstance(data, str):
if isTextFormat:
# Text format - store as text
fileData = data
base64Encoded = False
else:
# Check if it's already base64 encoded
try:
# Try to decode as base64 to validate
base64.b64decode(data)
fileData = data
base64Encoded = True
except:
# Not valid base64, encode the string
fileData = base64.b64encode(data.encode('utf-8')).decode('utf-8')
base64Encoded = True
else:
# Convert to string first
stringData = str(data)
if isTextFormat:
fileData = stringData
base64Encoded = False
else:
fileData = base64.b64encode(stringData.encode('utf-8')).decode('utf-8')
base64Encoded = True
# Check if a record already exists
fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId})
dataUpdate = {
"data": fileData,
"base64Encoded": base64Encoded
}
if fileDataEntries:
# Update the existing record
self.db.recordModify("fileData", fileId, dataUpdate)
logger.info(f"Updated file data for file ID {fileId} (base64Encoded: {base64Encoded})")
else:
# Create a new record
dataUpdate["id"] = fileId
self.db.recordCreate("fileData", dataUpdate)
logger.info(f"Created new file data for file ID {fileId} (base64Encoded: {base64Encoded})")
return True
except Exception as e:
logger.error(f"Error updating data for file {fileId}: {str(e)}")
return False
def saveUploadedFile(self, fileContent: bytes, fileName: str) -> Dict[str, Any]:
"""
Saves an uploaded file in the database.
Metadata is stored in the 'files' table,
Binary data in the 'fileData' table with the appropriate base64Encoded flag.
Args:
fileContent: Binary data of the file
fileName: Name of the file
Returns:
Dictionary with metadata of the saved file
"""
try:
# Debug: Log the start of the file upload process
logger.info(f"Starting upload process for file: {fileName}")
# Debug: Check if fileContent is valid bytes
if not isinstance(fileContent, bytes):
logger.error(f"Invalid fileContent type: {type(fileContent)}")
raise ValueError(f"fileContent must be bytes, got {type(fileContent)}")
# Calculate file hash for deduplication
fileHash = self.calculateFileHash(fileContent)
logger.debug(f"Calculated file hash: {fileHash}")
# Check for duplicate
existingFile = self.checkForDuplicateFile(fileHash)
if existingFile:
# Simply return the existing file metadata
logger.info(f"Duplicate found for {fileName}: {existingFile['id']}")
return existingFile
# Determine MIME type
mimeType = self.getMimeType(fileName)
# Determine file size
fileSize = len(fileContent)
# 1. Save metadata in the 'files' table
logger.info(f"Saving file metadata to database for file: {fileName}")
dbFile = self.createFile(
name=fileName,
mimeType=mimeType,
size=fileSize,
fileHash=fileHash
)
# 2. Save binary data with appropriate base64 encoding based on file type
logger.info(f"Saving file content to database for file: {fileName}")
self.createFileData(dbFile["id"], fileContent)
# Debug: Export file to static folder
self._exportFileToStatic(fileContent, dbFile["id"], fileName) # DEBUG TODO
# Debug: Verify database record was created
if not dbFile:
logger.warning(f"Database record for file {fileName} was not created properly")
else:
logger.debug(f"Database record created for file {fileName}")
logger.info(f"File upload process completed for: {fileName}")
return dbFile
except Exception as e:
logger.error(f"Error in saveUploadedFile for {fileName}: {str(e)}", exc_info=True)
raise FileStorageError(f"Error saving file: {str(e)}")
def downloadFile(self, fileId: int) -> Optional[Dict[str, Any]]:
"""
Returns a file for download, including binary data.
Uses the base64Encoded flag to determine how to process the file data.
Args:
fileId: ID of the file
Returns:
Dictionary with file data and metadata or None if not found
"""
try:
# 1. Get metadata from the 'files' table
file = self.getFile(fileId)
if not file:
raise FileNotFoundError(f"File with ID {fileId} not found")
# 2. Get binary data from the 'fileData' table using the new flag-aware method
fileContent = self.getFileData(fileId)
if fileContent is None:
raise FileNotFoundError(f"Binary data for file with ID {fileId} not found")
return {
"id": fileId,
"name": file.get("name", f"file_{fileId}"),
"contentType": file.get("mimeType", "application/octet-stream"),
"size": file.get("size", len(fileContent)),
"content": fileContent
}
except FileNotFoundError as e:
# Re-raise FileNotFoundError as is
raise
except Exception as e:
logger.error(f"Error downloading file {fileId}: {str(e)}")
raise FileError(f"Error downloading file: {str(e)}")
    def _exportFileToStatic(self, fileContent: bytes, fileId: int, fileName: str) -> None:
        """Debug helper: dump raw file content to ./static/<fileId>_<fileName>.

        Writes unconditionally and assumes ./static exists relative to the
        current working directory. Marked DEBUG/TODO at its call site.
        """
        debugFilename = f"{fileId}_{fileName}"
        with open(f"./static/{debugFilename}", 'wb') as f:
            f.write(fileContent)
# Workflow methods
def getAllWorkflows(self) -> List[Dict[str, Any]]:
"""Returns all workflows for the current mandate"""
return self.db.getRecordset("workflows")
def getWorkflowsByUser(self, userId: int) -> List[Dict[str, Any]]:
"""Returns all workflows for a user"""
return self.db.getRecordset("workflows", recordFilter={"userId": userId})
def getWorkflow(self, workflowId: str) -> Optional[Dict[str, Any]]:
"""Returns a workflow by its ID"""
workflows = self.db.getRecordset("workflows", recordFilter={"id": workflowId})
if workflows:
return workflows[0]
return None
def createWorkflow(self, workflowData: Dict[str, Any]) -> Dict[str, Any]:
"""Creates a new workflow in the database"""
# Make sure mandateId and userId are set
if "mandateId" not in workflowData:
workflowData["mandateId"] = self.mandateId
if "userId" not in workflowData:
workflowData["userId"] = self.userId
# Set timestamp if not present
currentTime = self._getCurrentTimestamp()
if "startedAt" not in workflowData:
workflowData["startedAt"] = currentTime
if "lastActivity" not in workflowData:
workflowData["lastActivity"] = currentTime
return self.db.recordCreate("workflows", workflowData)
def updateWorkflow(self, workflowId: str, workflowData: Dict[str, Any]) -> Dict[str, Any]:
"""
Updates an existing workflow.
Args:
workflowId: ID of the workflow to update
workflowData: New data for the workflow
Returns:
The updated workflow object
"""
# Check if the workflow exists
workflow = self.getWorkflow(workflowId)
if not workflow:
return None
# Set update time
workflowData["lastActivity"] = self._getCurrentTimestamp()
# Update workflow
return self.db.recordModify("workflows", workflowId, workflowData)
def deleteWorkflow(self, workflowId: str) -> bool:
"""
Deletes a workflow from the database.
Args:
workflowId: ID of the workflow to delete
Returns:
True on success, False if the workflow doesn't exist
"""
# Check if the workflow exists
workflow = self.getWorkflow(workflowId)
if not workflow:
return False
# Check if the user is the owner or has admin rights
if workflow.get("userId") != self.userId:
# Here could be a check for admin rights
return False
# Delete workflow
return self.db.recordDelete("workflows", workflowId)
# Workflow Messages
def getWorkflowMessages(self, workflowId: str) -> List[Dict[str, Any]]:
"""Returns all messages of a workflow"""
return self.db.getRecordset("workflowMessages", recordFilter={"workflowId": workflowId})
    def createWorkflowMessage(self, messageData: Dict[str, Any]) -> Dict[str, Any]:
        """
        Creates a new message for a workflow.

        Fills in defaults (timestamp, status, sequence number, role,
        agentName) and appends the new message ID to the owning workflow's
        messageIds list.

        Args:
            messageData: The message data; must contain "id" and "workflowId"
                (an explicit id of None is tolerated and auto-generated).
        Returns:
            The created message or None on error
        """
        try:
            # Check if required fields are present
            # NOTE: "id" must be PRESENT as a key, but may be None -- see below.
            requiredFields = ["id", "workflowId"]
            for field in requiredFields:
                if field not in messageData:
                    logger.error(f"Required field '{field}' missing in messageData")
                    raise ValueError(f"Required field '{field}' missing in message data")
            # Validate that ID is not None
            if messageData["id"] is None:
                messageData["id"] = f"msg_{uuid.uuid4()}"
                logger.warning(f"Automatically generated ID for workflow message: {messageData['id']}")
            # Ensure required fields are present; legacy callers may send
            # "createdAt", which is normalized to "startedAt".
            if "startedAt" not in messageData and "createdAt" not in messageData:
                messageData["startedAt"] = self._getCurrentTimestamp()
            if "createdAt" in messageData and "startedAt" not in messageData:
                messageData["startedAt"] = messageData["createdAt"]
                del messageData["createdAt"]
            # Set status if not present
            if "status" not in messageData:
                messageData["status"] = "completed"
            # Set sequence number if not present
            if "sequenceNo" not in messageData:
                # Get current messages to determine next sequence number
                existingMessages = self.getWorkflowMessages(messageData["workflowId"])
                messageData["sequenceNo"] = len(existingMessages) + 1
            # Ensure role and agentName are present: an agent name implies an
            # assistant message, otherwise it is treated as a user message.
            if "role" not in messageData:
                messageData["role"] = "assistant" if messageData.get("agentName") else "user"
            if "agentName" not in messageData:
                messageData["agentName"] = ""
            # Debug log for data to create
            logger.debug(f"Creating workflow message with data: {messageData}")
            # Create message in database
            createdMessage = self.db.recordCreate("workflowMessages", messageData)
            # Update workflow's messageIds if this is a new message
            if createdMessage:
                workflowId = messageData["workflowId"]
                workflow = self.getWorkflow(workflowId)
                if workflow:
                    # Get current messageIds or initialize empty list
                    messageIds = workflow.get("messageIds", [])
                    # Add the new message ID if not already in the list
                    if createdMessage["id"] not in messageIds:
                        messageIds.append(createdMessage["id"])
                        self.updateWorkflow(workflowId, {"messageIds": messageIds})
            return createdMessage
        except Exception as e:
            logger.error(f"Error creating workflow message: {str(e)}")
            # Return None instead of raising to avoid cascading failures
            return None
    def updateWorkflowMessage(self, messageId: str, messageData: Dict[str, Any]) -> Dict[str, Any]:
        """
        Updates an existing workflow message in the database.

        Falls back to CREATING the message when it does not exist yet and a
        workflowId is supplied (upsert behavior).

        Args:
            messageId: ID of the message
            messageData: Data to update
        Returns:
            The updated message object or None on error
        Raises:
            ValueError: on empty messageId, and as a wrapper for ANY other
                failure (database errors are rewrapped as ValueError).
        """
        try:
            # Debug info
            logger.debug(f"Updating message {messageId} in database")
            # Ensure messageId is provided
            if not messageId:
                logger.error("No messageId provided for updateWorkflowMessage")
                raise ValueError("messageId cannot be empty")
            # Check if message exists in database
            messages = self.db.getRecordset("workflowMessages", recordFilter={"id": messageId})
            if not messages:
                logger.warning(f"Message with ID {messageId} does not exist in database")
                # If message doesn't exist but we have workflowId, create it
                if "workflowId" in messageData:
                    logger.info(f"Creating new message with ID {messageId} for workflow {messageData.get('workflowId')}")
                    return self.db.recordCreate("workflowMessages", messageData)
                else:
                    logger.error(f"Workflow ID missing for new message {messageId}")
                    return None
            # Update existing message
            existingMessage = messages[0]
            # Ensure required fields present (only when missing in BOTH the
            # update payload and the stored record).
            for key in ["role", "agentName"]:
                if key not in messageData and key not in existingMessage:
                    messageData[key] = "assistant" if key == "role" else ""
            # Ensure ID is in the dataset
            if 'id' not in messageData:
                messageData['id'] = messageId
            # Convert legacy createdAt to startedAt if needed
            if "createdAt" in messageData and "startedAt" not in messageData:
                messageData["startedAt"] = messageData["createdAt"]
                del messageData["createdAt"]
            # Update the message
            updatedMessage = self.db.recordModify("workflowMessages", messageId, messageData)
            if updatedMessage:
                logger.info(f"Message {messageId} updated successfully")
            else:
                logger.warning(f"Failed to update message {messageId}")
            return updatedMessage
        except Exception as e:
            logger.error(f"Error updating message {messageId}: {str(e)}", exc_info=True)
            # Re-raise with full information
            raise ValueError(f"Error updating message {messageId}: {str(e)}")
def deleteWorkflowMessage(self, workflowId: str, messageId: str) -> bool:
"""
Deletes a message from a workflow in the database.
Args:
workflowId: ID of the associated workflow
messageId: ID of the message to delete
Returns:
True on success, False on error
"""
try:
# Check if the message exists
messages = self.getWorkflowMessages(workflowId)
message = next((m for m in messages if m.get("id") == messageId), None)
if not message:
logger.warning(f"Message {messageId} for workflow {workflowId} not found")
return False
# Delete the message from the database
return self.db.recordDelete("workflowMessages", messageId)
except Exception as e:
logger.error(f"Error deleting message {messageId}: {str(e)}")
return False
    def deleteFileFromMessage(self, workflowId: str, messageId: str, fileId: int) -> bool:
        """
        Removes a file reference from a message.
        The file itself is not deleted, only the reference in the message.
        Enhanced version with improved file matching.

        Message lookup tries exact, case-insensitive, then prefix ID matching;
        document matching tries exact and substring ID matching.

        Args:
            workflowId: ID of the associated workflow
            messageId: ID of the message
            fileId: ID of the file to remove
        Returns:
            True on success, False on error
        """
        try:
            # Log operation
            logger.info(f"Removing file {fileId} from message {messageId} in workflow {workflowId}")
            # Get all workflow messages
            allMessages = self.getWorkflowMessages(workflowId)
            logger.debug(f"Workflow {workflowId} has {len(allMessages)} messages")
            # Try different approaches to find the message
            message = None
            # Exact match
            message = next((m for m in allMessages if m.get("id") == messageId), None)
            # Case-insensitive match
            if not message and isinstance(messageId, str):
                message = next((m for m in allMessages
                                if isinstance(m.get("id"), str) and m.get("id").lower() == messageId.lower()), None)
            # Partial match (starts with)
            if not message and isinstance(messageId, str):
                message = next((m for m in allMessages
                                if isinstance(m.get("id"), str) and m.get("id").startswith(messageId)), None)
            if not message:
                logger.warning(f"Message {messageId} not found in workflow {workflowId}")
                return False
            # Log the found message
            logger.info(f"Found message: {message.get('id')}")
            # Check if message has documents
            if "documents" not in message or not message["documents"]:
                logger.warning(f"No documents in message {messageId}")
                return False
            # Log existing documents
            documents = message.get("documents", [])
            logger.debug(f"Message has {len(documents)} documents")
            for i, doc in enumerate(documents):
                docId = doc.get("id", "unknown")
                fileIdValue = doc.get("fileId", "unknown")
                logger.debug(f"Document {i}: docId={docId}, fileId={fileIdValue}")
            # Create a new list of documents without the one to delete
            updatedDocuments = []
            removed = False
            for doc in documents:
                docId = doc.get("id")
                fileIdValue = doc.get("fileId")
                # Flexible matching approach.
                # NOTE(review): the substring checks below mean fileId 1 also
                # matches ids like "file_12" or "file_21" and can remove MORE
                # than one document -- confirm this over-matching is intended.
                shouldRemove = (
                    (docId == fileId) or
                    (fileIdValue == fileId) or
                    (isinstance(docId, str) and str(fileId) in docId) or
                    (isinstance(fileIdValue, str) and str(fileId) in fileIdValue)
                )
                if shouldRemove:
                    removed = True
                    logger.info(f"Found file to remove: docId={docId}, fileId={fileIdValue}")
                else:
                    updatedDocuments.append(doc)
            if not removed:
                logger.warning(f"No matching file {fileId} found in message {messageId}")
                return False
            # Update message with modified documents array
            messageUpdate = {
                "documents": updatedDocuments
            }
            # Apply the update directly to the database
            updated = self.db.recordModify("workflowMessages", message["id"], messageUpdate)
            if updated:
                logger.info(f"Successfully removed file {fileId} from message {messageId}")
                return True
            else:
                logger.warning(f"Failed to update message {messageId} in database")
                return False
        except Exception as e:
            logger.error(f"Error removing file {fileId} from message {messageId}: {str(e)}")
            return False
# Workflow Logs
def getWorkflowLogs(self, workflowId: str) -> List[Dict[str, Any]]:
"""Returns all log entries for a workflow"""
return self.db.getRecordset("workflowLogs", recordFilter={"workflowId": workflowId})
def createWorkflowLog(self, logData: Dict[str, Any]) -> Dict[str, Any]:
"""Creates a new log entry for a workflow"""
# Make sure required fields are present
if "timestamp" not in logData:
logData["timestamp"] = self._getCurrentTimestamp()
# Add status information if not present
if "status" not in logData and "type" in logData:
if logData["type"] == "error":
logData["status"] = "error"
else:
logData["status"] = "running"
# Add progress information if not present
if "progress" not in logData:
# Default progress values based on log type
if logData.get("type") == "info":
logData["progress"] = 50 # Default middle progress
elif logData.get("type") == "error":
logData["progress"] = -1 # Error state
elif logData.get("type") == "warning":
logData["progress"] = 50 # Default middle progress
return self.db.recordCreate("workflowLogs", logData)
# Workflow Management
def saveWorkflowState(self, workflow: Dict[str, Any], saveMessages: bool = True, saveLogs: bool = True) -> bool:
"""
Saves the state of a workflow to the database.
Workflow data is updated, but messages are stored separately.
Args:
workflow: The workflow object
saveMessages: Flag to determine if messages should be saved
saveLogs: Flag to determine if logs should be saved
Returns:
True on success, False on failure
"""
try:
workflowId = workflow.get("id")
if not workflowId:
return False
# Extract only the database-relevant workflow fields
# IMPORTANT: Don't store messages in the workflow table!
workflowDbData = {
"id": workflowId,
"mandateId": workflow.get("mandateId", self.mandateId),
"userId": workflow.get("userId", self.userId),
"name": workflow.get("name", f"Workflow {workflowId}"),
"status": workflow.get("status", "completed"),
"startedAt": workflow.get("startedAt", self._getCurrentTimestamp()),
"lastActivity": workflow.get("lastActivity", self._getCurrentTimestamp()),
"dataStats": workflow.get("dataStats", {})
}
# Check if workflow already exists
existingWorkflow = self.getWorkflow(workflowId)
if existingWorkflow:
self.updateWorkflow(workflowId, workflowDbData)
else:
self.createWorkflow(workflowDbData)
# Save messages
if saveMessages and "messages" in workflow:
for message in workflow["messages"]:
messageId = message.get("id")
if not messageId:
continue
# Since each message is already saved with createWorkflowMessage,
# we only need to check if updates are necessary
# First, get existing message from database
existingMessages = self.getWorkflowMessages(workflowId)
existingMessage = next((m for m in existingMessages if m.get("id") == messageId), None)
if existingMessage:
# Check if updates are needed
hasChanges = False
for key in ["role", "agentName", "content", "status", "documents"]:
if key in message and message.get(key) != existingMessage.get(key):
hasChanges = True
break
if hasChanges:
# Extract only relevant data for the database
messageData = {
"role": message.get("role", existingMessage.get("role", "unknown")),
"content": message.get("content", existingMessage.get("content", "")),
"agentName": message.get("agentName", existingMessage.get("agentName", "")),
"status": message.get("status", existingMessage.get("status", "completed")),
"documents": message.get("documents", existingMessage.get("documents", []))
}
self.updateWorkflowMessage(messageId, messageData)
else:
# Message doesn't exist in database yet
# It should have been saved via createWorkflowMessage
# If not, log a warning
logger.warning(f"Message {messageId} in workflow {workflowId} not found in database")
# Save logs
if saveLogs and "logs" in workflow:
# Get existing logs
existingLogs = {log["id"]: log for log in self.getWorkflowLogs(workflowId)}
for log in workflow["logs"]:
logId = log.get("id")
if not logId:
continue
# Extract only relevant data for the database
logData = {
"id": logId,
"workflowId": workflowId,
"message": log.get("message", ""),
"type": log.get("type", "info"),
"timestamp": log.get("timestamp", self._getCurrentTimestamp()),
"agentName": log.get("agentName", "(undefined)"),
"status": log.get("status", "running"),
"progress": log.get("progress", 50)
}
# Create or update log
if logId in existingLogs:
self.db.recordModify("workflowLogs", logId, logData)
else:
self.db.recordCreate("workflowLogs", logData)
return True
except Exception as e:
logger.error(f"Error saving workflow state: {str(e)}")
return False
def loadWorkflowState(self, workflowId: str) -> Optional[Dict[str, Any]]:
"""
Loads the complete state of a workflow from the database.
This includes the workflow itself, messages, and logs.
Args:
workflowId: ID of the workflow to load
Returns:
The complete workflow object or None on error
"""
try:
# Load base workflow
workflow = self.getWorkflow(workflowId)
if not workflow:
return None
# Log the workflow base retrieval
logger.debug(f"Loaded base workflow {workflowId} from database")
# Load messages
messages = self.getWorkflowMessages(workflowId)
# Sort by sequence number
messages.sort(key=lambda x: x.get("sequenceNo", 0))
# Debug log for messages and document counts
messageCount = len(messages)
logger.debug(f"Loaded {messageCount} messages for workflow {workflowId}")
# Check if messageIds exists and is valid
messageIds = workflow.get("messageIds", [])
if not messageIds or len(messageIds) != len(messages):
# Rebuild messageIds from messages
messageIds = [msg.get("id") for msg in messages]
# Update in database
self.updateWorkflow(workflowId, {"messageIds": messageIds})
logger.info(f"Rebuilt messageIds for workflow {workflowId}")
# Log document counts for each message
for msg in messages:
docCount = len(msg.get("documents", []))
if docCount > 0:
logger.info(f"Message {msg.get('id')} has {docCount} documents loaded from database")
# Load logs
logs = self.getWorkflowLogs(workflowId)
# Sort by timestamp
logs.sort(key=lambda x: x.get("timestamp", ""))
# Assemble complete workflow object
completeWorkflow = workflow.copy()
completeWorkflow["messages"] = messages
completeWorkflow["messageIds"] = messageIds # Ensure messageIds is included
completeWorkflow["logs"] = logs
return completeWorkflow
except Exception as e:
logger.error(f"Error loading workflow state: {str(e)}")
return None
# Singleton factory for LucyDOMInterface instances per context
# Module-level cache of interface instances, keyed by "{mandateId}_{userId}".
_lucydomInterfaces: Dict[str, LucyDOMInterface] = {}
def getLucydomInterface(mandateId: int = 0, userId: int = 0) -> LucyDOMInterface:
    """
    Returns a LucyDOMInterface instance for the specified context.
    Reuses existing instances.
    Args:
        mandateId: ID of the mandate
        userId: ID of the user
    Returns:
        LucyDOMInterface instance
    """
    contextKey = f"{mandateId}_{userId}"
    interface = _lucydomInterfaces.get(contextKey)
    if interface is None:
        # First request for this context: build the interface lazily and
        # attach its AI service before caching it.
        interface = LucyDOMInterface(mandateId, userId)
        interface.aiService = ChatService()
        _lucydomInterfaces[contextKey] = interface
    return interface
# Init
# Eagerly creates and caches the default (mandateId=0, userId=0) interface.
# NOTE(review): this runs at import time and presumably triggers database
# and AI-service initialization as a side effect -- confirm this is intended.
getLucydomInterface()