"""
|
|
Interface to LucyDOM database and AI Connectors.
|
|
Uses the JSON connector for data access with added language support.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
import uuid
|
|
from datetime import datetime
|
|
from typing import Dict, Any, List, Optional, Union
|
|
|
|
import importlib
|
|
import hashlib
|
|
|
|
# DYNAMIC PART: Connectors to the Interface
|
|
from connectors.connectorDbJson import DatabaseConnector
|
|
from connectors.connectorAiOpenai import ChatService
|
|
|
|
# Basic Configurations
|
|
from modules.configuration import APP_CONFIG
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Custom exceptions for file handling
|
|
class FileError(Exception):
    """Base class for file handling exceptions."""
|
|
|
|
class FileNotFoundError(FileError):
    """Exception raised when a file is not found.

    NOTE(review): this intentionally shadows the builtin
    FileNotFoundError inside this module — keep that in mind when
    writing ``except`` clauses here.
    """
|
|
|
|
class FileStorageError(FileError):
    """Exception raised when there's an error storing a file."""
|
|
|
|
class FilePermissionError(FileError):
    """Exception raised when there's a permission issue with a file."""
|
|
|
|
class FileDeletionError(FileError):
    """Exception raised when there's an error deleting a file."""
|
|
|
|
|
|
class LucyDOMInterface:
|
|
"""
|
|
Interface to the LucyDOM database.
|
|
Uses the JSON connector for data access.
|
|
"""
|
|
|
|
def __init__(self, mandateId: int, userId: int):
    """
    Initializes the LucyDOM Interface with mandate and user context.

    Args:
        mandateId: ID of the current mandate
        userId: ID of the current user

    Raises:
        ImportError: when the lucydomModel data model module is missing.
    """
    self.mandateId = mandateId
    self.userId = userId

    # Language / AI defaults; the AI service is injected externally later.
    self.userLanguage = "en"  # Default user language
    self.aiService = None  # Will be set externally

    # The data model module is required — fail fast when it is absent.
    try:
        self.modelModule = importlib.import_module("modules.lucydomModel")
    except ImportError as e:
        logger.error(f"Error importing lucydomModel: {e}")
        raise
    else:
        logger.info("lucydomModel successfully imported")

    # Create the DB connector and seed default records when needed.
    self._initializeDatabase()
|
|
|
|
def _initializeDatabase(self):
    """
    Initializes the database with minimal objects for the logged-in user in the mandate, if it doesn't exist yet.
    No initialization without a valid user.
    Creates an initial dataset for each table defined in the data model.

    Side effects:
        Assigns ``self.db`` (only when both mandate and user are set) and
        may insert four standard prompt records on first run.
    """
    effectiveMandateId = self.mandateId
    effectiveUserId = self.userId
    # Without both a mandate and a user there is nothing to initialize.
    # NOTE(review): self.db is NOT assigned on this early return, so other
    # methods would fail with AttributeError — confirm callers never pass
    # None IDs.
    if effectiveMandateId is None or effectiveUserId is None:
        # no user/mandate context, nothing to do
        return

    self.db = DatabaseConnector(
        dbHost=APP_CONFIG.get("DB_LUCYDOM_HOST"),
        dbDatabase=APP_CONFIG.get("DB_LUCYDOM_DATABASE"),
        dbUser=APP_CONFIG.get("DB_LUCYDOM_USER"),
        dbPassword=APP_CONFIG.get("DB_LUCYDOM_PASSWORD_SECRET"),
        mandateId=self.mandateId,
        userId=self.userId,
        skipInitialIdLookup=True
    )

    # Seed standard prompts only when the prompts table is still empty.
    prompts = self.db.getRecordset("prompts")
    if not prompts:
        logger.info("Creating standard prompts")

        # Define standard prompts
        standardPrompts = [
            {
                "mandateId": effectiveMandateId,
                "userId": effectiveUserId,
                "content": "Research the current market trends and developments in [TOPIC]. Collect information about leading companies, innovative products or services, and current challenges. Present the results in a structured overview with relevant data and sources.",
                "name": "Web Research: Market Research"
            },
            {
                "mandateId": effectiveMandateId,
                "userId": effectiveUserId,
                "content": "Analyze the attached dataset on [TOPIC] and identify the most important trends, patterns, and anomalies. Perform statistical calculations to support your findings. Present the results in a clearly structured analysis and draw relevant conclusions.",
                "name": "Analysis: Data Analysis"
            },
            {
                "mandateId": effectiveMandateId,
                "userId": effectiveUserId,
                "content": "Create a detailed protocol of our meeting on [TOPIC]. Capture all discussed points, decisions made, and agreed measures. Structure the protocol clearly with agenda items, participant list, and clear responsibilities for follow-up actions.",
                "name": "Protocol: Meeting Minutes"
            },
            {
                "mandateId": effectiveMandateId,
                "userId": effectiveUserId,
                "content": "Develop a UI/UX design concept for [APPLICATION/WEBSITE]. Consider the target audience, main functions, and brand identity. Describe the visual design, navigation, interaction patterns, and information architecture. Explain how the design optimizes user-friendliness and user experience.",
                "name": "Design: UI/UX Design"
            }
        ]

        # Create prompts
        for promptData in standardPrompts:
            createdPrompt = self.db.recordCreate("prompts", promptData)
            logger.info(f"Prompt '{promptData.get('name', 'Standard')}' was created with ID {createdPrompt['id']}")
|
|
|
|
# Language support methods
|
|
|
|
def setUserLanguage(self, languageCode: str):
    """Record the language in which user-facing AI answers are produced."""
    self.userLanguage = languageCode
    logger.info(f"User language set to: {languageCode}")
|
|
|
|
async def callAi(self, messages: List[Dict[str, str]], produceUserAnswer: bool = False, temperature: float = None) -> str:
    """
    Enhanced AI service call with language support.

    NOTE: may mutate ``messages`` in place (prepending a system message
    or editing the first system message) when produceUserAnswer is True.

    Args:
        messages: List of message dictionaries
        produceUserAnswer: Whether this response is for the end-user
        temperature: Optional temperature setting

    Returns:
        AI response text, or an error string when no AI service is set
    """
    # The AI service is injected externally; without it we cannot call out.
    if not self.aiService:
        logger.error("AI service not set in LucyDOMInterface")
        return "Error: AI service not available"

    # Add language instruction for user-facing responses
    if produceUserAnswer and self.userLanguage:
        ltext= f"Please respond in '{self.userLanguage}' language."
        if messages and messages[0]["role"] == "system":
            # Only prepend when the system prompt does not already
            # mention a language (substring check, case-insensitive).
            if "language" not in messages[0]["content"].lower():
                messages[0]["content"] = f"{ltext} {messages[0]['content']}"
        else:
            # Insert a system message with language instruction
            messages.insert(0, {
                "role": "system",
                "content": ltext
            })

    # Call the AI service; only pass temperature when explicitly given so
    # the connector's own default applies otherwise.
    if temperature is not None:
        return await self.aiService.callApi(messages, temperature=temperature)
    else:
        return await self.aiService.callApi(messages)
|
|
|
|
# Utilities
|
|
|
|
def getInitialId(self, table: str) -> Optional[int]:
    """
    Delegate to the connector to fetch a table's initial record ID.

    Args:
        table: Name of the table

    Returns:
        The initial ID, or None when the table has none
    """
    return self.db.getInitialId(table)
|
|
|
|
def _getCurrentTimestamp(self) -> str:
|
|
"""Returns the current timestamp in ISO format"""
|
|
return datetime.now().isoformat()
|
|
|
|
|
|
# Prompt methods
|
|
|
|
def getAllPrompts(self) -> List[Dict[str, Any]]:
    """Fetch every prompt record belonging to the current mandate."""
    prompts = self.db.getRecordset("prompts")
    return prompts
|
|
|
|
def getPrompt(self, promptId: int) -> Optional[Dict[str, Any]]:
    """Look up a single prompt by its ID; None when no match exists."""
    matches = self.db.getRecordset("prompts", recordFilter={"id": promptId})
    return matches[0] if matches else None
|
|
|
|
def createPrompt(self, content: str, name: str) -> Dict[str, Any]:
    """Persist a new prompt owned by the current mandate and user."""
    record = {
        "mandateId": self.mandateId,
        "userId": self.userId,
        "content": content,
        "name": name,
        "createdAt": self._getCurrentTimestamp(),
    }
    return self.db.recordCreate("prompts", record)
|
|
|
|
def updatePrompt(self, promptId: int, content: str = None, name: str = None) -> Dict[str, Any]:
    """
    Updates an existing prompt.

    Args:
        promptId: ID of the prompt to update
        content: New content for the prompt (left unchanged when None)
        name: New name for the prompt (left unchanged when None)

    Returns:
        The updated prompt object, or None when the prompt does not exist
    """
    # A prompt that does not exist cannot be updated.
    if not self.getPrompt(promptId):
        return None

    # Only forward the fields the caller actually supplied.
    changes = {}
    if content is not None:
        changes["content"] = content
    if name is not None:
        changes["name"] = name

    return self.db.recordModify("prompts", promptId, changes)
|
|
|
|
def deletePrompt(self, promptId: int) -> bool:
    """
    Deletes a prompt from the database.

    Args:
        promptId: ID of the prompt to delete

    Returns:
        True if the prompt was successfully deleted, otherwise False
    """
    return self.db.recordDelete("prompts", promptId)
|
|
|
|
|
|
# File Utilities
|
|
|
|
def calculateFileHash(self, fileContent: bytes) -> str:
    """Return the hex SHA-256 digest of the given file content."""
    digest = hashlib.sha256(fileContent)
    return digest.hexdigest()
|
|
|
|
def checkForDuplicateFile(self, fileHash: str) -> Optional[Dict[str, Any]]:
    """Return an existing file record carrying the same hash, or None."""
    matches = self.db.getRecordset("files", recordFilter={"fileHash": fileHash})
    return matches[0] if matches else None
|
|
|
|
def getMimeType(self, filename: str) -> str:
    """
    Determines the MIME type based on the file extension.

    Args:
        filename: File name whose extension decides the MIME type.

    Returns:
        The matching MIME type, or "application/octet-stream" for an
        unknown extension.
    """
    # os is already imported at module level — no need for a local import.
    # splitext keeps the leading dot, so strip it with [1:]; the value is
    # lowercased once here, making any later .lower() redundant.
    ext = os.path.splitext(filename)[1].lower()[1:]
    extensionToMime = {
        "pdf": "application/pdf",
        "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "doc": "application/msword",
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "xls": "application/vnd.ms-excel",
        "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "ppt": "application/vnd.ms-powerpoint",
        "csv": "text/csv",
        "txt": "text/plain",
        "json": "application/json",
        "xml": "application/xml",
        "html": "text/html",
        "htm": "text/html",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "webp": "image/webp",
        "svg": "image/svg+xml",
        "py": "text/x-python",
        "js": "application/javascript",
        "css": "text/css"
    }
    return extensionToMime.get(ext, "application/octet-stream")
|
|
|
|
|
|
# File methods - metadata-based operations
|
|
|
|
def getAllFiles(self) -> List[Dict[str, Any]]:
    """
    Returns all files for the current mandate without binary data.

    Returns:
        List of FileItem objects without binary data
    """
    return self.db.getRecordset("files")
|
|
|
|
def getFile(self, fileId: int) -> Optional[Dict[str, Any]]:
    """
    Returns a file by its ID, without binary data.

    Args:
        fileId: ID of the file

    Returns:
        FileItem without binary data or None if not found
    """
    matches = self.db.getRecordset("files", recordFilter={"id": fileId})
    return matches[0] if matches else None
|
|
|
|
def createFile(self, name: str, mimeType: str, size: int = None, fileHash: str = None) -> Dict[str, Any]:
    """
    Creates a new file entry in the database without content.
    The actual file content is stored separately in the FileData table.

    Args:
        name: Name of the file
        mimeType: MIME type of the file
        size: Size of the file in bytes
        fileHash: Hash value of the file for deduplication

    Returns:
        The created FileItem object
    """
    record = {
        "mandateId": self.mandateId,
        "userId": self.userId,
        "name": name,
        "mimeType": mimeType,
        "size": size,
        "fileHash": fileHash,
        "creationDate": self._getCurrentTimestamp(),
    }
    return self.db.recordCreate("files", record)
|
|
|
|
def updateFile(self, fileId: int, updateData: Dict[str, Any]) -> Dict[str, Any]:
    """
    Updates the metadata of an existing file without affecting the binary data.

    Args:
        fileId: ID of the file to update
        updateData: Dictionary with fields to update

    Returns:
        The updated FileItem object

    Raises:
        FileNotFoundError: when no file with that ID exists
    """
    # Guard clause: refuse to update a non-existent record.
    if not self.getFile(fileId):
        raise FileNotFoundError(f"File with ID {fileId} not found")
    return self.db.recordModify("files", fileId, updateData)
|
|
|
|
def deleteFile(self, fileId: int) -> bool:
    """
    Deletes a file from the database (metadata and content).

    The binary content row is only removed when no other file record
    shares the same hash; otherwise only the metadata entry is deleted.

    Args:
        fileId: ID of the file

    Returns:
        True on success, False on error

    Raises:
        FileNotFoundError: when the file does not exist
        FilePermissionError: when the file belongs to another mandate
        FileDeletionError: on any other deletion failure
    """
    try:
        # Find the file in the database
        file = self.getFile(fileId)

        if not file:
            raise FileNotFoundError(f"File with ID {fileId} not found")

        # Check if the file belongs to the current mandate
        if file.get("mandateId") != self.mandateId:
            raise FilePermissionError(f"No permission to delete file {fileId}")

        # Check for other references to this file (by hash)
        fileHash = file.get("fileHash")
        if fileHash:
            otherReferences = [f for f in self.db.getRecordset("files", recordFilter={"fileHash": fileHash})
                               if f.get("id") != fileId]

            # If other files reference this content, only delete the database entry for FileItem
            if otherReferences:
                logger.info(f"Other references to the file content found, only FileItem will be deleted: {fileId}")
            else:
                # Also delete the file content in the FileData table.
                # Best effort: a failure here must not block metadata deletion.
                try:
                    fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId})
                    if fileDataEntries:
                        self.db.recordDelete("fileData", fileId)
                        logger.info(f"FileData for file {fileId} deleted")
                except Exception as e:
                    logger.warning(f"Error deleting FileData for file {fileId}: {str(e)}")

        # Delete the FileItem entry
        return self.db.recordDelete("files", fileId)

    except FileNotFoundError as e:
        # Pass through FileNotFoundError
        raise
    except FilePermissionError as e:
        # Pass through FilePermissionError
        raise
    except Exception as e:
        # Anything else is wrapped in the domain-specific deletion error.
        logger.error(f"Error deleting file {fileId}: {str(e)}")
        raise FileDeletionError(f"Error deleting file: {str(e)}")
|
|
|
|
|
|
# FileData methods - binary data operations
|
|
|
|
def createFileData(self, fileId: int, data: bytes) -> bool:
    """
    Stores the binary data of a file in the database as a Base64 string.

    Args:
        fileId: ID of the associated file
        data: Binary data (strings and other types are coerced)

    Returns:
        True on success, False on error
    """
    try:
        import base64
        import binascii

        # Convert binary data to base64 string
        if isinstance(data, bytes):
            encodedData = base64.b64encode(data).decode('utf-8')
            logger.debug(f"Converted {len(data)} bytes to base64 string of length {len(encodedData)}")
        else:
            logger.warning(f"Data is not bytes, but {type(data)}. Attempting to handle...")
            if isinstance(data, str):
                # Check whether the string is already base64. Strict
                # validation is essential: without validate=True,
                # b64decode silently drops invalid characters and can
                # "succeed" on plain text, which would store corrupted
                # content as if it were pre-encoded.
                try:
                    base64.b64decode(data, validate=True)
                    encodedData = data
                    logger.info(f"Data appears to be already base64 encoded, using as is")
                except (binascii.Error, ValueError):
                    # Not base64, so encode the string as bytes then to base64
                    encodedData = base64.b64encode(data.encode('utf-8')).decode('utf-8')
                    logger.info(f"Converted string to base64")
            else:
                # For other types, convert to string first
                encodedData = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
                logger.warning(f"Converted non-standard type to base64")

        # Create the fileData record with encoded data
        fileData = {
            "id": fileId,
            "data": encodedData
        }

        self.db.recordCreate("fileData", fileData)
        logger.info(f"Successfully stored encoded data for file {fileId}")
        return True
    except Exception as e:
        logger.error(f"Error storing binary data for file {fileId}: {str(e)}")
        return False
|
|
|
|
def getFileData(self, fileId: int) -> Optional[bytes]:
    """
    Returns the binary data of a file.
    Converts Base64 string from the database back to bytes.

    Args:
        fileId: ID of the file

    Returns:
        Binary data or None if not found
    """
    import base64

    fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId})
    if fileDataEntries and "data" in fileDataEntries[0]:
        encodedData = fileDataEntries[0]["data"]

        try:
            # Check if it's a string (most likely base64)
            if isinstance(encodedData, str):
                try:
                    # Try to decode base64
                    binaryData = base64.b64decode(encodedData)
                    logger.debug(f"Successfully decoded base64 string to {len(binaryData)} bytes")
                    return binaryData
                except Exception as e:
                    logger.error(f"Failed to decode base64 data: {str(e)}")
                    # If it's not valid base64, return as bytes
                    # (best effort: the raw UTF-8 encoding of the stored string)
                    return encodedData.encode('utf-8')
            # If it's already bytes (shouldn't happen with model change)
            elif isinstance(encodedData, bytes):
                logger.warning(f"Data was already bytes, no conversion needed")
                return encodedData
            else:
                # Unknown storage type — refuse rather than guess.
                logger.error(f"Unexpected data type in database: {type(encodedData)}")
                return None
        except Exception as e:
            logger.error(f"Error processing file data: {str(e)}")
            return None
    else:
        logger.warning(f"No data found for file ID {fileId}")
        return None
|
|
|
|
def updateFileData(self, fileId: int, data: Union[bytes, str]) -> bool:
    """
    Updates the binary data of a file in the database.
    Converts bytes to Base64 string for storage.

    Args:
        fileId: ID of the file
        data: New binary data or already base64-encoded string

    Returns:
        True on success, False on error
    """
    try:
        import base64
        import binascii

        # Convert data to base64 string if it's bytes
        if isinstance(data, bytes):
            encodedData = base64.b64encode(data).decode('utf-8')
            logger.debug(f"Converted {len(data)} bytes to base64 string")
        elif isinstance(data, str):
            # Check whether the string is already base64. Strict
            # validation avoids misclassifying arbitrary text as encoded
            # content (b64decode without validate ignores bad characters).
            try:
                base64.b64decode(data, validate=True)
                encodedData = data
                logger.debug(f"Data appears to be already base64 encoded, using as is")
            except (binascii.Error, ValueError):
                # Not base64, so encode the string as bytes then to base64
                encodedData = base64.b64encode(data.encode('utf-8')).decode('utf-8')
                logger.debug(f"Converted string to base64")
        else:
            # For other types, convert to string first
            encodedData = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
            logger.warning(f"Converted non-standard type to base64")

        # Upsert: modify the existing record, or create one when missing.
        fileDataEntries = self.db.getRecordset("fileData", recordFilter={"id": fileId})

        if fileDataEntries:
            self.db.recordModify("fileData", fileId, {"data": encodedData})
            logger.info(f"Updated existing file data for file ID {fileId}")
        else:
            fileData = {
                "id": fileId,
                "data": encodedData
            }
            self.db.recordCreate("fileData", fileData)
            logger.info(f"Created new file data for file ID {fileId}")

        return True
    except Exception as e:
        logger.error(f"Error updating binary data for file {fileId}: {str(e)}")
        return False
|
|
|
|
def saveUploadedFile(self, fileContent: bytes, fileName: str) -> Dict[str, Any]:
    """
    Saves an uploaded file in the database.
    Metadata is stored in the 'files' table,
    Binary data in the 'fileData' table as a Base64 string.

    Duplicate uploads (same SHA-256 hash) short-circuit and return the
    existing record without writing anything new.

    Args:
        fileContent: Binary data of the file
        fileName: Name of the file

    Returns:
        Dictionary with metadata of the saved file

    Raises:
        FileStorageError: on any failure during the upload process
    """
    try:
        # Debug: Log the start of the file upload process
        logger.info(f"Starting upload process for file: {fileName}")

        # Debug: Check if fileContent is valid bytes
        if not isinstance(fileContent, bytes):
            logger.error(f"Invalid fileContent type: {type(fileContent)}")
            raise ValueError(f"fileContent must be bytes, got {type(fileContent)}")

        # Calculate file hash for deduplication
        fileHash = self.calculateFileHash(fileContent)
        logger.debug(f"Calculated file hash: {fileHash}")

        # Check for duplicate
        existingFile = self.checkForDuplicateFile(fileHash)
        if existingFile:
            # Simply return the existing file metadata
            logger.info(f"Duplicate found for {fileName}: {existingFile['id']}")
            return existingFile

        # Determine MIME type
        mimeType = self.getMimeType(fileName)

        # Determine file size
        fileSize = len(fileContent)

        # 1. Save metadata in the 'files' table
        logger.info(f"Saving file metadata to database for file: {fileName}")
        dbFile = self.createFile(
            name=fileName,
            mimeType=mimeType,
            size=fileSize,
            fileHash=fileHash
        )

        # 2. Save binary data as Base64 string in the 'fileData' table
        logger.info(f"Saving file content to database for file: {fileName}")
        self.createFileData(dbFile["id"], fileContent)

        # Debug: Export file to static folder
        self._exportFileToStatic(fileContent, dbFile["id"], fileName) # DEBUG TODO

        # Debug: Verify database record was created
        # NOTE(review): dbFile was already dereferenced above, so this
        # falsy check can never trigger here — dead code kept as-is.
        if not dbFile:
            logger.warning(f"Database record for file {fileName} was not created properly")
        else:
            logger.debug(f"Database record created for file {fileName}")

        logger.info(f"File upload process completed for: {fileName}")
        return dbFile

    except Exception as e:
        logger.error(f"Error in saveUploadedFile for {fileName}: {str(e)}", exc_info=True)
        raise FileStorageError(f"Error saving file: {str(e)}")
|
|
|
|
def downloadFile(self, fileId: int) -> Optional[Dict[str, Any]]:
    """
    Returns a file for download, including binary data.

    Args:
        fileId: ID of the file

    Returns:
        Dictionary with file data and metadata

    Raises:
        FileNotFoundError: metadata or binary content is missing
        FileError: any other failure while assembling the download
    """
    try:
        # Metadata first, then the binary payload — both must exist.
        metadata = self.getFile(fileId)
        if not metadata:
            raise FileNotFoundError(f"File with ID {fileId} not found")

        content = self.getFileData(fileId)
        if content is None:
            raise FileNotFoundError(f"Binary data for file with ID {fileId} not found")

        return {
            "id": fileId,
            "name": metadata.get("name", f"file_{fileId}"),
            "contentType": metadata.get("mimeType", "application/octet-stream"),
            "size": metadata.get("size", len(content)),
            "content": content,
        }
    except FileNotFoundError:
        # Re-raise FileNotFoundError as is
        raise
    except Exception as e:
        logger.error(f"Error downloading file {fileId}: {str(e)}")
        raise FileError(f"Error downloading file: {str(e)}")
|
|
|
|
def _exportFileToStatic(self, fileContent: bytes, fileId: int, fileName: str):
|
|
debugFilename = f"{fileId}_{fileName}"
|
|
with open(f"./static/{debugFilename}", 'wb') as f:
|
|
f.write(fileContent)
|
|
|
|
# Workflow methods
|
|
|
|
def getAllWorkflows(self) -> List[Dict[str, Any]]:
    """Fetch every workflow record for the current mandate."""
    workflows = self.db.getRecordset("workflows")
    return workflows
|
|
|
|
def getWorkflowsByUser(self, userId: int) -> List[Dict[str, Any]]:
    """Fetch all workflows owned by the given user."""
    return self.db.getRecordset("workflows", recordFilter={"userId": userId})
|
|
|
|
def getWorkflow(self, workflowId: str) -> Optional[Dict[str, Any]]:
    """Look up a single workflow by its ID; None when no match exists."""
    matches = self.db.getRecordset("workflows", recordFilter={"id": workflowId})
    return matches[0] if matches else None
|
|
|
|
def createWorkflow(self, workflowData: Dict[str, Any]) -> Dict[str, Any]:
    """Create a new workflow record, filling in owner and timestamps."""
    # Default the ownership context to this interface's mandate/user.
    workflowData.setdefault("mandateId", self.mandateId)
    workflowData.setdefault("userId", self.userId)

    # Stamp start/activity times only when the caller did not supply them.
    now = self._getCurrentTimestamp()
    workflowData.setdefault("startedAt", now)
    workflowData.setdefault("lastActivity", now)

    return self.db.recordCreate("workflows", workflowData)
|
|
|
|
def updateWorkflow(self, workflowId: str, workflowData: Dict[str, Any]) -> Dict[str, Any]:
    """
    Updates an existing workflow.

    Args:
        workflowId: ID of the workflow to update
        workflowData: New data for the workflow

    Returns:
        The updated workflow object, or None when it does not exist
    """
    # Refuse to touch workflows that do not exist.
    if not self.getWorkflow(workflowId):
        return None

    # Every update refreshes the activity timestamp.
    workflowData["lastActivity"] = self._getCurrentTimestamp()
    return self.db.recordModify("workflows", workflowId, workflowData)
|
|
|
|
def deleteWorkflow(self, workflowId: str) -> bool:
    """
    Deletes a workflow from the database.

    Args:
        workflowId: ID of the workflow to delete

    Returns:
        True on success; False when the workflow is missing or the
        caller is not its owner.
    """
    workflow = self.getWorkflow(workflowId)
    if not workflow:
        return False

    # Only the owning user may delete (admin override not implemented).
    if workflow.get("userId") != self.userId:
        return False

    return self.db.recordDelete("workflows", workflowId)
|
|
|
|
|
|
# Workflow Messages
|
|
|
|
def getWorkflowMessages(self, workflowId: str) -> List[Dict[str, Any]]:
    """Fetch all messages recorded for the given workflow."""
    return self.db.getRecordset("workflowMessages", recordFilter={"workflowId": workflowId})
|
|
|
|
def createWorkflowMessage(self, messageData: Dict[str, Any]) -> Dict[str, Any]:
    """
    Creates a new message for a workflow.

    Normalizes the payload in place (ID, timestamps, status, sequence
    number, role, agentName) before persisting, and appends the new
    message ID to the parent workflow's messageIds list.

    Args:
        messageData: The message data; must contain "id" and "workflowId"

    Returns:
        The created message or None on error
    """
    try:
        # Check if required fields are present
        requiredFields = ["id", "workflowId"]
        for field in requiredFields:
            if field not in messageData:
                logger.error(f"Required field '{field}' missing in messageData")
                raise ValueError(f"Required field '{field}' missing in message data")

        # Validate that ID is not None; generate one as a fallback
        if messageData["id"] is None:
            messageData["id"] = f"msg_{uuid.uuid4()}"
            logger.warning(f"Automatically generated ID for workflow message: {messageData['id']}")

        # Ensure a start timestamp exists
        if "startedAt" not in messageData and "createdAt" not in messageData:
            messageData["startedAt"] = self._getCurrentTimestamp()

        # Legacy field name: map createdAt -> startedAt
        if "createdAt" in messageData and "startedAt" not in messageData:
            messageData["startedAt"] = messageData["createdAt"]
            del messageData["createdAt"]

        # Set status if not present
        if "status" not in messageData:
            messageData["status"] = "completed"

        # Set sequence number if not present
        if "sequenceNo" not in messageData:
            # Get current messages to determine next sequence number
            existingMessages = self.getWorkflowMessages(messageData["workflowId"])
            messageData["sequenceNo"] = len(existingMessages) + 1

        # Ensure role and agentName are present; messages carrying an
        # agent name are treated as assistant messages.
        if "role" not in messageData:
            messageData["role"] = "assistant" if messageData.get("agentName") else "user"

        if "agentName" not in messageData:
            messageData["agentName"] = ""

        # Debug log for data to create
        logger.debug(f"Creating workflow message with data: {messageData}")

        # Create message in database
        createdMessage = self.db.recordCreate("workflowMessages", messageData)

        # Update workflow's messageIds if this is a new message
        if createdMessage:
            workflowId = messageData["workflowId"]
            workflow = self.getWorkflow(workflowId)

            if workflow:
                # Get current messageIds or initialize empty list
                messageIds = workflow.get("messageIds", [])

                # Add the new message ID if not already in the list
                if createdMessage["id"] not in messageIds:
                    messageIds.append(createdMessage["id"])
                    self.updateWorkflow(workflowId, {"messageIds": messageIds})

        return createdMessage
    except Exception as e:
        logger.error(f"Error creating workflow message: {str(e)}")
        # Return None instead of raising to avoid cascading failures
        return None
|
|
|
|
def updateWorkflowMessage(self, messageId: str, messageData: Dict[str, Any]) -> Dict[str, Any]:
    """
    Updates an existing workflow message in the database.

    Falls back to creating the message when it does not exist yet and
    the payload carries a workflowId (upsert behavior).

    Args:
        messageId: ID of the message
        messageData: Data to update

    Returns:
        The updated (or newly created) message object, or None when the
        message is missing and no workflowId was supplied

    Raises:
        ValueError: on an empty messageId or any update failure
    """
    try:
        # Debug info
        logger.debug(f"Updating message {messageId} in database")

        # Ensure messageId is provided
        if not messageId:
            logger.error("No messageId provided for updateWorkflowMessage")
            raise ValueError("messageId cannot be empty")

        # Check if message exists in database
        messages = self.db.getRecordset("workflowMessages", recordFilter={"id": messageId})
        if not messages:
            logger.warning(f"Message with ID {messageId} does not exist in database")

            # If message doesn't exist but we have workflowId, create it
            if "workflowId" in messageData:
                logger.info(f"Creating new message with ID {messageId} for workflow {messageData.get('workflowId')}")
                return self.db.recordCreate("workflowMessages", messageData)
            else:
                logger.error(f"Workflow ID missing for new message {messageId}")
                return None

        # Update existing message
        existingMessage = messages[0]

        # Ensure required fields present in either payload or record
        for key in ["role", "agentName"]:
            if key not in messageData and key not in existingMessage:
                messageData[key] = "assistant" if key == "role" else ""

        # Ensure ID is in the dataset
        if 'id' not in messageData:
            messageData['id'] = messageId

        # Legacy field name: map createdAt -> startedAt
        if "createdAt" in messageData and "startedAt" not in messageData:
            messageData["startedAt"] = messageData["createdAt"]
            del messageData["createdAt"]

        # Update the message
        updatedMessage = self.db.recordModify("workflowMessages", messageId, messageData)
        if updatedMessage:
            logger.info(f"Message {messageId} updated successfully")
        else:
            logger.warning(f"Failed to update message {messageId}")

        return updatedMessage
    except Exception as e:
        logger.error(f"Error updating message {messageId}: {str(e)}", exc_info=True)
        # Re-raise with full information
        raise ValueError(f"Error updating message {messageId}: {str(e)}")
|
|
|
|
def deleteWorkflowMessage(self, workflowId: str, messageId: str) -> bool:
    """
    Deletes a message from a workflow in the database.

    Args:
        workflowId: ID of the associated workflow
        messageId: ID of the message to delete

    Returns:
        True on success, False on error
    """
    try:
        # Confirm the message actually belongs to this workflow first.
        exists = any(m.get("id") == messageId
                     for m in self.getWorkflowMessages(workflowId))
        if not exists:
            logger.warning(f"Message {messageId} for workflow {workflowId} not found")
            return False

        return self.db.recordDelete("workflowMessages", messageId)
    except Exception as e:
        logger.error(f"Error deleting message {messageId}: {str(e)}")
        return False
|
|
|
|
    def deleteFileFromMessage(self, workflowId: str, messageId: str, fileId: int) -> bool:
        """
        Removes a file reference from a message.

        The file itself is not deleted, only the reference in the message's
        "documents" list. Enhanced version with improved (fuzzy) file matching.

        Args:
            workflowId: ID of the associated workflow
            messageId: ID of the message
            fileId: ID of the file to remove

        Returns:
            True on success, False on error (message not found, no documents,
            no matching file, or database update failure)
        """
        try:
            # Log operation
            logger.info(f"Removing file {fileId} from message {messageId} in workflow {workflowId}")

            # Get all workflow messages
            allMessages = self.getWorkflowMessages(workflowId)
            logger.debug(f"Workflow {workflowId} has {len(allMessages)} messages")

            # Try different approaches to find the message, from strictest to
            # loosest, so slightly malformed ids from the caller can still match.
            message = None

            # 1) Exact match on the stored id
            message = next((m for m in allMessages if m.get("id") == messageId), None)

            # 2) Case-insensitive match
            if not message and isinstance(messageId, str):
                message = next((m for m in allMessages
                              if isinstance(m.get("id"), str) and m.get("id").lower() == messageId.lower()), None)

            # 3) Partial match (stored id starts with the supplied prefix)
            if not message and isinstance(messageId, str):
                message = next((m for m in allMessages
                              if isinstance(m.get("id"), str) and m.get("id").startswith(messageId)), None)

            if not message:
                logger.warning(f"Message {messageId} not found in workflow {workflowId}")
                return False

            # Log the found message (its id may differ from the messageId
            # argument after the fuzzy matching above)
            logger.info(f"Found message: {message.get('id')}")

            # Nothing to do when the message carries no document references
            if "documents" not in message or not message["documents"]:
                logger.warning(f"No documents in message {messageId}")
                return False

            # Log existing documents for diagnostics
            documents = message.get("documents", [])
            logger.debug(f"Message has {len(documents)} documents")
            for i, doc in enumerate(documents):
                docId = doc.get("id", "unknown")
                fileIdValue = doc.get("fileId", "unknown")
                logger.debug(f"Document {i}: docId={docId}, fileId={fileIdValue}")

            # Create a new list of documents without the one(s) to delete
            updatedDocuments = []
            removed = False

            for doc in documents:
                docId = doc.get("id")
                fileIdValue = doc.get("fileId")

                # Flexible matching: a document is a hit when its own id or its
                # fileId equals the requested fileId, or when the stringified
                # fileId occurs as a substring of either value.
                # NOTE(review): the substring checks can remove more than one
                # document when several ids contain the same fragment --
                # presumably intentional ("improved matching"); confirm with callers.
                shouldRemove = (
                    (docId == fileId) or
                    (fileIdValue == fileId) or
                    (isinstance(docId, str) and str(fileId) in docId) or
                    (isinstance(fileIdValue, str) and str(fileId) in fileIdValue)
                )

                if shouldRemove:
                    removed = True
                    logger.info(f"Found file to remove: docId={docId}, fileId={fileIdValue}")
                else:
                    updatedDocuments.append(doc)

            if not removed:
                logger.warning(f"No matching file {fileId} found in message {messageId}")
                return False

            # Update message with the modified documents array only; all other
            # message fields stay untouched
            messageUpdate = {
                "documents": updatedDocuments
            }

            # Apply the update directly to the database, keyed by the message's
            # real stored id
            updated = self.db.recordModify("workflowMessages", message["id"], messageUpdate)

            if updated:
                logger.info(f"Successfully removed file {fileId} from message {messageId}")
                return True
            else:
                logger.warning(f"Failed to update message {messageId} in database")
                return False

        except Exception as e:
            logger.error(f"Error removing file {fileId} from message {messageId}: {str(e)}")
            return False
|
|
|
|
|
|
# Workflow Logs
|
|
|
|
def getWorkflowLogs(self, workflowId: str) -> List[Dict[str, Any]]:
|
|
"""Returns all log entries for a workflow"""
|
|
return self.db.getRecordset("workflowLogs", recordFilter={"workflowDd": workflowId})
|
|
|
|
def createWorkflowLog(self, logData: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Creates a new log entry for a workflow"""
|
|
# Make sure required fields are present
|
|
if "timestamp" not in logData:
|
|
logData["timestamp"] = self._getCurrentTimestamp()
|
|
|
|
# Add status information if not present
|
|
if "status" not in logData and "type" in logData:
|
|
if logData["type"] == "error":
|
|
logData["status"] = "error"
|
|
else:
|
|
logData["status"] = "running"
|
|
|
|
# Add progress information if not present
|
|
if "progress" not in logData:
|
|
# Default progress values based on log type
|
|
if logData.get("type") == "info":
|
|
logData["progress"] = 50 # Default middle progress
|
|
elif logData.get("type") == "error":
|
|
logData["progress"] = -1 # Error state
|
|
elif logData.get("type") == "warning":
|
|
logData["progress"] = 50 # Default middle progress
|
|
|
|
return self.db.recordCreate("workflowLogs", logData)
|
|
|
|
|
|
# Workflow Management
|
|
|
|
def saveWorkflowState(self, workflow: Dict[str, Any], saveMessages: bool = True, saveLogs: bool = True) -> bool:
|
|
"""
|
|
Saves the state of a workflow to the database.
|
|
Workflow data is updated, but messages are stored separately.
|
|
|
|
Args:
|
|
workflow: The workflow object
|
|
saveMessages: Flag to determine if messages should be saved
|
|
saveLogs: Flag to determine if logs should be saved
|
|
|
|
Returns:
|
|
True on success, False on failure
|
|
"""
|
|
try:
|
|
workflowId = workflow.get("id")
|
|
if not workflowId:
|
|
return False
|
|
|
|
# Extract only the database-relevant workflow fields
|
|
# IMPORTANT: Don't store messages in the workflow table!
|
|
workflowDbData = {
|
|
"id": workflowId,
|
|
"mandateId": workflow.get("mandateId", self.mandateId),
|
|
"userId": workflow.get("userId", self.userId),
|
|
"name": workflow.get("name", f"Workflow {workflowId}"),
|
|
"status": workflow.get("status", "completed"),
|
|
"startedAt": workflow.get("startedAt", self._getCurrentTimestamp()),
|
|
"lastActivity": workflow.get("lastActivity", self._getCurrentTimestamp()),
|
|
"dataStats": workflow.get("dataStats", {})
|
|
}
|
|
|
|
# Check if workflow already exists
|
|
existingWorkflow = self.getWorkflow(workflowId)
|
|
if existingWorkflow:
|
|
self.updateWorkflow(workflowId, workflowDbData)
|
|
else:
|
|
self.createWorkflow(workflowDbData)
|
|
|
|
# Save messages
|
|
if saveMessages and "messages" in workflow:
|
|
for message in workflow["messages"]:
|
|
messageId = message.get("id")
|
|
if not messageId:
|
|
continue
|
|
|
|
# Since each message is already saved with createWorkflowMessage,
|
|
# we only need to check if updates are necessary
|
|
# First, get existing message from database
|
|
existingMessages = self.getWorkflowMessages(workflowId)
|
|
existingMessage = next((m for m in existingMessages if m.get("id") == messageId), None)
|
|
|
|
if existingMessage:
|
|
# Check if updates are needed
|
|
hasChanges = False
|
|
for key in ["role", "agentName", "content", "status", "documents"]:
|
|
if key in message and message.get(key) != existingMessage.get(key):
|
|
hasChanges = True
|
|
break
|
|
|
|
if hasChanges:
|
|
# Extract only relevant data for the database
|
|
messageData = {
|
|
"role": message.get("role", existingMessage.get("role", "unknown")),
|
|
"content": message.get("content", existingMessage.get("content", "")),
|
|
"agentName": message.get("agentName", existingMessage.get("agentName", "")),
|
|
"status": message.get("status", existingMessage.get("status", "completed")),
|
|
"documents": message.get("documents", existingMessage.get("documents", []))
|
|
}
|
|
self.updateWorkflowMessage(messageId, messageData)
|
|
else:
|
|
# Message doesn't exist in database yet
|
|
# It should have been saved via createWorkflowMessage
|
|
# If not, log a warning
|
|
logger.warning(f"Message {messageId} in workflow {workflowId} not found in database")
|
|
|
|
# Save logs
|
|
if saveLogs and "logs" in workflow:
|
|
# Get existing logs
|
|
existingLogs = {log["id"]: log for log in self.getWorkflowLogs(workflowId)}
|
|
|
|
for log in workflow["logs"]:
|
|
logId = log.get("id")
|
|
if not logId:
|
|
continue
|
|
|
|
# Extract only relevant data for the database
|
|
logData = {
|
|
"id": logId,
|
|
"workflowId": workflowId,
|
|
"message": log.get("message", ""),
|
|
"type": log.get("type", "info"),
|
|
"timestamp": log.get("timestamp", self._getCurrentTimestamp()),
|
|
"agentName": log.get("agentName", "(undefined)"),
|
|
"status": log.get("status", "running"),
|
|
"progress": log.get("progress", 50)
|
|
}
|
|
|
|
# Create or update log
|
|
if logId in existingLogs:
|
|
self.db.recordModify("workflowLogs", logId, logData)
|
|
else:
|
|
self.db.recordCreate("workflowLogs", logData)
|
|
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Error saving workflow state: {str(e)}")
|
|
return False
|
|
|
|
def loadWorkflowState(self, workflowId: str) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Loads the complete state of a workflow from the database.
|
|
This includes the workflow itself, messages, and logs.
|
|
|
|
Args:
|
|
workflowId: ID of the workflow to load
|
|
|
|
Returns:
|
|
The complete workflow object or None on error
|
|
"""
|
|
try:
|
|
# Load base workflow
|
|
workflow = self.getWorkflow(workflowId)
|
|
if not workflow:
|
|
return None
|
|
|
|
# Log the workflow base retrieval
|
|
logger.debug(f"Loaded base workflow {workflowId} from database")
|
|
|
|
# Load messages
|
|
messages = self.getWorkflowMessages(workflowId)
|
|
# Sort by sequence number
|
|
messages.sort(key=lambda x: x.get("sequenceNo", 0))
|
|
|
|
# Debug log for messages and document counts
|
|
messageCount = len(messages)
|
|
logger.debug(f"Loaded {messageCount} messages for workflow {workflowId}")
|
|
|
|
# Check if messageIds exists and is valid
|
|
messageIds = workflow.get("messageIds", [])
|
|
if not messageIds or len(messageIds) != len(messages):
|
|
# Rebuild messageIds from messages
|
|
messageIds = [msg.get("id") for msg in messages]
|
|
# Update in database
|
|
self.updateWorkflow(workflowId, {"messageIds": messageIds})
|
|
logger.info(f"Rebuilt messageIds for workflow {workflowId}")
|
|
|
|
# Log document counts for each message
|
|
for msg in messages:
|
|
docCount = len(msg.get("documents", []))
|
|
if docCount > 0:
|
|
logger.info(f"Message {msg.get('id')} has {docCount} documents loaded from database")
|
|
|
|
# Load logs
|
|
logs = self.getWorkflowLogs(workflowId)
|
|
# Sort by timestamp
|
|
logs.sort(key=lambda x: x.get("timestamp", ""))
|
|
|
|
# Assemble complete workflow object
|
|
completeWorkflow = workflow.copy()
|
|
completeWorkflow["messages"] = messages
|
|
completeWorkflow["messageIds"] = messageIds # Ensure messageIds is included
|
|
completeWorkflow["logs"] = logs
|
|
|
|
return completeWorkflow
|
|
except Exception as e:
|
|
logger.error(f"Error loading workflow state: {str(e)}")
|
|
return None
|
|
|
|
|
|
# Singleton factory cache for LucyDOMInterface instances per context:
# maps "mandateId_userId" keys to the cached interface objects created
# by getLucydomInterface() below.
_lucydomInterfaces: Dict[str, "LucyDOMInterface"] = {}
|
|
|
|
def getLucydomInterface(mandateId: int = 0, userId: int = 0) -> LucyDOMInterface:
    """
    Returns a LucyDOMInterface instance for the specified context.
    Reuses existing instances.

    Args:
        mandateId: ID of the mandate
        userId: ID of the user

    Returns:
        LucyDOMInterface instance
    """
    cacheKey = f"{mandateId}_{userId}"
    cached = _lucydomInterfaces.get(cacheKey)
    if cached is None:
        # First request for this mandate/user pair: build the interface
        # and wire up its AI service before caching it.
        cached = LucyDOMInterface(mandateId, userId)
        cached.aiService = ChatService()
        _lucydomInterfaces[cacheKey] = cached
    return cached
|
|
|
|
# Init: eagerly create the default-context interface (mandateId=0, userId=0)
# at import time so consumers of this module get a ready-to-use singleton.
getLucydomInterface()