"""
|
|
Interface to LucyDOM database and AI Connectors.
|
|
Uses the JSON connector for data access with added language support.
|
|
"""
|
|
|
|
import os
|
|
import logging
|
|
import uuid
|
|
from datetime import datetime
|
|
from typing import Dict, Any, List, Optional, Union
|
|
|
|
import importlib
|
|
import hashlib
|
|
|
|
# DYNAMIC PART: Connectors to the Interface
|
|
from connectors.connector_db_json import DatabaseConnector
|
|
from connectors.connector_aichat_openai import ChatService
|
|
|
|
# Basic Configurations
|
|
from modules.configuration import APP_CONFIG
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Custom exceptions for file handling
|
|
class FileError(Exception):
    """Base class for all file handling exceptions raised by this module."""
    pass
|
|
|
|
class FileNotFoundError(FileError):
    """Exception raised when a file is not found.

    NOTE(review): this deliberately-or-not shadows the builtin
    ``FileNotFoundError``; inside this module, ``except FileNotFoundError``
    refers to this class, not the builtin OSError subclass.
    """
    pass
|
|
|
|
class FileStorageError(FileError):
    """Exception raised when there's an error storing a file."""
    pass
|
|
|
|
class FilePermissionError(FileError):
    """Exception raised when there's a permission issue with a file
    (e.g. it belongs to a different mandate)."""
    pass
|
|
|
|
class FileDeletionError(FileError):
    """Exception raised when there's an error deleting a file."""
    pass
|
|
|
|
|
|
class LucyDOMInterface:
|
|
"""
|
|
Interface to the LucyDOM database.
|
|
Uses the JSON connector for data access.
|
|
"""
|
|
|
|
def __init__(self, mandate_id: int, user_id: int):
|
|
"""
|
|
Initializes the LucyDOM Interface with mandate and user context.
|
|
|
|
Args:
|
|
mandate_id: ID of the current mandate
|
|
user_id: ID of the current user
|
|
"""
|
|
self.mandate_id = mandate_id
|
|
self.user_id = user_id
|
|
|
|
# Add language settings
|
|
self.user_language = "en" # Default user language
|
|
self.ai_service = None # Will be set externally
|
|
|
|
# Import data model module
|
|
try:
|
|
self.model_module = importlib.import_module("modules.lucydom_model")
|
|
logger.info("lucydom_model successfully imported")
|
|
except ImportError as e:
|
|
logger.error(f"Error importing lucydom_model: {e}")
|
|
raise
|
|
|
|
# Initialize database if needed
|
|
self._initialize_database()
|
|
|
|
    def _initialize_database(self):
        """
        Initializes the database with minimal objects for the logged-in user
        in the mandate, if it doesn't exist yet.

        No initialization without a valid user: when mandate or user is None
        the method returns early — note that ``self.db`` is then never
        created, so all data methods would fail with AttributeError.

        Seeds a set of standard prompts when the prompts table is empty.
        """
        effective_mandate_id = self.mandate_id
        effective_user_id = self.user_id
        if effective_mandate_id is None or effective_user_id is None:
            # Without a valid mandate/user there is nothing to initialize.
            return

        # Build the JSON database connector from the app configuration.
        self.db = DatabaseConnector(
            db_host=APP_CONFIG.get("DB_LUCYDOM_HOST"),
            db_database=APP_CONFIG.get("DB_LUCYDOM_DATABASE"),
            db_user=APP_CONFIG.get("DB_LUCYDOM_USER"),
            db_password=APP_CONFIG.get("DB_LUCYDOM_PASSWORD_SECRET"),
            mandate_id=self.mandate_id,
            user_id=self.user_id
        )

        # Initialize standard prompts only when the table is completely empty.
        prompts = self.db.get_recordset("prompts")
        if not prompts:
            logger.info("Creating standard prompts")

            # Define standard prompts
            standard_prompts = [
                {
                    "mandate_id": effective_mandate_id,
                    "user_id": effective_user_id,
                    "content": "Research the current market trends and developments in [TOPIC]. Collect information about leading companies, innovative products or services, and current challenges. Present the results in a structured overview with relevant data and sources.",
                    "name": "Web Research: Market Research"
                },
                {
                    "mandate_id": effective_mandate_id,
                    "user_id": effective_user_id,
                    "content": "Analyze the attached dataset on [TOPIC] and identify the most important trends, patterns, and anomalies. Perform statistical calculations to support your findings. Present the results in a clearly structured analysis and draw relevant conclusions.",
                    "name": "Analysis: Data Analysis"
                },
                {
                    "mandate_id": effective_mandate_id,
                    "user_id": effective_user_id,
                    "content": "Create a detailed protocol of our meeting on [TOPIC]. Capture all discussed points, decisions made, and agreed measures. Structure the protocol clearly with agenda items, participant list, and clear responsibilities for follow-up actions.",
                    "name": "Protocol: Meeting Minutes"
                },
                {
                    "mandate_id": effective_mandate_id,
                    "user_id": effective_user_id,
                    "content": "Develop a UI/UX design concept for [APPLICATION/WEBSITE]. Consider the target audience, main functions, and brand identity. Describe the visual design, navigation, interaction patterns, and information architecture. Explain how the design optimizes user-friendliness and user experience.",
                    "name": "Design: UI/UX Design"
                }
            ]

            # Create prompts
            for prompt_data in standard_prompts:
                created_prompt = self.db.record_create("prompts", prompt_data)
                logger.info(f"Prompt '{prompt_data.get('name', 'Standard')}' was created with ID {created_prompt['id']}")
|
|
|
|
# Language support methods
|
|
|
|
    def set_user_language(self, language_code: str):
        """Set the user's preferred language for AI answers.

        Args:
            language_code: language code string, e.g. "en" (the default).
        """
        self.user_language = language_code
        logger.info(f"User language set to: {language_code}")
|
|
|
|
    async def call_ai(self, messages: List[Dict[str, str]], produce_user_answer: bool = False, temperature: float = None) -> str:
        """
        Enhanced AI service call with language support.

        NOTE: when ``produce_user_answer`` is True this MUTATES the caller's
        ``messages`` list in place (edits or prepends the system message).

        Args:
            messages: List of message dictionaries (role/content pairs)
            produce_user_answer: Whether this response is for the end-user;
                if so, a language instruction is injected
            temperature: Optional temperature setting forwarded to the connector

        Returns:
            AI response text, or an error string when no AI service is set
        """
        if not self.ai_service:
            logger.error("AI service not set in LucyDOMInterface")
            return "Error: AI service not available"

        # Add language instruction for user-facing responses
        if produce_user_answer and self.user_language:
            ltext = f"Please respond in '{self.user_language}' language."
            if messages and messages[0]["role"] == "system":
                # Crude substring check: only inject when the existing system
                # prompt does not already mention a language.
                if "language" not in messages[0]["content"].lower():
                    messages[0]["content"] = f"{ltext} {messages[0]['content']}"
            else:
                # Insert a system message with language instruction
                messages.insert(0, {
                    "role": "system",
                    "content": ltext
                })

        # Delegate to the injected chat connector; only pass temperature when
        # explicitly requested so the connector's own default applies otherwise.
        if temperature is not None:
            return await self.ai_service.call_api(messages, temperature=temperature)
        else:
            return await self.ai_service.call_api(messages)
|
|
|
|
# Utilities
|
|
|
|
def get_initial_id(self, table: str) -> Optional[int]:
|
|
"""
|
|
Returns the initial ID for a table.
|
|
|
|
Args:
|
|
table: Name of the table
|
|
|
|
Returns:
|
|
The initial ID or None if not present
|
|
"""
|
|
return self.db.get_initial_id(table)
|
|
|
|
def _get_current_timestamp(self) -> str:
|
|
"""Returns the current timestamp in ISO format"""
|
|
return datetime.now().isoformat()
|
|
|
|
|
|
# Prompt methods
|
|
|
|
def get_all_prompts(self) -> List[Dict[str, Any]]:
|
|
"""Returns all prompts for the current mandate"""
|
|
return self.db.get_recordset("prompts")
|
|
|
|
def get_prompt(self, prompt_id: int) -> Optional[Dict[str, Any]]:
|
|
"""Returns a prompt by its ID"""
|
|
prompts = self.db.get_recordset("prompts", record_filter={"id": prompt_id})
|
|
if prompts:
|
|
return prompts[0]
|
|
return None
|
|
|
|
def create_prompt(self, content: str, name: str) -> Dict[str, Any]:
|
|
"""Creates a new prompt"""
|
|
prompt_data = {
|
|
"mandate_id": self.mandate_id,
|
|
"user_id": self.user_id,
|
|
"content": content,
|
|
"name": name,
|
|
"created_at": self._get_current_timestamp()
|
|
}
|
|
|
|
return self.db.record_create("prompts", prompt_data)
|
|
|
|
def update_prompt(self, prompt_id: int, content: str = None, name: str = None) -> Dict[str, Any]:
|
|
"""
|
|
Updates an existing prompt
|
|
|
|
Args:
|
|
prompt_id: ID of the prompt to update
|
|
content: New content for the prompt
|
|
|
|
Returns:
|
|
The updated prompt object
|
|
"""
|
|
# Check if the prompt exists
|
|
prompt = self.get_prompt(prompt_id)
|
|
if not prompt:
|
|
return None
|
|
|
|
# Prepare data for update
|
|
prompt_data = {}
|
|
|
|
if content is not None:
|
|
prompt_data["content"] = content
|
|
if name is not None:
|
|
prompt_data["name"] = name
|
|
|
|
# Update prompt
|
|
return self.db.record_modify("prompts", prompt_id, prompt_data)
|
|
|
|
def delete_prompt(self, prompt_id: int) -> bool:
|
|
"""
|
|
Deletes a prompt from the database
|
|
|
|
Args:
|
|
prompt_id: ID of the prompt to delete
|
|
|
|
Returns:
|
|
True if the prompt was successfully deleted, otherwise False
|
|
"""
|
|
return self.db.record_delete("prompts", prompt_id)
|
|
|
|
|
|
# File Utilities
|
|
|
|
def calculate_file_hash(self, file_content: bytes) -> str:
|
|
"""Calculates a SHA-256 hash for the file content"""
|
|
return hashlib.sha256(file_content).hexdigest()
|
|
|
|
def check_for_duplicate_file(self, file_hash: str) -> Optional[Dict[str, Any]]:
|
|
"""Checks if a file with the same hash already exists"""
|
|
files = self.db.get_recordset("files", record_filter={"file_hash": file_hash})
|
|
if files:
|
|
return files[0]
|
|
return None
|
|
|
|
def get_mime_type(self, filename: str) -> str:
|
|
"""Determines the MIME type based on the file extension"""
|
|
import os
|
|
ext = os.path.splitext(filename)[1].lower()[1:]
|
|
extension_to_mime = {
|
|
"pdf": "application/pdf",
|
|
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
|
"doc": "application/msword",
|
|
"xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
|
|
"xls": "application/vnd.ms-excel",
|
|
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
|
|
"ppt": "application/vnd.ms-powerpoint",
|
|
"csv": "text/csv",
|
|
"txt": "text/plain",
|
|
"json": "application/json",
|
|
"xml": "application/xml",
|
|
"html": "text/html",
|
|
"htm": "text/html",
|
|
"jpg": "image/jpeg",
|
|
"jpeg": "image/jpeg",
|
|
"png": "image/png",
|
|
"gif": "image/gif",
|
|
"webp": "image/webp",
|
|
"svg": "image/svg+xml",
|
|
"py": "text/x-python",
|
|
"js": "application/javascript",
|
|
"css": "text/css"
|
|
}
|
|
return extension_to_mime.get(ext.lower(), "application/octet-stream")
|
|
|
|
|
|
# File methods - metadata-based operations
|
|
|
|
def get_all_files(self) -> List[Dict[str, Any]]:
|
|
"""
|
|
Returns all files for the current mandate without binary data.
|
|
|
|
Returns:
|
|
List of FileItem objects without binary data
|
|
"""
|
|
files = self.db.get_recordset("files")
|
|
return files
|
|
|
|
def get_file(self, file_id: int) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Returns a file by its ID, without binary data.
|
|
|
|
Args:
|
|
file_id: ID of the file
|
|
|
|
Returns:
|
|
FileItem without binary data or None if not found
|
|
"""
|
|
files = self.db.get_recordset("files", record_filter={"id": file_id})
|
|
if files:
|
|
return files[0]
|
|
return None
|
|
|
|
def create_file(self, name: str, mime_type: str, size: int = None, file_hash: str = None) -> Dict[str, Any]:
|
|
"""
|
|
Creates a new file entry in the database without content.
|
|
The actual file content is stored separately in the FileData table.
|
|
|
|
Args:
|
|
name: Name of the file
|
|
mime_type: MIME type of the file
|
|
size: Size of the file in bytes
|
|
file_hash: Hash value of the file for deduplication
|
|
|
|
Returns:
|
|
The created FileItem object
|
|
"""
|
|
file_data = {
|
|
"mandate_id": self.mandate_id,
|
|
"user_id": self.user_id,
|
|
"name": name,
|
|
"mime_type": mime_type,
|
|
"size": size,
|
|
"file_hash": file_hash,
|
|
"creation_date": self._get_current_timestamp()
|
|
}
|
|
return self.db.record_create("files", file_data)
|
|
|
|
def update_file(self, file_id: int, update_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Updates the metadata of an existing file without affecting the binary data.
|
|
|
|
Args:
|
|
file_id: ID of the file to update
|
|
update_data: Dictionary with fields to update
|
|
|
|
Returns:
|
|
The updated FileItem object
|
|
"""
|
|
# Check if the file exists
|
|
file = self.get_file(file_id)
|
|
if not file:
|
|
raise FileNotFoundError(f"File with ID {file_id} not found")
|
|
|
|
# Update file
|
|
return self.db.record_modify("files", file_id, update_data)
|
|
|
|
    def delete_file(self, file_id: int) -> bool:
        """
        Deletes a file from the database (metadata and, if unreferenced, content).

        Args:
            file_id: ID of the file

        Returns:
            True when the connector deleted the metadata record, otherwise False.

        Raises:
            FileNotFoundError: if the file does not exist.
            FilePermissionError: if the file belongs to another mandate.
            FileDeletionError: wraps any other failure.
        """
        try:
            # Find the file in the database
            file = self.get_file(file_id)

            if not file:
                raise FileNotFoundError(f"File with ID {file_id} not found")

            # Only the owning mandate may delete its files
            if file.get("mandate_id") != self.mandate_id:
                raise FilePermissionError(f"No permission to delete file {file_id}")

            # Content is deduplicated by hash: other file records may share it
            file_hash = file.get("file_hash")
            if file_hash:
                other_references = [f for f in self.db.get_recordset("files", record_filter={"file_hash": file_hash})
                                    if f.get("id") != file_id]

                # If other files reference this content, only delete the database entry for FileItem
                if other_references:
                    logger.info(f"Other references to the file content found, only FileItem will be deleted: {file_id}")
                else:
                    # No other references: also delete the file content.
                    # NOTE(review): file_data is looked up by THIS file's id —
                    # for deduplicated uploads the content row carries the
                    # first uploader's id, so this lookup may miss; verify
                    # against save_uploaded_file/create_file_data.
                    try:
                        file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
                        if file_data_entries:
                            self.db.record_delete("file_data", file_id)
                            logger.info(f"FileData for file {file_id} deleted")
                    except Exception as e:
                        # Best effort: a failed content delete must not block
                        # removing the metadata record below.
                        logger.warning(f"Error deleting FileData for file {file_id}: {str(e)}")

            # Delete the FileItem entry
            return self.db.record_delete("files", file_id)

        except FileNotFoundError as e:
            # Pass through FileNotFoundError
            raise
        except FilePermissionError as e:
            # Pass through FilePermissionError
            raise
        except Exception as e:
            logger.error(f"Error deleting file {file_id}: {str(e)}")
            raise FileDeletionError(f"Error deleting file: {str(e)}")
|
|
|
|
|
|
# FileData methods - binary data operations
|
|
|
|
def create_file_data(self, file_id: int, data: bytes) -> bool:
|
|
"""
|
|
Stores the binary data of a file in the database as a Base64 string.
|
|
|
|
Args:
|
|
file_id: ID of the associated file
|
|
data: Binary data
|
|
|
|
Returns:
|
|
True on success, False on error
|
|
"""
|
|
try:
|
|
import base64
|
|
|
|
# Convert binary data to base64 string
|
|
if isinstance(data, bytes):
|
|
encoded_data = base64.b64encode(data).decode('utf-8')
|
|
logger.debug(f"Converted {len(data)} bytes to base64 string of length {len(encoded_data)}")
|
|
else:
|
|
logger.warning(f"Data is not bytes, but {type(data)}. Attempting to handle...")
|
|
# Try to convert to bytes if it's not already
|
|
if isinstance(data, str):
|
|
# Check if it might already be base64 encoded
|
|
try:
|
|
# See if it's valid base64
|
|
base64.b64decode(data)
|
|
# If no error, assume it's already encoded
|
|
encoded_data = data
|
|
logger.info(f"Data appears to be already base64 encoded, using as is")
|
|
except:
|
|
# Not base64, so encode the string as bytes then to base64
|
|
encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8')
|
|
logger.info(f"Converted string to base64")
|
|
else:
|
|
# For other types, convert to string first
|
|
encoded_data = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
|
|
logger.warning(f"Converted non-standard type to base64")
|
|
|
|
# Create the file_data record with encoded data
|
|
file_data = {
|
|
"id": file_id,
|
|
"data": encoded_data
|
|
}
|
|
|
|
self.db.record_create("file_data", file_data)
|
|
logger.info(f"Successfully stored encoded data for file {file_id}")
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Error storing binary data for file {file_id}: {str(e)}")
|
|
return False
|
|
|
|
    def get_file_data(self, file_id: int) -> Optional[bytes]:
        """
        Returns the binary data of a file.

        Converts the Base64 string stored in the database back to bytes.

        Args:
            file_id: ID of the file

        Returns:
            Binary data, or None when no row exists or the stored value has
            an unexpected type / cannot be processed
        """
        import base64

        # file_data rows share their primary key with the files row.
        file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
        if file_data_entries and "data" in file_data_entries[0]:
            encoded_data = file_data_entries[0]["data"]

            try:
                # Check if it's a string (most likely base64)
                if isinstance(encoded_data, str):
                    try:
                        # Try to decode base64
                        binary_data = base64.b64decode(encoded_data)
                        logger.debug(f"Successfully decoded base64 string to {len(binary_data)} bytes")
                        return binary_data
                    except Exception as e:
                        logger.error(f"Failed to decode base64 data: {str(e)}")
                        # Fallback: hand back the stored string's UTF-8 bytes
                        # instead of failing outright.
                        return encoded_data.encode('utf-8')
                # If it's already bytes (shouldn't happen with model change)
                elif isinstance(encoded_data, bytes):
                    logger.warning(f"Data was already bytes, no conversion needed")
                    return encoded_data
                else:
                    logger.error(f"Unexpected data type in database: {type(encoded_data)}")
                    return None
            except Exception as e:
                logger.error(f"Error processing file data: {str(e)}")
                return None
        else:
            logger.warning(f"No data found for file ID {file_id}")
            return None
|
|
|
|
def update_file_data(self, file_id: int, data: Union[bytes, str]) -> bool:
|
|
"""
|
|
Updates the binary data of a file in the database.
|
|
Converts bytes to Base64 string for storage.
|
|
|
|
Args:
|
|
file_id: ID of the file
|
|
data: New binary data or encoded data
|
|
|
|
Returns:
|
|
True on success, False on error
|
|
"""
|
|
try:
|
|
import base64
|
|
|
|
# Convert data to base64 string if it's bytes
|
|
if isinstance(data, bytes):
|
|
encoded_data = base64.b64encode(data).decode('utf-8')
|
|
logger.debug(f"Converted {len(data)} bytes to base64 string")
|
|
elif isinstance(data, str):
|
|
# Check if it might already be base64 encoded
|
|
try:
|
|
# See if it's valid base64
|
|
base64.b64decode(data)
|
|
# If no error, assume it's already encoded
|
|
encoded_data = data
|
|
logger.debug(f"Data appears to be already base64 encoded, using as is")
|
|
except:
|
|
# Not base64, so encode the string as bytes then to base64
|
|
encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8')
|
|
logger.debug(f"Converted string to base64")
|
|
else:
|
|
# For other types, convert to string first
|
|
encoded_data = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
|
|
logger.warning(f"Converted non-standard type to base64")
|
|
|
|
# Check if a record already exists
|
|
file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
|
|
|
|
if file_data_entries:
|
|
# Update the existing record
|
|
self.db.record_modify("file_data", file_id, {"data": encoded_data})
|
|
logger.info(f"Updated existing file data for file ID {file_id}")
|
|
else:
|
|
# Create a new record
|
|
file_data = {
|
|
"id": file_id,
|
|
"data": encoded_data
|
|
}
|
|
self.db.record_create("file_data", file_data)
|
|
logger.info(f"Created new file data for file ID {file_id}")
|
|
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Error updating binary data for file {file_id}: {str(e)}")
|
|
return False
|
|
|
|
    def save_uploaded_file(self, file_content: bytes, file_name: str) -> Dict[str, Any]:
        """
        Saves an uploaded file in the database.

        Metadata is stored in the 'files' table, binary data in the
        'file_data' table as a Base64 string. Identical content (same
        SHA-256 hash) is deduplicated: the existing record is returned and
        nothing new is written.

        Args:
            file_content: Binary data of the file
            file_name: Name of the file

        Returns:
            Dictionary with metadata of the saved (or pre-existing) file

        Raises:
            FileStorageError: wraps any failure during the upload process.
        """
        try:
            # Debug: Log the start of the file upload process
            logger.info(f"Starting upload process for file: {file_name}")

            # Reject non-bytes early: hashing and size logic below assume raw bytes.
            if not isinstance(file_content, bytes):
                logger.error(f"Invalid file_content type: {type(file_content)}")
                raise ValueError(f"file_content must be bytes, got {type(file_content)}")

            # Calculate file hash for deduplication
            file_hash = self.calculate_file_hash(file_content)
            logger.debug(f"Calculated file hash: {file_hash}")

            # Check for duplicate
            existing_file = self.check_for_duplicate_file(file_hash)
            if existing_file:
                # Simply return the existing file metadata
                logger.info(f"Duplicate found for {file_name}: {existing_file['id']}")
                return existing_file

            # Determine MIME type
            mime_type = self.get_mime_type(file_name)

            # Determine file size
            file_size = len(file_content)

            # 1. Save metadata in the 'files' table
            logger.info(f"Saving file metadata to database for file: {file_name}")
            db_file = self.create_file(
                name=file_name,
                mime_type=mime_type,
                size=file_size,
                file_hash=file_hash
            )

            # 2. Save binary data as Base64 string in the 'file_data' table
            logger.info(f"Saving file content to database for file: {file_name}")
            self.create_file_data(db_file["id"], file_content)

            # Debug: Export file to static folder
            self._export_file_to_static(file_content, db_file["id"], file_name)  # DEBUG TODO

            # Debug: Verify database record was created
            if not db_file:
                logger.warning(f"Database record for file {file_name} was not created properly")
            else:
                logger.debug(f"Database record created for file {file_name}")

            logger.info(f"File upload process completed for: {file_name}")
            return db_file

        except Exception as e:
            logger.error(f"Error in save_uploaded_file for {file_name}: {str(e)}", exc_info=True)
            raise FileStorageError(f"Error saving file: {str(e)}")
|
|
|
|
    def download_file(self, file_id: int) -> Optional[Dict[str, Any]]:
        """
        Returns a file for download, including binary data.

        Args:
            file_id: ID of the file

        Returns:
            Dictionary with keys id, name, content_type, size and content

        Raises:
            FileNotFoundError: if metadata or binary content is missing
                (this module's FileError subclass, not the builtin).
            FileError: wraps any other failure.
        """
        try:
            # 1. Get metadata from the 'files' table
            file = self.get_file(file_id)

            if not file:
                raise FileNotFoundError(f"File with ID {file_id} not found")

            # 2. Get binary data from the 'file_data' table
            file_content = self.get_file_data(file_id)

            if file_content is None:
                raise FileNotFoundError(f"Binary data for file with ID {file_id} not found")

            # Fall back to sensible defaults for any missing metadata fields.
            return {
                "id": file_id,
                "name": file.get("name", f"file_{file_id}"),
                "content_type": file.get("mime_type", "application/octet-stream"),
                "size": file.get("size", len(file_content)),
                "content": file_content
            }
        except FileNotFoundError as e:
            # Re-raise FileNotFoundError as is
            raise
        except Exception as e:
            logger.error(f"Error downloading file {file_id}: {str(e)}")
            raise FileError(f"Error downloading file: {str(e)}")
|
|
|
|
def _export_file_to_static(self, file_content: bytes, file_id: int, file_name: str):
|
|
debug_filename = f"{file_id}_{file_name}"
|
|
with open(f"./static/{debug_filename}", 'wb') as f:
|
|
f.write(file_content)
|
|
|
|
# Workflow methods
|
|
|
|
def get_all_workflows(self) -> List[Dict[str, Any]]:
|
|
"""Returns all workflows for the current mandate"""
|
|
return self.db.get_recordset("workflows")
|
|
|
|
def get_workflows_by_user(self, user_id: int) -> List[Dict[str, Any]]:
|
|
"""Returns all workflows for a user"""
|
|
return self.db.get_recordset("workflows", record_filter={"user_id": user_id})
|
|
|
|
def get_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]:
|
|
"""Returns a workflow by its ID"""
|
|
workflows = self.db.get_recordset("workflows", record_filter={"id": workflow_id})
|
|
if workflows:
|
|
return workflows[0]
|
|
return None
|
|
|
|
def create_workflow(self, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Creates a new workflow in the database"""
|
|
# Make sure mandate_id and user_id are set
|
|
if "mandate_id" not in workflow_data:
|
|
workflow_data["mandate_id"] = self.mandate_id
|
|
|
|
if "user_id" not in workflow_data:
|
|
workflow_data["user_id"] = self.user_id
|
|
|
|
# Set timestamp if not present
|
|
current_time = self._get_current_timestamp()
|
|
if "started_at" not in workflow_data:
|
|
workflow_data["started_at"] = current_time
|
|
|
|
if "last_activity" not in workflow_data:
|
|
workflow_data["last_activity"] = current_time
|
|
|
|
return self.db.record_create("workflows", workflow_data)
|
|
|
|
def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Updates an existing workflow.
|
|
|
|
Args:
|
|
workflow_id: ID of the workflow to update
|
|
workflow_data: New data for the workflow
|
|
|
|
Returns:
|
|
The updated workflow object
|
|
"""
|
|
# Check if the workflow exists
|
|
workflow = self.get_workflow(workflow_id)
|
|
if not workflow:
|
|
return None
|
|
|
|
# Set update time
|
|
workflow_data["last_activity"] = self._get_current_timestamp()
|
|
|
|
# Update workflow
|
|
return self.db.record_modify("workflows", workflow_id, workflow_data)
|
|
|
|
def delete_workflow(self, workflow_id: str) -> bool:
|
|
"""
|
|
Deletes a workflow from the database.
|
|
|
|
Args:
|
|
workflow_id: ID of the workflow to delete
|
|
|
|
Returns:
|
|
True on success, False if the workflow doesn't exist
|
|
"""
|
|
# Check if the workflow exists
|
|
workflow = self.get_workflow(workflow_id)
|
|
if not workflow:
|
|
return False
|
|
|
|
# Check if the user is the owner or has admin rights
|
|
if workflow.get("user_id") != self.user_id:
|
|
# Here could be a check for admin rights
|
|
return False
|
|
|
|
# Delete workflow
|
|
return self.db.record_delete("workflows", workflow_id)
|
|
|
|
|
|
# Workflow Messages
|
|
|
|
def get_workflow_messages(self, workflow_id: str) -> List[Dict[str, Any]]:
|
|
"""Returns all messages of a workflow"""
|
|
return self.db.get_recordset("workflow_messages", record_filter={"workflow_id": workflow_id})
|
|
|
|
    def create_workflow_message(self, message_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Creates a new message for a workflow.

        Fills in defaults (id, start timestamp, status, sequence number,
        role, agent name) and appends the new message ID to the parent
        workflow's ``message_ids`` list.

        Args:
            message_data: The message data; must contain the keys "id" and
                "workflow_id" (an id of None is replaced automatically)

        Returns:
            The created message, or None on error (errors are logged and
            swallowed to avoid cascading failures)
        """
        try:
            # Check if required fields are present
            required_fields = ["id", "workflow_id"]
            for field in required_fields:
                if field not in message_data:
                    logger.error(f"Required field '{field}' missing in message_data")
                    raise ValueError(f"Required field '{field}' missing in message data")

            # A present-but-None id is tolerated: generate one instead of failing.
            if message_data["id"] is None:
                message_data["id"] = f"msg_{uuid.uuid4()}"
                logger.warning(f"Automatically generated ID for workflow message: {message_data['id']}")

            # Ensure a start timestamp exists
            if "started_at" not in message_data and "created_at" not in message_data:
                message_data["started_at"] = self._get_current_timestamp()

            # Legacy callers send "created_at"; normalise it to "started_at".
            if "created_at" in message_data and "started_at" not in message_data:
                message_data["started_at"] = message_data["created_at"]
                del message_data["created_at"]

            # Set status if not present
            if "status" not in message_data:
                message_data["status"] = "completed"

            # Set sequence number if not present
            if "sequence_no" not in message_data:
                # Get current messages to determine next sequence number
                existing_messages = self.get_workflow_messages(message_data["workflow_id"])
                message_data["sequence_no"] = len(existing_messages) + 1

            # Ensure role and agent_name are present; a message carrying an
            # agent_name defaults to the assistant role.
            if "role" not in message_data:
                message_data["role"] = "assistant" if message_data.get("agent_name") else "user"

            if "agent_name" not in message_data:
                message_data["agent_name"] = ""

            # Debug log for data to create
            logger.debug(f"Creating workflow message with data: {message_data}")

            # Create message in database
            created_message = self.db.record_create("workflow_messages", message_data)

            # Keep the parent workflow's message_ids list in sync
            if created_message:
                workflow_id = message_data["workflow_id"]
                workflow = self.get_workflow(workflow_id)

                if workflow:
                    # Get current message_ids or initialize empty list
                    message_ids = workflow.get("message_ids", [])

                    # Add the new message ID if not already in the list
                    if created_message["id"] not in message_ids:
                        message_ids.append(created_message["id"])
                        self.update_workflow(workflow_id, {"message_ids": message_ids})

            return created_message
        except Exception as e:
            logger.error(f"Error creating workflow message: {str(e)}")
            # Return None instead of raising to avoid cascading failures
            return None
|
|
|
|
    def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Updates an existing workflow message in the database.

        When the message does not exist yet and a workflow_id is supplied,
        the message is created instead (upsert-like behaviour).

        Args:
            message_id: ID of the message
            message_data: Data to update

        Returns:
            The updated (or newly created) message object, or None when the
            message is missing and no workflow_id was given

        Raises:
            ValueError: when message_id is empty, or wrapping any other
                failure during the update.
        """
        try:
            # Debug info
            logger.debug(f"Updating message {message_id} in database")

            # Ensure message_id is provided
            if not message_id:
                logger.error("No message_id provided for update_workflow_message")
                raise ValueError("message_id cannot be empty")

            # Check if message exists in database
            messages = self.db.get_recordset("workflow_messages", record_filter={"id": message_id})
            if not messages:
                logger.warning(f"Message with ID {message_id} does not exist in database")

                # If message doesn't exist but we have workflow_id, create it
                if "workflow_id" in message_data:
                    logger.info(f"Creating new message with ID {message_id} for workflow {message_data.get('workflow_id')}")
                    return self.db.record_create("workflow_messages", message_data)
                else:
                    logger.error(f"Workflow ID missing for new message {message_id}")
                    return None

            # Update existing message
            existing_message = messages[0]

            # Backfill role/agent_name when neither the update payload nor
            # the stored record has them.
            for key in ["role", "agent_name"]:
                if key not in message_data and key not in existing_message:
                    message_data[key] = "assistant" if key == "role" else ""

            # Ensure ID is in the dataset
            if 'id' not in message_data:
                message_data['id'] = message_id

            # Legacy callers send "created_at"; normalise it to "started_at".
            if "created_at" in message_data and "started_at" not in message_data:
                message_data["started_at"] = message_data["created_at"]
                del message_data["created_at"]

            # Update the message
            updated_message = self.db.record_modify("workflow_messages", message_id, message_data)
            if updated_message:
                logger.info(f"Message {message_id} updated successfully")
            else:
                logger.warning(f"Failed to update message {message_id}")

            return updated_message
        except Exception as e:
            logger.error(f"Error updating message {message_id}: {str(e)}", exc_info=True)
            # Re-raise with full information
            raise ValueError(f"Error updating message {message_id}: {str(e)}")
|
|
|
|
def delete_workflow_message(self, workflow_id: str, message_id: str) -> bool:
|
|
"""
|
|
Deletes a message from a workflow in the database.
|
|
|
|
Args:
|
|
workflow_id: ID of the associated workflow
|
|
message_id: ID of the message to delete
|
|
|
|
Returns:
|
|
True on success, False on error
|
|
"""
|
|
try:
|
|
# Check if the message exists
|
|
messages = self.get_workflow_messages(workflow_id)
|
|
message = next((m for m in messages if m.get("id") == message_id), None)
|
|
|
|
if not message:
|
|
logger.warning(f"Message {message_id} for workflow {workflow_id} not found")
|
|
return False
|
|
|
|
# Delete the message from the database
|
|
return self.db.record_delete("workflow_messages", message_id)
|
|
except Exception as e:
|
|
logger.error(f"Error deleting message {message_id}: {str(e)}")
|
|
return False
|
|
|
|
    def delete_file_from_message(self, workflow_id: str, message_id: str, file_id: int) -> bool:
        """
        Removes a file reference from a message.

        The file itself is not deleted, only the reference in the message.
        Enhanced version with improved file matching: the message is located
        by exact, then case-insensitive, then prefix id comparison, and the
        document is matched on both its "id" and "file_id" fields.

        Args:
            workflow_id: ID of the associated workflow
            message_id: ID of the message
            file_id: ID of the file to remove

        Returns:
            True on success, False on error (message/file not found, DB failure)
        """
        try:
            # Log operation
            logger.info(f"Removing file {file_id} from message {message_id} in workflow {workflow_id}")

            # Get all workflow messages
            all_messages = self.get_workflow_messages(workflow_id)
            logger.debug(f"Workflow {workflow_id} has {len(all_messages)} messages")

            # Try different approaches to find the message, strictest first
            message = None

            # Exact match
            message = next((m for m in all_messages if m.get("id") == message_id), None)

            # Case-insensitive match
            if not message and isinstance(message_id, str):
                message = next((m for m in all_messages
                    if isinstance(m.get("id"), str) and m.get("id").lower() == message_id.lower()), None)

            # Partial match (starts with)
            if not message and isinstance(message_id, str):
                message = next((m for m in all_messages
                    if isinstance(m.get("id"), str) and m.get("id").startswith(message_id)), None)

            if not message:
                logger.warning(f"Message {message_id} not found in workflow {workflow_id}")
                return False

            # Log the found message
            logger.info(f"Found message: {message.get('id')}")

            # Check if message has documents; nothing to remove otherwise
            if "documents" not in message or not message["documents"]:
                logger.warning(f"No documents in message {message_id}")
                return False

            # Log existing documents (debug aid for matching problems)
            documents = message.get("documents", [])
            logger.debug(f"Message has {len(documents)} documents")
            for i, doc in enumerate(documents):
                doc_id = doc.get("id", "unknown")
                file_id_value = doc.get("file_id", "unknown")
                logger.debug(f"Document {i}: doc_id={doc_id}, file_id={file_id_value}")

            # Create a new list of documents without the one to delete
            updated_documents = []
            removed = False

            for doc in documents:
                doc_id = doc.get("id")
                file_id_value = doc.get("file_id")

                # Flexible matching approach: equality on either field, or the
                # stringified file_id occurring as a substring of either field.
                # NOTE(review): the substring checks mean file_id 1 also
                # matches a document id containing "12" — presumably wanted
                # for composite ids, but worth confirming with callers.
                should_remove = (
                    (doc_id == file_id) or
                    (file_id_value == file_id) or
                    (isinstance(doc_id, str) and str(file_id) in doc_id) or
                    (isinstance(file_id_value, str) and str(file_id) in file_id_value)
                )

                if should_remove:
                    removed = True
                    logger.info(f"Found file to remove: doc_id={doc_id}, file_id={file_id_value}")
                else:
                    updated_documents.append(doc)

            if not removed:
                logger.warning(f"No matching file {file_id} found in message {message_id}")
                return False

            # Update message with modified documents array
            message_update = {
                "documents": updated_documents
            }

            # Apply the update directly to the database; use the message's
            # stored id, which may differ from the requested message_id after
            # the fuzzy lookup above
            updated = self.db.record_modify("workflow_messages", message["id"], message_update)

            if updated:
                logger.info(f"Successfully removed file {file_id} from message {message_id}")
                return True
            else:
                logger.warning(f"Failed to update message {message_id} in database")
                return False

        except Exception as e:
            logger.error(f"Error removing file {file_id} from message {message_id}: {str(e)}")
            return False
|
|
|
|
|
|
# Workflow Logs
|
|
|
|
def get_workflow_logs(self, workflow_id: str) -> List[Dict[str, Any]]:
|
|
"""Returns all log entries for a workflow"""
|
|
return self.db.get_recordset("workflow_logs", record_filter={"workflow_id": workflow_id})
|
|
|
|
def create_workflow_log(self, log_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Creates a new log entry for a workflow"""
|
|
# Make sure required fields are present
|
|
if "timestamp" not in log_data:
|
|
log_data["timestamp"] = self._get_current_timestamp()
|
|
|
|
# Add status information if not present
|
|
if "status" not in log_data and "type" in log_data:
|
|
if log_data["type"] == "error":
|
|
log_data["status"] = "error"
|
|
else:
|
|
log_data["status"] = "running"
|
|
|
|
# Add progress information if not present
|
|
if "progress" not in log_data:
|
|
# Default progress values based on log type
|
|
if log_data.get("type") == "info":
|
|
log_data["progress"] = 50 # Default middle progress
|
|
elif log_data.get("type") == "error":
|
|
log_data["progress"] = -1 # Error state
|
|
elif log_data.get("type") == "warning":
|
|
log_data["progress"] = 50 # Default middle progress
|
|
|
|
return self.db.record_create("workflow_logs", log_data)
|
|
|
|
|
|
# Workflow Management
|
|
|
|
def save_workflow_state(self, workflow: Dict[str, Any], save_messages: bool = True, save_logs: bool = True) -> bool:
|
|
"""
|
|
Saves the state of a workflow to the database.
|
|
Workflow data is updated, but messages are stored separately.
|
|
|
|
Args:
|
|
workflow: The workflow object
|
|
save_messages: Flag to determine if messages should be saved
|
|
save_logs: Flag to determine if logs should be saved
|
|
|
|
Returns:
|
|
True on success, False on failure
|
|
"""
|
|
try:
|
|
workflow_id = workflow.get("id")
|
|
if not workflow_id:
|
|
return False
|
|
|
|
# Extract only the database-relevant workflow fields
|
|
# IMPORTANT: Don't store messages in the workflow table!
|
|
workflow_db_data = {
|
|
"id": workflow_id,
|
|
"mandate_id": workflow.get("mandate_id", self.mandate_id),
|
|
"user_id": workflow.get("user_id", self.user_id),
|
|
"name": workflow.get("name", f"Workflow {workflow_id}"),
|
|
"status": workflow.get("status", "completed"),
|
|
"started_at": workflow.get("started_at", self._get_current_timestamp()),
|
|
"last_activity": workflow.get("last_activity", self._get_current_timestamp()),
|
|
"data_stats": workflow.get("data_stats", {})
|
|
}
|
|
|
|
# Check if workflow already exists
|
|
existing_workflow = self.get_workflow(workflow_id)
|
|
if existing_workflow:
|
|
self.update_workflow(workflow_id, workflow_db_data)
|
|
else:
|
|
self.create_workflow(workflow_db_data)
|
|
|
|
# Save messages
|
|
if save_messages and "messages" in workflow:
|
|
for message in workflow["messages"]:
|
|
message_id = message.get("id")
|
|
if not message_id:
|
|
continue
|
|
|
|
# Since each message is already saved with create_workflow_message,
|
|
# we only need to check if updates are necessary
|
|
# First, get existing message from database
|
|
existing_messages = self.get_workflow_messages(workflow_id)
|
|
existing_message = next((m for m in existing_messages if m.get("id") == message_id), None)
|
|
|
|
if existing_message:
|
|
# Check if updates are needed
|
|
has_changes = False
|
|
for key in ["role", "agent_name", "content", "status", "documents"]:
|
|
if key in message and message.get(key) != existing_message.get(key):
|
|
has_changes = True
|
|
break
|
|
|
|
if has_changes:
|
|
# Extract only relevant data for the database
|
|
message_data = {
|
|
"role": message.get("role", existing_message.get("role", "unknown")),
|
|
"content": message.get("content", existing_message.get("content", "")),
|
|
"agent_name": message.get("agent_name", existing_message.get("agent_name", "")),
|
|
"status": message.get("status", existing_message.get("status", "completed")),
|
|
"documents": message.get("documents", existing_message.get("documents", []))
|
|
}
|
|
self.update_workflow_message(message_id, message_data)
|
|
else:
|
|
# Message doesn't exist in database yet
|
|
# It should have been saved via create_workflow_message
|
|
# If not, log a warning
|
|
logger.warning(f"Message {message_id} in workflow {workflow_id} not found in database")
|
|
|
|
# Save logs
|
|
if save_logs and "logs" in workflow:
|
|
# Get existing logs
|
|
existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)}
|
|
|
|
for log in workflow["logs"]:
|
|
log_id = log.get("id")
|
|
if not log_id:
|
|
continue
|
|
|
|
# Extract only relevant data for the database
|
|
log_data = {
|
|
"id": log_id,
|
|
"workflow_id": workflow_id,
|
|
"message": log.get("message", ""),
|
|
"type": log.get("type", "info"),
|
|
"timestamp": log.get("timestamp", self._get_current_timestamp()),
|
|
"agent_name": log.get("agent_name", "(undefined)"),
|
|
"status": log.get("status", "running"),
|
|
"progress": log.get("progress", 50)
|
|
}
|
|
|
|
# Create or update log
|
|
if log_id in existing_logs:
|
|
self.db.record_modify("workflow_logs", log_id, log_data)
|
|
else:
|
|
self.db.record_create("workflow_logs", log_id, log_data)
|
|
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Error saving workflow state: {str(e)}")
|
|
return False
|
|
|
|
def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Loads the complete state of a workflow from the database.
|
|
This includes the workflow itself, messages, and logs.
|
|
|
|
Args:
|
|
workflow_id: ID of the workflow to load
|
|
|
|
Returns:
|
|
The complete workflow object or None on error
|
|
"""
|
|
try:
|
|
# Load base workflow
|
|
workflow = self.get_workflow(workflow_id)
|
|
if not workflow:
|
|
return None
|
|
|
|
# Log the workflow base retrieval
|
|
logger.debug(f"Loaded base workflow {workflow_id} from database")
|
|
|
|
# Load messages
|
|
messages = self.get_workflow_messages(workflow_id)
|
|
# Sort by sequence number
|
|
messages.sort(key=lambda x: x.get("sequence_no", 0))
|
|
|
|
# Debug log for messages and document counts
|
|
message_count = len(messages)
|
|
logger.debug(f"Loaded {message_count} messages for workflow {workflow_id}")
|
|
|
|
# Check if message_ids exists and is valid
|
|
message_ids = workflow.get("message_ids", [])
|
|
if not message_ids or len(message_ids) != len(messages):
|
|
# Rebuild message_ids from messages
|
|
message_ids = [msg.get("id") for msg in messages]
|
|
# Update in database
|
|
self.update_workflow(workflow_id, {"message_ids": message_ids})
|
|
logger.info(f"Rebuilt message_ids for workflow {workflow_id}")
|
|
|
|
# Log document counts for each message
|
|
for msg in messages:
|
|
doc_count = len(msg.get("documents", []))
|
|
if doc_count > 0:
|
|
logger.info(f"Message {msg.get('id')} has {doc_count} documents loaded from database")
|
|
|
|
# Load logs
|
|
logs = self.get_workflow_logs(workflow_id)
|
|
# Sort by timestamp
|
|
logs.sort(key=lambda x: x.get("timestamp", ""))
|
|
|
|
# Assemble complete workflow object
|
|
complete_workflow = workflow.copy()
|
|
complete_workflow["messages"] = messages
|
|
complete_workflow["message_ids"] = message_ids # Ensure message_ids is included
|
|
complete_workflow["logs"] = logs
|
|
|
|
return complete_workflow
|
|
except Exception as e:
|
|
logger.error(f"Error loading workflow state: {str(e)}")
|
|
return None
|
|
|
|
|
|
# Singleton factory for LucyDOMInterface instances per context
_lucydom_interfaces = {}


def get_lucydom_interface(mandate_id: int = 0, user_id: int = 0) -> LucyDOMInterface:
    """
    Returns a LucyDOMInterface instance for the specified context.
    Reuses existing instances.
    """
    context_key = f"{mandate_id}_{user_id}"
    interface = _lucydom_interfaces.get(context_key)
    if interface is None:
        # Create new interface instance and attach its AI service
        interface = LucyDOMInterface(mandate_id, user_id)
        interface.ai_service = ChatService()  # Directly set the attribute
        _lucydom_interfaces[context_key] = interface
    return interface


# Init
get_lucydom_interface()