1194 lines
No EOL
50 KiB
Python
1194 lines
No EOL
50 KiB
Python
import os
|
|
import logging
|
|
import uuid
|
|
from datetime import datetime
|
|
from typing import Dict, Any, List, Optional, Union
|
|
|
|
import importlib
|
|
import hashlib
|
|
|
|
from connectors.connector_db_json import DatabaseConnector
|
|
from modules.configuration import APP_CONFIG
|
|
|
|
logger = logging.getLogger(__name__)
|
|
|
|
# Custom exceptions for file handling
|
|
class FileError(Exception):
    """Base class for file handling exceptions."""
|
|
|
|
class FileNotFoundError(FileError):
    """Exception raised when a file is not found.

    NOTE(review): this name shadows the builtin ``FileNotFoundError``
    within this module; handlers catching the builtin elsewhere will
    not catch this class. Renaming would break existing callers.
    """
|
|
|
|
class FileStorageError(FileError):
    """Exception raised when there's an error storing a file."""
|
|
|
|
class FilePermissionError(FileError):
    """Exception raised when there's a permission issue with a file."""
|
|
|
|
class FileDeletionError(FileError):
    """Exception raised when there's an error deleting a file."""
|
|
|
|
|
|
class LucyDOMInterface:
|
|
"""
|
|
Interface zur LucyDOM-Datenbank.
|
|
Verwendet den JSON-Konnektor für den Datenzugriff.
|
|
"""
|
|
|
|
def __init__(self, mandate_id: int, user_id: int):
    """
    Set up the LucyDOM interface for a mandate/user context.

    Args:
        mandate_id: ID of the current mandate (tenant)
        user_id: ID of the current user

    Raises:
        ImportError: when the data-model module cannot be loaded
    """
    self.mandate_id = mandate_id
    self.user_id = user_id

    # Load the data-model module; fail loudly if it is unavailable.
    try:
        self.model_module = importlib.import_module("modules.lucydom_model")
    except ImportError as exc:
        logger.error(f"Fehler beim Importieren von lucydom_model: {exc}")
        raise
    else:
        logger.info("lucydom_model erfolgreich importiert")

    # Create the DB connection and seed default records if needed.
    self._initialize_database()
|
|
|
|
def _initialize_database(self):
    """
    Initialize the database for the logged-in user within the mandate.

    Does nothing unless both a mandate and a user context are present.
    Creates the database connector and, when the 'prompts' table is
    still empty, seeds it with a set of default prompt templates.
    """
    effective_mandate_id = self.mandate_id
    effective_user_id = self.user_id
    if effective_mandate_id is None or effective_user_id is None:
        # No mandate/user context available -- skip initialization entirely.
        return

    # Connection settings come from the application configuration.
    self.db = DatabaseConnector(
        db_host=APP_CONFIG.get("DB_LUCYDOM_HOST"),
        db_database=APP_CONFIG.get("DB_LUCYDOM_DATABASE"),
        db_user=APP_CONFIG.get("DB_LUCYDOM_USER"),
        db_password=APP_CONFIG.get("DB_LUCYDOM_PASSWORD_SECRET"),
        mandate_id=self.mandate_id,
        user_id=self.user_id
    )

    # Seed default prompts only when the table is still empty.
    prompts = self.db.get_recordset("prompts")
    if not prompts:
        logger.info("Erstelle Standard-Prompts")

        # Default prompt templates for common task areas.
        # NOTE(review): unlike create_prompt(), these records carry no
        # 'created_at' timestamp -- confirm whether that is intentional.
        standard_prompts = [
            {
                "mandate_id": effective_mandate_id,
                "user_id": effective_user_id,
                "content": "Recherchiere die aktuellen Markttrends und Entwicklungen im Bereich [THEMA]. Sammle Informationen zu führenden Unternehmen, innovativen Produkten oder Dienstleistungen und aktuellen Herausforderungen. Präsentiere die Ergebnisse in einer strukturierten Übersicht mit relevanten Daten und Quellen.",
                "name": "Web Research: Marktforschung"
            },
            {
                "mandate_id": effective_mandate_id,
                "user_id": effective_user_id,
                "content": "Analysiere den beigefügten Datensatz zu [THEMA] und identifiziere die wichtigsten Trends, Muster und Auffälligkeiten. Führe statistische Berechnungen durch, um deine Erkenntnisse zu untermauern. Stelle die Ergebnisse in einer klar strukturierten Analyse dar und ziehe relevante Schlussfolgerungen.",
                "name": "Analyse: Datenanalyse"
            },
            {
                "mandate_id": effective_mandate_id,
                "user_id": effective_user_id,
                "content": "Erstelle ein detailliertes Protokoll unserer Besprechung zum Thema [THEMA]. Erfasse alle besprochenen Punkte, getroffenen Entscheidungen und vereinbarten Maßnahmen. Strukturiere das Protokoll übersichtlich mit Tagesordnungspunkten, Teilnehmerliste und klaren Verantwortlichkeiten für die Follow-up-Aktionen.",
                "name": "Protokoll: Besprechungsprotokoll"
            },
            {
                "mandate_id": effective_mandate_id,
                "user_id": effective_user_id,
                "content": "Entwickle ein UI/UX-Designkonzept für [ANWENDUNG/WEBSITE]. Berücksichtige die Zielgruppe, Hauptfunktionen und die Markenidentität. Beschreibe die visuelle Gestaltung, Navigation, Interaktionsmuster und Informationsarchitektur. Erläutere, wie das Design die Benutzerfreundlichkeit und das Nutzererlebnis optimiert.",
                "name": "Design: UI/UX Design"
            }
        ]

        # Persist each default prompt.
        for prompt_data in standard_prompts:
            created_prompt = self.db.record_create("prompts", prompt_data)
            logger.info(f"Prompt '{prompt_data.get('name', 'Standard')}' wurde erstellt mit ID {created_prompt['id']}")
|
|
|
|
|
|
# Utilities
|
|
|
|
def get_initial_id(self, table: str) -> Optional[int]:
|
|
"""
|
|
Gibt die initiale ID für eine Tabelle zurück.
|
|
|
|
Args:
|
|
table: Name der Tabelle
|
|
|
|
Returns:
|
|
Die initiale ID oder None, wenn nicht vorhanden
|
|
"""
|
|
return self.db.get_initial_id(table)
|
|
|
|
def _get_current_timestamp(self) -> str:
|
|
"""Gibt den aktuellen Zeitstempel im ISO-Format zurück"""
|
|
return datetime.now().isoformat()
|
|
|
|
|
|
# Prompt-Methoden
|
|
|
|
def get_all_prompts(self) -> List[Dict[str, Any]]:
|
|
"""Gibt alle Prompts des aktuellen Mandanten zurück"""
|
|
return self.db.get_recordset("prompts")
|
|
|
|
def get_prompt(self, prompt_id: int) -> Optional[Dict[str, Any]]:
|
|
"""Gibt einen Prompt anhand seiner ID zurück"""
|
|
prompts = self.db.get_recordset("prompts", record_filter={"id": prompt_id})
|
|
if prompts:
|
|
return prompts[0]
|
|
return None
|
|
|
|
def create_prompt(self, content: str, name: str) -> Dict[str, Any]:
|
|
"""Erstellt einen neuen Prompt"""
|
|
prompt_data = {
|
|
"mandate_id": self.mandate_id,
|
|
"user_id": self.user_id,
|
|
"content": content,
|
|
"name": name,
|
|
"created_at": self._get_current_timestamp()
|
|
}
|
|
|
|
return self.db.record_create("prompts", prompt_data)
|
|
|
|
def update_prompt(self, prompt_id: int, content: str = None, name: str = None) -> Dict[str, Any]:
|
|
"""
|
|
Aktualisiert einen vorhandenen Prompt
|
|
|
|
Args:
|
|
prompt_id: ID des zu aktualisierenden Prompts
|
|
content: Neuer Inhalt des Prompts
|
|
|
|
Returns:
|
|
Das aktualisierte Prompt-Objekt
|
|
"""
|
|
# Prüfen, ob der Prompt existiert
|
|
prompt = self.get_prompt(prompt_id)
|
|
if not prompt:
|
|
return None
|
|
|
|
# Daten für die Aktualisierung vorbereiten
|
|
prompt_data = {}
|
|
|
|
if content is not None:
|
|
prompt_data["content"] = content
|
|
if name is not None:
|
|
prompt_data["name"] = name
|
|
|
|
# Prompt aktualisieren
|
|
return self.db.record_modify("prompts", prompt_id, prompt_data)
|
|
|
|
def delete_prompt(self, prompt_id: int) -> bool:
|
|
"""
|
|
Löscht einen Prompt aus der Datenbank
|
|
|
|
Args:
|
|
prompt_id: ID des zu löschenden Prompts
|
|
|
|
Returns:
|
|
True, wenn der Prompt erfolgreich gelöscht wurde, sonst False
|
|
"""
|
|
return self.db.record_delete("prompts", prompt_id)
|
|
|
|
|
|
# File Utilities
|
|
|
|
def calculate_file_hash(self, file_content: bytes) -> str:
    """Return the hex SHA-256 digest of the given file content."""
    digest = hashlib.sha256(file_content)
    return digest.hexdigest()
|
|
|
|
def check_for_duplicate_file(self, file_hash: str) -> Optional[Dict[str, Any]]:
|
|
"""Prüft, ob bereits eine Datei mit demselben Hash existiert"""
|
|
files = self.db.get_recordset("files", record_filter={"file_hash": file_hash})
|
|
if files:
|
|
return files[0]
|
|
return None
|
|
|
|
def get_mime_type(self, filename: str) -> str:
    """
    Guess the MIME type of a file from its extension.

    Args:
        filename: File name (with or without a path).

    Returns:
        The matching MIME type, or ``application/octet-stream`` for
        unknown or missing extensions.
    """
    # splitext keeps the leading dot; strip it and normalize case once.
    # (Fixed: removed a redundant function-local `import os` -- os is
    # already imported at module level -- and a redundant second .lower().)
    ext = os.path.splitext(filename)[1].lower()[1:]
    extension_to_mime = {
        "pdf": "application/pdf",
        "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "doc": "application/msword",
        "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "xls": "application/vnd.ms-excel",
        "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
        "ppt": "application/vnd.ms-powerpoint",
        "csv": "text/csv",
        "txt": "text/plain",
        "json": "application/json",
        "xml": "application/xml",
        "html": "text/html",
        "htm": "text/html",
        "jpg": "image/jpeg",
        "jpeg": "image/jpeg",
        "png": "image/png",
        "gif": "image/gif",
        "webp": "image/webp",
        "svg": "image/svg+xml",
        "py": "text/x-python",
        "js": "application/javascript",
        "css": "text/css"
    }
    # Unknown extensions fall back to the generic binary type.
    return extension_to_mime.get(ext, "application/octet-stream")
|
|
|
|
|
|
# File Methoden - Metadaten-basierte Operationen
|
|
|
|
def get_all_files(self) -> List[Dict[str, Any]]:
|
|
"""
|
|
Gibt alle Dateien des aktuellen Mandanten zurück ohne Binärdaten.
|
|
|
|
Returns:
|
|
Liste von FileItem-Objekten ohne Binärdaten
|
|
"""
|
|
files = self.db.get_recordset("files")
|
|
return files
|
|
|
|
def get_file(self, file_id: int) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Gibt eine Datei anhand ihrer ID zurück, ohne Binärdaten.
|
|
|
|
Args:
|
|
file_id: ID der gesuchten Datei
|
|
|
|
Returns:
|
|
FileItem ohne Binärdaten oder None, wenn nicht gefunden
|
|
"""
|
|
files = self.db.get_recordset("files", record_filter={"id": file_id})
|
|
if files:
|
|
return files[0]
|
|
return None
|
|
|
|
def create_file(self, name: str, mime_type: str, size: int = None, file_hash: str = None) -> Dict[str, Any]:
|
|
"""
|
|
Erstellt einen neuen Dateieintrag in der Datenbank ohne Inhalt.
|
|
Der eigentliche Dateiinhalt wird separat in der FileData-Tabelle gespeichert.
|
|
|
|
Args:
|
|
name: Name der Datei
|
|
mime_type: MIME-Typ der Datei
|
|
size: Größe der Datei in Bytes
|
|
file_hash: Hash-Wert der Datei für Deduplizierung
|
|
|
|
Returns:
|
|
Das erstellte FileItem-Objekt
|
|
"""
|
|
file_data = {
|
|
"mandate_id": self.mandate_id,
|
|
"user_id": self.user_id,
|
|
"name": name,
|
|
"mime_type": mime_type,
|
|
"size": size,
|
|
"file_hash": file_hash,
|
|
"creation_date": self._get_current_timestamp()
|
|
}
|
|
return self.db.record_create("files", file_data)
|
|
|
|
def update_file(self, file_id: int, update_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Aktualisiert die Metadaten einer vorhandenen Datei ohne die Binärdaten zu beeinflussen.
|
|
|
|
Args:
|
|
file_id: ID der zu aktualisierenden Datei
|
|
update_data: Dictionary mit zu aktualisierenden Feldern
|
|
|
|
Returns:
|
|
Das aktualisierte FileItem-Objekt
|
|
"""
|
|
# Prüfen, ob die Datei existiert
|
|
file = self.get_file(file_id)
|
|
if not file:
|
|
raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden")
|
|
|
|
# Datei aktualisieren
|
|
return self.db.record_modify("files", file_id, update_data)
|
|
|
|
def delete_file(self, file_id: int) -> bool:
    """
    Delete a file from the database (metadata and, if unshared, content).

    Content rows are deduplicated by hash: when other file records point
    at the same hash, only this metadata record is removed and the shared
    content row in 'file_data' is kept.

    Args:
        file_id: ID of the file

    Returns:
        True on success, False on failure

    Raises:
        FileNotFoundError: when no file record with this ID exists
        FilePermissionError: when the file belongs to another mandate
        FileDeletionError: for any other failure during deletion
    """
    try:
        # Look up the metadata record first.
        file = self.get_file(file_id)

        if not file:
            raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden")

        # Tenant isolation: only files of the current mandate may be deleted.
        if file.get("mandate_id") != self.mandate_id:
            raise FilePermissionError(f"Keine Berechtigung zum Löschen der Datei {file_id}")

        # Check whether other file records reference the same content (by hash).
        # NOTE(review): files without a hash skip this branch entirely, so
        # their 'file_data' row is never removed -- confirm that is intended.
        file_hash = file.get("file_hash")
        if file_hash:
            other_references = [f for f in self.db.get_recordset("files", record_filter={"file_hash": file_hash})
                                if f.get("id") != file_id]

            # Shared content: keep the 'file_data' row, drop only this record.
            if other_references:
                logger.info(f"Andere Referenzen auf den Dateiinhalt gefunden, nur FileItem wird gelöscht: {file_id}")
            else:
                # Sole owner: also remove the content row. This is
                # best-effort -- a failure here must not block removing
                # the metadata record below.
                try:
                    file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
                    if file_data_entries:
                        self.db.record_delete("file_data", file_id)
                        logger.info(f"FileData für Datei {file_id} gelöscht")
                except Exception as e:
                    logger.warning(f"Fehler beim Löschen des FileData für Datei {file_id}: {str(e)}")

        # Finally remove the metadata record itself.
        return self.db.record_delete("files", file_id)

    except FileNotFoundError as e:
        # Pass through FileNotFoundError unchanged.
        raise
    except FilePermissionError as e:
        # Pass through FilePermissionError unchanged.
        raise
    except Exception as e:
        # Wrap everything else so callers get a single failure type.
        logger.error(f"Fehler beim Löschen der Datei {file_id}: {str(e)}")
        raise FileDeletionError(f"Fehler beim Löschen der Datei: {str(e)}")
|
|
|
|
|
|
# FileData Methoden - Binärdaten-basierte Operationen
|
|
|
|
def create_file_data(self, file_id: int, data: bytes) -> bool:
|
|
"""
|
|
Speichert die Binärdaten einer Datei in der Datenbank als Base64-String.
|
|
|
|
Args:
|
|
file_id: ID der zugehörigen Datei
|
|
data: Binärdaten
|
|
|
|
Returns:
|
|
True bei Erfolg, False bei Fehler
|
|
"""
|
|
try:
|
|
import base64
|
|
|
|
# Convert binary data to base64 string
|
|
if isinstance(data, bytes):
|
|
encoded_data = base64.b64encode(data).decode('utf-8')
|
|
logger.debug(f"Converted {len(data)} bytes to base64 string of length {len(encoded_data)}")
|
|
else:
|
|
logger.warning(f"Data is not bytes, but {type(data)}. Attempting to handle...")
|
|
# Try to convert to bytes if it's not already
|
|
if isinstance(data, str):
|
|
# Check if it might already be base64 encoded
|
|
try:
|
|
# See if it's valid base64
|
|
base64.b64decode(data)
|
|
# If no error, assume it's already encoded
|
|
encoded_data = data
|
|
logger.info(f"Data appears to be already base64 encoded, using as is")
|
|
except:
|
|
# Not base64, so encode the string as bytes then to base64
|
|
encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8')
|
|
logger.info(f"Converted string to base64")
|
|
else:
|
|
# For other types, convert to string first
|
|
encoded_data = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
|
|
logger.warning(f"Converted non-standard type to base64")
|
|
|
|
# Create the file_data record with encoded data
|
|
file_data = {
|
|
"id": file_id,
|
|
"data": encoded_data
|
|
}
|
|
|
|
self.db.record_create("file_data", file_data)
|
|
logger.info(f"Successfully stored encoded data for file {file_id}")
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Fehler beim Speichern der Binärdaten für Datei {file_id}: {str(e)}")
|
|
return False
|
|
|
|
def get_file_data(self, file_id: int) -> Optional[bytes]:
|
|
"""
|
|
Gibt die Binärdaten einer Datei zurück.
|
|
Konvertiert Base64-String aus der Datenbank zurück zu bytes.
|
|
|
|
Args:
|
|
file_id: ID der Datei
|
|
|
|
Returns:
|
|
Binärdaten oder None, wenn nicht gefunden
|
|
"""
|
|
import base64
|
|
|
|
file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
|
|
if file_data_entries and "data" in file_data_entries[0]:
|
|
encoded_data = file_data_entries[0]["data"]
|
|
|
|
try:
|
|
# Check if it's a string (most likely base64)
|
|
if isinstance(encoded_data, str):
|
|
try:
|
|
# Try to decode base64
|
|
binary_data = base64.b64decode(encoded_data)
|
|
logger.debug(f"Successfully decoded base64 string to {len(binary_data)} bytes")
|
|
return binary_data
|
|
except Exception as e:
|
|
logger.error(f"Failed to decode base64 data: {str(e)}")
|
|
# If it's not valid base64, return as bytes
|
|
return encoded_data.encode('utf-8')
|
|
# If it's already bytes (shouldn't happen with model change)
|
|
elif isinstance(encoded_data, bytes):
|
|
logger.warning(f"Data was already bytes, no conversion needed")
|
|
return encoded_data
|
|
else:
|
|
logger.error(f"Unexpected data type in database: {type(encoded_data)}")
|
|
return None
|
|
except Exception as e:
|
|
logger.error(f"Error processing file data: {str(e)}")
|
|
return None
|
|
else:
|
|
logger.warning(f"No data found for file ID {file_id}")
|
|
return None
|
|
|
|
def update_file_data(self, file_id: int, data: Union[bytes, str]) -> bool:
|
|
"""
|
|
Aktualisiert die Binärdaten einer Datei in der Datenbank.
|
|
Konvertiert bytes zu Base64-String für die Speicherung.
|
|
|
|
Args:
|
|
file_id: ID der Datei
|
|
data: Neue Binärdaten oder kodierte Daten
|
|
|
|
Returns:
|
|
True bei Erfolg, False bei Fehler
|
|
"""
|
|
try:
|
|
import base64
|
|
|
|
# Convert data to base64 string if it's bytes
|
|
if isinstance(data, bytes):
|
|
encoded_data = base64.b64encode(data).decode('utf-8')
|
|
logger.debug(f"Converted {len(data)} bytes to base64 string")
|
|
elif isinstance(data, str):
|
|
# Check if it might already be base64 encoded
|
|
try:
|
|
# See if it's valid base64
|
|
base64.b64decode(data)
|
|
# If no error, assume it's already encoded
|
|
encoded_data = data
|
|
logger.debug(f"Data appears to be already base64 encoded, using as is")
|
|
except:
|
|
# Not base64, so encode the string as bytes then to base64
|
|
encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8')
|
|
logger.debug(f"Converted string to base64")
|
|
else:
|
|
# For other types, convert to string first
|
|
encoded_data = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
|
|
logger.warning(f"Converted non-standard type to base64")
|
|
|
|
# Check if a record already exists
|
|
file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
|
|
|
|
if file_data_entries:
|
|
# Update the existing record
|
|
self.db.record_modify("file_data", file_id, {"data": encoded_data})
|
|
logger.info(f"Updated existing file data for file ID {file_id}")
|
|
else:
|
|
# Create a new record
|
|
file_data = {
|
|
"id": file_id,
|
|
"data": encoded_data
|
|
}
|
|
self.db.record_create("file_data", file_data)
|
|
logger.info(f"Created new file data for file ID {file_id}")
|
|
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Fehler beim Aktualisieren der Binärdaten für Datei {file_id}: {str(e)}")
|
|
return False
|
|
|
|
def save_uploaded_file(self, file_content: bytes, file_name: str) -> Dict[str, Any]:
    """
    Store an uploaded file in the database.

    Metadata goes into the 'files' table, the binary content into the
    'file_data' table as a base64 string. Uploads whose SHA-256 hash
    matches an existing file are deduplicated: the existing metadata
    record is returned and nothing new is stored.

    Args:
        file_content: Raw bytes of the file
        file_name: Name of the file

    Returns:
        Metadata record of the stored (or already existing) file

    Raises:
        FileStorageError: when any step of the storage process fails
    """
    try:
        logger.info(f"Starting upload process for file: {file_name}")

        # Reject anything that is not raw bytes up front.
        if not isinstance(file_content, bytes):
            logger.error(f"Invalid file_content type: {type(file_content)}")
            raise ValueError(f"file_content must be bytes, got {type(file_content)}")

        # Hash the content for deduplication.
        file_hash = self.calculate_file_hash(file_content)
        logger.debug(f"Calculated file hash: {file_hash}")

        # Deduplicate: identical content is stored only once.
        existing_file = self.check_for_duplicate_file(file_hash)
        if existing_file:
            # Return the existing metadata; no new records are written.
            logger.info(f"Duplikat gefunden für {file_name}: {existing_file['id']}")
            return existing_file

        # Derive the MIME type from the file extension.
        mime_type = self.get_mime_type(file_name)

        # Size in bytes.
        file_size = len(file_content)

        # 1. Store the metadata record in the 'files' table.
        logger.info(f"Saving file metadata to database for file: {file_name}")
        db_file = self.create_file(
            name=file_name,
            mime_type=mime_type,
            size=file_size,
            file_hash=file_hash
        )

        # 2. Store the binary content (base64) in the 'file_data' table.
        # NOTE(review): the boolean result of create_file_data() is ignored,
        # so a failed content write would still report success here.
        logger.info(f"Saving file content to database for file: {file_name}")
        self.create_file_data(db_file["id"], file_content)

        # Debug aid: also dump the file into the static folder.
        if logger.isEnabledFor(logging.DEBUG): self._export_file_to_static(file_content, db_file["id"], file_name)

        # Sanity check that the metadata record was actually created.
        if not db_file:
            logger.warning(f"Database record for file {file_name} was not created properly")
        else:
            logger.debug(f"Database record created for file {file_name}")

        logger.info(f"File upload process completed for: {file_name}")
        return db_file

    except Exception as e:
        # Wrap all failures into the module's storage exception.
        logger.error(f"Error in save_uploaded_file for {file_name}: {str(e)}", exc_info=True)
        raise FileStorageError(f"Fehler beim Speichern der Datei: {str(e)}")
|
|
|
|
def download_file(self, file_id: int) -> Optional[Dict[str, Any]]:
|
|
"""
|
|
Gibt eine Datei zum Download zurück, einschließlich Binärdaten.
|
|
|
|
Args:
|
|
file_id: ID der Datei
|
|
|
|
Returns:
|
|
Dictionary mit Dateidaten und -metadaten oder None, wenn nicht gefunden
|
|
"""
|
|
try:
|
|
# 1. Metadaten aus der 'files'-Tabelle holen
|
|
file = self.get_file(file_id)
|
|
|
|
if not file:
|
|
raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden")
|
|
|
|
# 2. Binärdaten aus der 'file_data'-Tabelle holen
|
|
file_content = self.get_file_data(file_id)
|
|
|
|
if file_content is None:
|
|
raise FileNotFoundError(f"Binärdaten für Datei mit ID {file_id} nicht gefunden")
|
|
|
|
return {
|
|
"id": file_id,
|
|
"name": file.get("name", f"file_{file_id}"),
|
|
"content_type": file.get("mime_type", "application/octet-stream"),
|
|
"size": file.get("size", len(file_content)),
|
|
"content": file_content
|
|
}
|
|
except FileNotFoundError as e:
|
|
# Re-raise FileNotFoundError as is
|
|
raise
|
|
except Exception as e:
|
|
logger.error(f"Fehler beim Herunterladen der Datei {file_id}: {str(e)}")
|
|
raise FileError(f"Fehler beim Herunterladen der Datei: {str(e)}")
|
|
|
|
def _export_file_to_static(self, file_content: bytes, file_id: int, file_name: str):
|
|
debug_filename = f"{file_id}_{file_name}"
|
|
with open(f"./static/{debug_filename}", 'wb') as f:
|
|
f.write(file_content)
|
|
|
|
# Workflow Methoden
|
|
|
|
def get_all_workflows(self) -> List[Dict[str, Any]]:
|
|
"""Gibt alle Workflows des aktuellen Mandanten zurück"""
|
|
return self.db.get_recordset("workflows")
|
|
|
|
def get_workflows_by_user(self, user_id: int) -> List[Dict[str, Any]]:
|
|
"""Gibt alle Workflows eines Benutzers zurück"""
|
|
return self.db.get_recordset("workflows", record_filter={"user_id": user_id})
|
|
|
|
def get_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]:
|
|
"""Gibt einen Workflow anhand seiner ID zurück"""
|
|
workflows = self.db.get_recordset("workflows", record_filter={"id": workflow_id})
|
|
if workflows:
|
|
return workflows[0]
|
|
return None
|
|
|
|
def create_workflow(self, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""Erstellt einen neuen Workflow in der Datenbank"""
|
|
# Stellen Sie sicher, dass mandate_id und user_id gesetzt sind
|
|
if "mandate_id" not in workflow_data:
|
|
workflow_data["mandate_id"] = self.mandate_id
|
|
|
|
if "user_id" not in workflow_data:
|
|
workflow_data["user_id"] = self.user_id
|
|
|
|
# Zeitstempel setzen, falls nicht vorhanden
|
|
current_time = self._get_current_timestamp()
|
|
if "started_at" not in workflow_data:
|
|
workflow_data["started_at"] = current_time
|
|
|
|
if "last_activity" not in workflow_data:
|
|
workflow_data["last_activity"] = current_time
|
|
|
|
return self.db.record_create("workflows", workflow_data)
|
|
|
|
def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
|
|
"""
|
|
Aktualisiert einen vorhandenen Workflow.
|
|
|
|
Args:
|
|
workflow_id: ID des zu aktualisierenden Workflows
|
|
workflow_data: Neue Daten für den Workflow
|
|
|
|
Returns:
|
|
Das aktualisierte Workflow-Objekt
|
|
"""
|
|
# Prüfen, ob der Workflow existiert
|
|
workflow = self.get_workflow(workflow_id)
|
|
if not workflow:
|
|
return None
|
|
|
|
# Aktualisierungszeit setzen
|
|
workflow_data["last_activity"] = self._get_current_timestamp()
|
|
|
|
# Workflow aktualisieren
|
|
return self.db.record_modify("workflows", workflow_id, workflow_data)
|
|
|
|
def delete_workflow(self, workflow_id: str) -> bool:
|
|
"""
|
|
Löscht einen Workflow aus der Datenbank.
|
|
|
|
Args:
|
|
workflow_id: ID des zu löschenden Workflows
|
|
|
|
Returns:
|
|
True bei Erfolg, False wenn der Workflow nicht existiert
|
|
"""
|
|
# Prüfen, ob der Workflow existiert
|
|
workflow = self.get_workflow(workflow_id)
|
|
if not workflow:
|
|
return False
|
|
|
|
# Prüfen, ob der Benutzer der Eigentümer ist oder Admin-Rechte hat
|
|
if workflow.get("user_id") != self.user_id:
|
|
# Hier könnte eine Prüfung auf Admin-Rechte erfolgen
|
|
return False
|
|
|
|
# Workflow löschen
|
|
return self.db.record_delete("workflows", workflow_id)
|
|
|
|
|
|
# Workflow Messages
|
|
|
|
def get_workflow_messages(self, workflow_id: str) -> List[Dict[str, Any]]:
|
|
"""Gibt alle Nachrichten eines Workflows zurück"""
|
|
return self.db.get_recordset("workflow_messages", record_filter={"workflow_id": workflow_id})
|
|
|
|
def create_workflow_message(self, message_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Create a new message for a workflow.

    Fills in defaults for missing fields (ID, timestamps, status,
    sequence number, role, agent name) and links the created message
    into the owning workflow's ``message_ids`` list.

    Args:
        message_data: The message data; must contain the keys 'id'
            (the value may be None) and 'workflow_id'.

    Returns:
        The created message, or None on error (errors are logged rather
        than raised to avoid cascading failures).
    """
    try:
        # 'id' and 'workflow_id' must at least be present as keys.
        required_fields = ["id", "workflow_id"]
        for field in required_fields:
            if field not in message_data:
                logger.error(f"Required field '{field}' missing in message_data")
                raise ValueError(f"Required field '{field}' missing in message data")

        # A None ID is tolerated: generate one instead of failing.
        if message_data["id"] is None:
            message_data["id"] = f"msg_{uuid.uuid4()}"
            logger.warning(f"Automatically generated ID for workflow message: {message_data['id']}")

        # Default the start timestamp when neither variant is supplied.
        if "started_at" not in message_data and "created_at" not in message_data:
            message_data["started_at"] = self._get_current_timestamp()

        # Normalize legacy 'created_at' to the canonical 'started_at'.
        if "created_at" in message_data and "started_at" not in message_data:
            message_data["started_at"] = message_data["created_at"]
            del message_data["created_at"]

        # Default status.
        if "status" not in message_data:
            message_data["status"] = "completed"

        # Default sequence number: append after the existing messages.
        if "sequence_no" not in message_data:
            # Count the current messages to determine the next number.
            existing_messages = self.get_workflow_messages(message_data["workflow_id"])
            message_data["sequence_no"] = len(existing_messages) + 1

        # Derive the role from the presence of an agent name.
        if "role" not in message_data:
            message_data["role"] = "assistant" if message_data.get("agent_name") else "user"

        if "agent_name" not in message_data:
            message_data["agent_name"] = ""

        # Debug log for the final record about to be created.
        logger.debug(f"Creating workflow message with data: {message_data}")

        # Create the message in the database.
        created_message = self.db.record_create("workflow_messages", message_data)

        # Link the new message into the owning workflow's message list.
        if created_message:
            workflow_id = message_data["workflow_id"]
            workflow = self.get_workflow(workflow_id)

            if workflow:
                # Get current message_ids or initialize an empty list.
                message_ids = workflow.get("message_ids", [])

                # Avoid duplicate links when the message already exists.
                if created_message["id"] not in message_ids:
                    message_ids.append(created_message["id"])
                    self.update_workflow(workflow_id, {"message_ids": message_ids})

        return created_message
    except Exception as e:
        logger.error(f"Error creating workflow message: {str(e)}")
        # Return None instead of raising to avoid cascading failures.
        return None
|
|
|
|
def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Update an existing workflow message in the database.

    If the message does not exist yet but *message_data* carries a
    'workflow_id', the message is created instead (upsert behavior).

    Args:
        message_id: ID of the message
        message_data: Data to update

    Returns:
        The updated (or newly created) message object, or None when a
        missing message cannot be created for lack of a workflow_id.

    Raises:
        ValueError: when message_id is empty or the update fails.
    """
    try:
        # Debug info
        logger.debug(f"Updating message {message_id} in database")

        # An empty message_id is a programming error, not a cache miss.
        if not message_id:
            logger.error("No message_id provided for update_workflow_message")
            raise ValueError("message_id cannot be empty")

        # Check whether the message exists in the database.
        messages = self.db.get_recordset("workflow_messages", record_filter={"id": message_id})
        if not messages:
            logger.warning(f"Message with ID {message_id} does not exist in database")

            # Upsert path: create the message when we know its workflow.
            if "workflow_id" in message_data:
                logger.info(f"Creating new message with ID {message_id} for workflow {message_data.get('workflow_id')}")
                return self.db.record_create("workflow_messages", message_data)
            else:
                logger.error(f"Workflow ID missing for new message {message_id}")
                return None

        # Update existing message.
        existing_message = messages[0]

        # Backfill role/agent_name when absent in both update and record.
        for key in ["role", "agent_name"]:
            if key not in message_data and key not in existing_message:
                message_data[key] = "assistant" if key == "role" else ""

        # The connector expects the ID inside the record as well.
        if 'id' not in message_data:
            message_data['id'] = message_id

        # Normalize legacy 'created_at' to the canonical 'started_at'.
        if "created_at" in message_data and "started_at" not in message_data:
            message_data["started_at"] = message_data["created_at"]
            del message_data["created_at"]

        # Update the message.
        updated_message = self.db.record_modify("workflow_messages", message_id, message_data)
        if updated_message:
            logger.info(f"Message {message_id} updated successfully")
        else:
            logger.warning(f"Failed to update message {message_id}")

        return updated_message
    except Exception as e:
        logger.error(f"Error updating message {message_id}: {str(e)}", exc_info=True)
        # Re-raise with full information
        raise ValueError(f"Error updating message {message_id}: {str(e)}")
|
|
|
|
def delete_workflow_message(self, workflow_id: str, message_id: str) -> bool:
|
|
"""
|
|
Löscht eine Nachricht aus einem Workflow in der Datenbank.
|
|
|
|
Args:
|
|
workflow_id: ID des zugehörigen Workflows
|
|
message_id: ID der zu löschenden Nachricht
|
|
|
|
Returns:
|
|
True bei Erfolg, False bei Fehler
|
|
"""
|
|
try:
|
|
# Prüfen, ob die Nachricht existiert
|
|
messages = self.get_workflow_messages(workflow_id)
|
|
message = next((m for m in messages if m.get("id") == message_id), None)
|
|
|
|
if not message:
|
|
logger.warning(f"Nachricht {message_id} für Workflow {workflow_id} nicht gefunden")
|
|
return False
|
|
|
|
# Nachricht aus der Datenbank löschen
|
|
return self.db.record_delete("workflow_messages", message_id)
|
|
except Exception as e:
|
|
logger.error(f"Fehler beim Löschen der Nachricht {message_id}: {str(e)}")
|
|
return False
|
|
|
|
def delete_file_from_message(self, workflow_id: str, message_id: str, file_id: int) -> bool:
    """
    Remove a file reference from a message.
    The file itself is not deleted, only the reference inside the message.
    Enhanced version with improved file matching.

    Args:
        workflow_id: ID of the owning workflow
        message_id: ID of the message
        file_id: ID of the file reference to remove

    Returns:
        True on success, False on failure
    """
    try:
        logger.info(f"Removing file {file_id} from message {message_id} in workflow {workflow_id}")

        all_messages = self.get_workflow_messages(workflow_id)
        logger.debug(f"Workflow {workflow_id} has {len(all_messages)} messages")

        # Locate the target message: exact id first, then case-insensitive,
        # then prefix match — in that order of preference.
        message = next((m for m in all_messages if m.get("id") == message_id), None)

        if message is None and isinstance(message_id, str):
            lowered = message_id.lower()
            message = next(
                (m for m in all_messages
                 if isinstance(m.get("id"), str) and m.get("id").lower() == lowered),
                None,
            )

        if message is None and isinstance(message_id, str):
            message = next(
                (m for m in all_messages
                 if isinstance(m.get("id"), str) and m.get("id").startswith(message_id)),
                None,
            )

        if not message:
            logger.warning(f"Message {message_id} not found in workflow {workflow_id}")
            return False

        logger.info(f"Found message: {message.get('id')}")

        # A message without document references has nothing to remove
        if "documents" not in message or not message["documents"]:
            logger.warning(f"No documents in message {message_id}")
            return False

        documents = message.get("documents", [])
        logger.debug(f"Message has {len(documents)} documents")
        for i, doc in enumerate(documents):
            doc_id = doc.get("id", "unknown")
            file_id_value = doc.get("file_id", "unknown")
            logger.debug(f"Document {i}: doc_id={doc_id}, file_id={file_id_value}")

        # Partition documents into kept vs. removed using the flexible
        # matching scheme: exact id/file_id equality or substring containment.
        # NOTE(review): the substring check can match unrelated ids
        # (e.g. file_id 1 inside "file_12") — confirm this is intended.
        updated_documents = []
        removed = False
        for doc in documents:
            doc_id = doc.get("id")
            file_id_value = doc.get("file_id")

            matches = (
                doc_id == file_id
                or file_id_value == file_id
                or (isinstance(doc_id, str) and str(file_id) in doc_id)
                or (isinstance(file_id_value, str) and str(file_id) in file_id_value)
            )

            if matches:
                removed = True
                logger.info(f"Found file to remove: doc_id={doc_id}, file_id={file_id_value}")
            else:
                updated_documents.append(doc)

        if not removed:
            logger.warning(f"No matching file {file_id} found in message {message_id}")
            return False

        # Persist the trimmed documents list directly to the database
        updated = self.db.record_modify(
            "workflow_messages", message["id"], {"documents": updated_documents}
        )

        if updated:
            logger.info(f"Successfully removed file {file_id} from message {message_id}")
            return True

        logger.warning(f"Failed to update message {message_id} in database")
        return False
    except Exception as e:
        logger.error(f"Error removing file {file_id} from message {message_id}: {str(e)}")
        return False
|
|
|
|
|
|
# Workflow Logs
|
|
|
|
def get_workflow_logs(self, workflow_id: str) -> List[Dict[str, Any]]:
    """Return every log entry recorded for the given workflow."""
    record_filter = {"workflow_id": workflow_id}
    return self.db.get_recordset("workflow_logs", record_filter=record_filter)
|
|
|
|
def create_workflow_log(self, log_data: Dict[str, Any]) -> Dict[str, Any]:
    """
    Create a new log entry for a workflow.

    Stamps the entry with the current timestamp when the caller
    did not provide one, then persists it.
    """
    if "timestamp" not in log_data:
        log_data["timestamp"] = self._get_current_timestamp()
    return self.db.record_create("workflow_logs", log_data)
|
|
|
|
|
|
# Workflow Management
|
|
|
|
def save_workflow_state(self, workflow: Dict[str, Any], save_messages: bool = True, save_logs: bool = True) -> bool:
    """
    Saves the state of a workflow to the database.
    Workflow data is updated, but messages are stored separately.

    Args:
        workflow: The workflow object
        save_messages: Flag to determine if messages should be saved
        save_logs: Flag to determine if logs should be saved

    Returns:
        True on success, False on failure
    """
    try:
        workflow_id = workflow.get("id")
        if not workflow_id:
            return False

        # Extract only the database-relevant workflow fields
        # IMPORTANT: Don't store messages in the workflow table!
        workflow_db_data = {
            "id": workflow_id,
            "mandate_id": workflow.get("mandate_id", self.mandate_id),
            "user_id": workflow.get("user_id", self.user_id),
            "name": workflow.get("name", f"Workflow {workflow_id}"),
            "status": workflow.get("status", "completed"),
            "started_at": workflow.get("started_at", self._get_current_timestamp()),
            "last_activity": workflow.get("last_activity", self._get_current_timestamp()),
            "data_stats": workflow.get("data_stats", {})
        }

        # Create or update the base workflow record
        if self.get_workflow(workflow_id):
            self.update_workflow(workflow_id, workflow_db_data)
        else:
            self.create_workflow(workflow_db_data)

        # Save messages
        if save_messages and "messages" in workflow:
            # PERF: fetch existing messages once — previously this query ran
            # inside the loop, causing one DB round-trip per message.
            existing_by_id = {
                m.get("id"): m for m in self.get_workflow_messages(workflow_id)
            }

            for message in workflow["messages"]:
                message_id = message.get("id")
                if not message_id:
                    continue

                # Messages are persisted via create_workflow_message; here we
                # only write back when one of the tracked fields changed.
                existing_message = existing_by_id.get(message_id)
                if existing_message:
                    has_changes = any(
                        key in message and message.get(key) != existing_message.get(key)
                        for key in ["role", "agent_name", "content", "status", "documents"]
                    )
                    if has_changes:
                        # Extract only relevant data for the database
                        message_data = {
                            "role": message.get("role", existing_message.get("role", "unknown")),
                            "content": message.get("content", existing_message.get("content", "")),
                            "agent_name": message.get("agent_name", existing_message.get("agent_name", "")),
                            "status": message.get("status", existing_message.get("status", "completed")),
                            "documents": message.get("documents", existing_message.get("documents", []))
                        }
                        self.update_workflow_message(message_id, message_data)
                else:
                    # The message should already have been saved via
                    # create_workflow_message; warn if it is missing.
                    logger.warning(f"Message {message_id} in workflow {workflow_id} not found in database")

        # Save logs
        if save_logs and "logs" in workflow:
            existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)}

            for log in workflow["logs"]:
                log_id = log.get("id")
                if not log_id:
                    continue

                # Extract only relevant data for the database
                log_data = {
                    "id": log_id,
                    "workflow_id": workflow_id,
                    "message": log.get("message", ""),
                    "type": log.get("type", "info"),
                    "timestamp": log.get("timestamp", self._get_current_timestamp()),
                    "agent_id": log.get("agent_id"),
                    "agent_name": log.get("agent_name")
                }

                # Create or update log
                if log_id in existing_logs:
                    self.db.record_modify("workflow_logs", log_id, log_data)
                else:
                    # BUG FIX: record_create takes (table, data) — see
                    # create_workflow_log. The previous extra log_id argument
                    # raised a TypeError that the broad except below swallowed,
                    # so new log entries were never persisted.
                    self.db.record_create("workflow_logs", log_data)

        return True
    except Exception as e:
        logger.error(f"Error saving workflow state: {str(e)}")
        return False
|
|
|
|
def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]:
    """
    Loads the complete state of a workflow from the database.
    This includes the workflow itself, messages, and logs.

    Args:
        workflow_id: ID of the workflow to load

    Returns:
        The complete workflow object or None on error
    """
    try:
        # Base record first; without it there is nothing to assemble
        workflow = self.get_workflow(workflow_id)
        if not workflow:
            return None

        logger.debug(f"Loaded base workflow {workflow_id} from database")

        # Messages, ordered by their sequence number
        messages = self.get_workflow_messages(workflow_id)
        messages.sort(key=lambda entry: entry.get("sequence_no", 0))
        logger.debug(f"Loaded {len(messages)} messages for workflow {workflow_id}")

        # Repair message_ids when it is missing or out of sync with
        # the actual message records
        message_ids = workflow.get("message_ids", [])
        if not message_ids or len(message_ids) != len(messages):
            message_ids = [entry.get("id") for entry in messages]
            self.update_workflow(workflow_id, {"message_ids": message_ids})
            logger.info(f"Rebuilt message_ids for workflow {workflow_id}")

        # Surface per-message document counts for debugging
        for entry in messages:
            doc_count = len(entry.get("documents", []))
            if doc_count > 0:
                logger.info(f"Message {entry.get('id')} has {doc_count} documents loaded from database")

        # Logs, ordered chronologically
        logs = self.get_workflow_logs(workflow_id)
        logs.sort(key=lambda item: item.get("timestamp", ""))

        # Assemble the complete workflow object
        complete_workflow = workflow.copy()
        complete_workflow["messages"] = messages
        complete_workflow["message_ids"] = message_ids  # Ensure message_ids is included
        complete_workflow["logs"] = logs
        return complete_workflow
    except Exception as e:
        logger.error(f"Error loading workflow state: {str(e)}")
        return None
|
|
|
|
# Singleton factory: one LucyDOMInterface instance per (mandate, user) context
_lucydom_interfaces = {}


def get_lucydom_interface(mandate_id: int = 0, user_id: int = 0) -> LucyDOMInterface:
    """
    Return a LucyDOMInterface instance for the given context,
    reusing an existing instance when one was created before.
    """
    context_key = f"{mandate_id}_{user_id}"
    instance = _lucydom_interfaces.get(context_key)
    if instance is None:
        # Lazily create and cache the interface for this context
        instance = LucyDOMInterface(mandate_id, user_id)
        _lucydom_interfaces[context_key] = instance
    return instance
|
|
|
|
# Init
# NOTE(review): this eagerly creates the default (mandate 0 / user 0)
# interface at import time, which per __init__ also triggers database
# initialization — an import-time side effect; confirm it is intended.
get_lucydom_interface()