basic set for integration testing

ValueOn AG 2025-04-21 17:44:28 +02:00
parent 8b234a9a30
commit f90483e3fd
24 changed files with 3303 additions and 1684 deletions

View file

@@ -19,18 +19,6 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-opus-20240229
Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000
# AgentService configuration
Module_AgentserviceInterface_UPLOAD_DIR = ./_uploads
# File management configuration
File_Management_MAX_UPLOAD_SIZE_MB = 50
File_Management_CLEANUP_INTERVAL = 240
@@ -50,4 +38,17 @@ Security_PASSWORD_REQUIRE_LOWERCASE = True
Security_PASSWORD_REQUIRE_NUMBERS = True
Security_PASSWORD_REQUIRE_SPECIAL = True
Security_FAILED_LOGIN_LIMIT = 5
Security_LOCK_DURATION_MINUTES = 30
# Agent Webcrawler configuration
Agent_Webcrawler_TIMEOUT = 10
Agent_Webcrawler_MAX_URLS = 3
Agent_Webcrawler_MAX_TOKENS = 30000
Agent_Webcrawler_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36
Agent_Webcrawler_SEARCH_ENGINE = https://html.duckduckgo.com/html/?q=
Agent_Webcrawler_MAX_SEARCH_KEYWORDS = 3
Agent_Webcrawler_MAX_SEARCH_RESULTS = 5
# Agent Coder configuration
Agent_Coder_INSTALL_TIMEOUT = 180
Agent_Coder_EXECUTION_TIMEOUT = 60
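The keys above are plain KEY = value settings. As a rough sketch (assuming the application exposes them as environment variables, e.g. via python-dotenv; the loader below is illustrative, not the project's actual mechanism), they might be consumed like this:

import os

# Illustrative only: assumes the keys above are present as environment variables.
CRAWLER_TIMEOUT = int(os.environ.get("Agent_Webcrawler_TIMEOUT", "10"))
CODER_INSTALL_TIMEOUT = int(os.environ.get("Agent_Coder_INSTALL_TIMEOUT", "180"))
CODER_EXECUTION_TIMEOUT = int(os.environ.get("Agent_Coder_EXECUTION_TIMEOUT", "60"))
print(CRAWLER_TIMEOUT, CODER_INSTALL_TIMEOUT, CODER_EXECUTION_TIMEOUT)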

View file

@@ -10,76 +10,76 @@ logger = logging.getLogger(__name__)
class DatabaseConnector:
"""
A connector for JSON-based data storage.
Provides generic database operations.
"""
def __init__(self, db_host: str, db_database: str, db_user: str = None, db_password: str = None, mandate_id: int = None, user_id: int = None):
"""
Initializes the JSON database connector.
Args:
db_host: Directory for the JSON files
db_database: Database name
db_user: Username for authentication (optional)
db_password: API key for authentication (optional)
mandate_id: Context parameter for the tenant
user_id: Context parameter for the user
"""
# Store the input parameters
self.db_host = db_host
self.db_database = db_database
self.db_user = db_user
self.db_password = db_password
# Check if context parameters are set
if mandate_id is None or user_id is None:
raise ValueError("mandate_id and user_id must be set")
# Ensure the database directory exists
self.db_folder = os.path.join(self.db_host, self.db_database)
os.makedirs(self.db_folder, exist_ok=True)
# Cache for loaded data
self._tables_cache = {}
# Initialize system table
self._system_table_name = "_system"
self._initialize_system_table()
# Temporarily store mandate_id and user_id
self._mandate_id = mandate_id
self._user_id = user_id
# If mandate_id or user_id are 0, try to use the initial IDs
if mandate_id == 0:
initial_mandate_id = self.get_initial_id("mandates")
if initial_mandate_id is not None:
self._mandate_id = initial_mandate_id
logger.info(f"Using initial mandate_id: {initial_mandate_id} instead of 0")
if user_id == 0:
initial_user_id = self.get_initial_id("users")
if initial_user_id is not None:
self._user_id = initial_user_id
logger.info(f"Using initial user_id: {initial_user_id} instead of 0")
# Set the effective IDs as properties
self.mandate_id = self._mandate_id
self.user_id = self._user_id
logger.info(f"DatabaseConnector initialized for directory: {self.db_folder}")
logger.debug(f"Context: mandate_id={self.mandate_id}, user_id={self.user_id}")
def _initialize_system_table(self):
"""Initializes the system table if it doesn't exist yet."""
system_table_path = self._get_table_path(self._system_table_name)
if not os.path.exists(system_table_path):
empty_system_table = {}
self._save_system_table(empty_system_table)
logger.info(f"System table initialized in {system_table_path}")
def _load_system_table(self) -> Dict[str, int]:
"""Loads the system table with the initial IDs."""
system_table_path = self._get_table_path(self._system_table_name)
try:
if os.path.exists(system_table_path):
@@ -88,66 +88,66 @@ class DatabaseConnector:
else:
return {}
except Exception as e:
logger.error(f"Error loading the system table: {e}")
return {}
def _save_system_table(self, data: Dict[str, int]) -> bool:
"""Saves the system table with the initial IDs."""
system_table_path = self._get_table_path(self._system_table_name)
try:
with open(system_table_path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
return True
except Exception as e:
logger.error(f"Error saving the system table: {e}")
return False
def _get_table_path(self, table: str) -> str:
"""Returns the full path to a table file"""
return os.path.join(self.db_folder, f"{table}.json")
def _load_table(self, table: str) -> List[Dict[str, Any]]:
"""Loads a table from the corresponding JSON file"""
path = self._get_table_path(table)
# If the table is the system table, load it directly
if table == self._system_table_name:
return [] # The system table is not treated like normal tables
# If the table is already in the cache, use the cache
if table in self._tables_cache:
# logger.info(f"Loading table {table} from cache")
return self._tables_cache[table]
# Otherwise load the file
try:
if os.path.exists(path):
# logger.info(f"Loading table {table} from JSON {path}")
with open(path, 'r', encoding='utf-8') as f:
data = json.load(f)
self._tables_cache[table] = data
# If data was loaded and no initial ID is registered yet,
# register the ID of the first record (if available)
if data and not self.has_initial_id(table):
if "id" in data[0]:
self._register_initial_id(table, data[0]["id"])
logger.info(f"Initial ID {data[0]['id']} for table {table} retroactively registered")
return data
else:
# If the file doesn't exist, create an empty table
logger.info(f"New table {table}")
self._tables_cache[table] = []
self._save_table(table, [])
return []
except Exception as e:
logger.error(f"Error loading table {table}: {e}")
return []
def _save_table(self, table: str, data: List[Dict[str, Any]]) -> bool:
"""Saves a table to the corresponding JSON file"""
# The system table is handled specially
if table == self._system_table_name:
return False
@@ -156,43 +156,44 @@ class DatabaseConnector:
with open(path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False)
# Update the cache
self._tables_cache[table] = data
return True
except Exception as e:
logger.error(f"Error saving table {table}: {e}")
return False
def _filter_by_context(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Filters records by tenant and user context,
if these fields exist in the record.
"""
filtered_records = []
for record in records:
# Check if mandate_id exists in the record and is not null
has_mandate = "mandate_id" in record and record["mandate_id"] is not None and record["mandate_id"] != ""
# Check if user_id exists in the record and is not null
has_user = "user_id" in record and record["user_id"] is not None and record["user_id"] != ""
# If both exist, filter accordingly
if has_mandate and has_user:
if record["mandate_id"] == self.mandate_id:
filtered_records.append(record)
# If only mandate_id exists
elif has_mandate and not has_user:
if record["mandate_id"] == self.mandate_id:
filtered_records.append(record)
# If neither mandate_id nor user_id exist, add the record
elif not has_mandate and not has_user:
filtered_records.append(record)
return filtered_records
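For illustration, with a connector created for mandate_id=1, the context filter behaves like this on hypothetical data (values invented for the example):

# Hypothetical input records for a connector with mandate_id=1:
records = [
    {"id": 1, "mandate_id": 1, "user_id": 7},  # kept: matches the tenant
    {"id": 2, "mandate_id": 2, "user_id": 7},  # dropped: belongs to another tenant
    {"id": 3},                                 # kept: carries no context fields
]
# db._filter_by_context(records) would return the records with id 1 and 3.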
def _apply_record_filter(self, records: List[Dict[str, Any]], record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""Applies a record filter to the records"""
if not record_filter:
return records
@@ -202,17 +203,17 @@ class DatabaseConnector:
match = True
for field, value in record_filter.items():
# Check if the field exists
if field not in record:
match = False
break
# If the filter value is an integer string and the record field is an integer
if isinstance(value, str) and value.isdigit() and isinstance(record[field], int):
if record[field] != int(value):
match = False
break
# Otherwise direct comparison
elif record[field] != value:
match = False
break
@@ -222,66 +223,120 @@ class DatabaseConnector:
return filtered_records
def _register_initial_id(self, table: str, initial_id: int) -> bool:
"""
Registers the initial ID for a table.
Args:
table: Name of the table
initial_id: The initial ID
Returns:
True on success, False on error
"""
try:
# Load the current system table
system_data = self._load_system_table()
# Only register if not already present
if table not in system_data:
system_data[table] = initial_id
success = self._save_system_table(system_data)
if success:
logger.info(f"Initial ID {initial_id} for table {table} registered")
return success
return True # If already present, this is not an error
except Exception as e:
logger.error(f"Error registering the initial ID for table {table}: {e}")
return False
def _remove_initial_id(self, table: str) -> bool:
"""
Removes the initial ID for a table from the system table.
Args:
table: Name of the table
Returns:
True on success, False on error
"""
try:
# Load the current system table
system_data = self._load_system_table()
# Remove the entry if it exists
if table in system_data:
del system_data[table]
success = self._save_system_table(system_data)
if success:
logger.info(f"Initial ID for table {table} removed from system table")
return success
return True # If not present, this is not an error
except Exception as e:
logger.error(f"Error removing initial ID for table {table}: {e}")
return False
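For orientation: the _system table is a flat JSON object mapping table names to the ID of their first record. A minimal sketch of what <db_folder>/_system.json might contain (illustrative values, not from the source):

import json

# Hypothetical state after the first "mandates" and "users" records were registered.
system_table = {"mandates": 1, "users": 1}
print(json.dumps(system_table, indent=2))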
# Public API
def get_tables(self, filter_criteria: Dict[str, Any] = None) -> List[str]:
"""
Returns a list of all available tables.
Args:
filter_criteria: Optional filter criteria (not implemented)
Returns:
List of table names
"""
tables = []
try:
for filename in os.listdir(self.db_folder):
if filename.endswith('.json') and not filename.startswith('_'):
table_name = filename[:-5] # Remove the .json extension
tables.append(table_name)
except Exception as e:
logger.error(f"Error reading the database directory: {e}")
return tables
def get_fields(self, table: str, filter_criteria: Dict[str, Any] = None) -> List[str]:
"""
Returns a list of all fields in a table.
Args:
table: Name of the table
filter_criteria: Optional filter criteria (not implemented)
Returns:
List of field names
"""
# Load the table data
data = self._load_table(table)
if not data:
return []
# Take the first record as a reference for the fields
fields = list(data[0].keys()) if data else []
return fields
def get_schema(self, table: str, language: str = None, filter_criteria: Dict[str, Any] = None) -> Dict[str, Dict[str, Any]]:
"""
Returns a schema object for a table with data types and labels.
Args:
table: Name of the table
language: Language for the labels (optional)
filter_criteria: Optional filter criteria (not implemented)
Returns:
Schema object with fields, data types and labels
"""
# Load the table data
data = self._load_table(table)
schema = {}
@@ -289,18 +344,18 @@ class DatabaseConnector:
if not data:
return schema
# Take the first record as a reference for the fields and data types
first_record = data[0]
for field, value in first_record.items():
# Determine the data type
data_type = type(value).__name__
# Create label (default is the field name)
label = field
# If model_info is available, try to get the label from the model
# Implementation depends on the actual model
schema[field] = {
"type": data_type,
@@ -311,27 +366,27 @@ class DatabaseConnector:
def get_recordset(self, table: str, field_filter: Dict[str, Any] = None, record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""
Returns a list of records from a table, filtered by criteria.
Args:
table: Name of the table
field_filter: Filter for fields (which fields should be returned)
record_filter: Filter for records (which records should be returned)
Returns:
List of filtered records
"""
# Load the table data
data = self._load_table(table)
# Filter by tenant and user context
filtered_data = self._filter_by_context(data)
# Apply record_filter if available
if record_filter:
filtered_data = self._apply_record_filter(filtered_data, record_filter)
# If field_filter is available, reduce the fields
if field_filter and isinstance(field_filter, list):
result = []
for record in filtered_data:
@@ -346,187 +401,157 @@ class DatabaseConnector:
def record_create(self, table: str, record_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Creates a new record in the table.
Args:
table: Name of the table
record_data: Data for the new record
Returns:
The created record
"""
# Load the table data
data = self._load_table(table)
# Add mandate_id and user_id if not present or 0
if "mandate_id" not in record_data or record_data["mandate_id"] == 0:
record_data["mandate_id"] = self.mandate_id
if "user_id" not in record_data or record_data["user_id"] == 0:
record_data["user_id"] = self.user_id
# Determine the next ID if not present
if "id" not in record_data:
next_id = 1
if data:
next_id = max((record["id"] for record in data if "id" in record), default=0) + 1
record_data["id"] = next_id
# If the table is empty and a system ID should be registered
if not data:
self._register_initial_id(table, record_data["id"])
logger.info(f"Initial ID {record_data['id']} for table {table} has been registered")
# Add the new record
data.append(record_data)
# Save the updated table
if self._save_table(table, data):
return record_data
else:
raise ValueError(f"Error creating the record in table {table}")
def record_delete(self, table: str, record_id: Union[str, int]) -> bool:
"""
Deletes a record from the table.
Args:
table: Name of the table
record_id: ID of the record to delete
Returns:
True on success, False on error
"""
# Load table data
data = self._load_table(table)
# Search for the record
for i, record in enumerate(data):
if "id" in record and record["id"] == record_id:
# Check if the record belongs to the current mandate
if "mandate_id" in record and record["mandate_id"] != self.mandate_id:
raise ValueError("Not your mandate")
# Check if it's an initial record
initial_id = self.get_initial_id(table)
if initial_id is not None and initial_id == record_id:
# Remove this entry from the system table
self._remove_initial_id(table)
logger.info(f"Initial ID {record_id} for table {table} has been removed from the system table")
# Delete the record
del data[i]
# Save the updated table
return self._save_table(table, data)
# Record not found
return False
def record_modify(self, table: str, record_id: Union[str, int], record_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Modifies a record in the table.
Args:
table: Name of the table
record_id: ID of the record to modify
record_data: New data for the record
Returns:
The updated record
"""
# Load table data
data = self._load_table(table)
# Search for the record
for i, record in enumerate(data):
if "id" in record and record["id"] == record_id:
# Check if the record belongs to the current mandate
if "mandate_id" in record and record["mandate_id"] != self.mandate_id:
raise ValueError("Not your mandate")
# Prevent changing the ID
if "id" in record_data and record_data["id"] != record_id:
raise ValueError(f"The ID of a record in table {table} cannot be changed")
# Update the record
for key, value in record_data.items():
data[i][key] = value
# Save the updated table
if self._save_table(table, data):
return data[i]
else:
raise ValueError(f"Error updating record in table {table}")
# Record not found
raise ValueError(f"Record with ID {record_id} not found in table {table}")
def has_initial_id(self, table: str) -> bool:
"""
Checks if an initial ID is registered for a table.
Args:
table: Name of the table
Returns:
True if an initial ID is registered, otherwise False
"""
system_data = self._load_system_table()
return table in system_data
def get_initial_id(self, table: str) -> Optional[int]:
"""
Returns the initial ID for a table.
Args:
table: Name of the table
Returns:
The initial ID or None if not present
"""
system_data = self._load_system_table()
initial_id = system_data.get(table)
if initial_id is None:
logger.debug(f"No initial ID found for table {table}")
return initial_id
def get_all_initial_ids(self) -> Dict[str, int]:
"""
Returns all registered initial IDs.
Returns:
Dictionary with table names as keys and initial IDs as values
"""
system_data = self._load_system_table()
return system_data.copy() # Return a copy to protect the original
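A minimal usage sketch of the connector defined above; the directory, table name, and record values are invented for the example:

# Illustrative usage of DatabaseConnector (hypothetical values).
db = DatabaseConnector(db_host="./_data", db_database="testdb", mandate_id=1, user_id=1)
created = db.record_create("notes", {"title": "hello"})
records = db.get_recordset("notes", record_filter={"title": "hello"})
print(created["id"], len(records))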

View file

@@ -0,0 +1,814 @@
"""
Coder agent for development and execution of Python code.
Optimized for the new task-based processing.
"""
import logging
import json
import re
import uuid
import os
import subprocess
import tempfile
import shutil
import sys
from typing import Dict, Any, List, Optional, Tuple
from modules.chat_registry import AgentBase
logger = logging.getLogger(__name__)
class AgentCoder(AgentBase):
"""Agent for development and execution of Python code"""
def __init__(self):
"""Initialize the coder agent"""
super().__init__()
self.name = "coder"
self.description = "Develops and executes Python code for data processing and automation"
self.capabilities = [
"code_development",
"data_processing",
"file_processing",
"automation",
"code_execution"
]
# Executor settings
self.executor_timeout = 60 # seconds
self.executor_memory_limit = 512 # MB
# AI service settings
self.ai_temperature = 0.1 # Lower temperature for deterministic code generation
# Auto-correction settings
self.max_correction_attempts = 3 # Maximum number of correction attempts
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a standardized task structure and perform code development/execution.
Args:
task: A dictionary containing:
- task_id: Unique ID for this task
- prompt: The main instruction for the agent
- input_documents: List of documents to process
- output_specifications: List of required output documents
- context: Additional contextual information
Returns:
A dictionary containing:
- feedback: Text response explaining the code execution
- documents: List of created document objects
"""
try:
# Extract relevant task information
prompt = task.get("prompt", "")
input_documents = task.get("input_documents", [])
output_specs = task.get("output_specifications", [])
context_info = task.get("context", {})
# Check if AI service is available
if not self.ai_service:
logger.error("No AI service configured for the Coder agent")
return {
"feedback": "The Coder agent is not properly configured.",
"documents": []
}
# Extract context from input documents
document_context = self._extract_document_context(input_documents)
# Generate code based on the prompt and document context
logger.info("Generating code based on the task")
code_to_execute, requirements = await self._generate_code_from_prompt(prompt, document_context)
if not code_to_execute:
logger.warning("AI couldn't generate any code")
return {
"feedback": "I couldn't generate executable code based on the task. Please provide more detailed instructions.",
"documents": []
}
logger.info(f"Code generated with AI ({len(code_to_execute)} characters)")
# Collect created documents
generated_documents = []
# Add code as first document
code_doc = {
"label": "generated_code.py",
"content": code_to_execute
}
generated_documents.append(code_doc)
# Execute code with auto-correction loop
execution_context = {
"input_documents": input_documents,
"task": task
}
# Enhanced execution with auto-correction
result, attempts_info = await self._execute_with_auto_correction(
code_to_execute,
requirements,
execution_context,
prompt # Original prompt/message
)
# Create output documents based on execution result and output specifications
if result.get("success", False):
# Code execution successful
output = result.get("output", "")
execution_result = result.get("result")
logger.info("Code executed successfully")
# Determine output type of the result
result_docs = self._generate_result_documents(
attempts_info[-1]["code"], # Last successful code
output,
execution_result,
output_specs
)
# Add result documents
generated_documents.extend(result_docs)
# Create feedback for successful execution
feedback = f"I successfully executed the code and generated {len(result_docs)} output files."
if attempts_info and len(attempts_info) > 1:
feedback += f" (This required {len(attempts_info)-1} correction attempts)"
else:
# Code execution failed after all attempts
error = result.get("error", "Unknown error")
logger.error(f"Error in code execution after all correction attempts: {error}")
# Add error log as additional document
error_doc = {
"label": "execution_error.txt",
"content": f"Execution error:\n\n{error}"
}
generated_documents.append(error_doc)
# Create feedback for failed execution
feedback = f"An error occurred during code execution after {len(attempts_info)} correction attempts."
# If no specific outputs requested, create standard outputs
if not output_specs and result.get("success", False):
# Add standard output document
output_doc = {
"label": "execution_output.txt",
"content": output
}
generated_documents.append(output_doc)
# If a result is available, also add as JSON document
if execution_result:
result_json = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
result_doc = {
"label": "execution_result.json",
"content": result_json
}
generated_documents.append(result_doc)
return {
"feedback": feedback,
"documents": generated_documents
}
except Exception as e:
error_msg = f"Error during processing by the Coder agent: {str(e)}"
logger.error(error_msg)
return {
"feedback": f"An error occurred during code processing: {str(e)}",
"documents": []
}
def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
"""
Extract context from input documents for code generation.
Args:
documents: List of document objects
Returns:
Extracted context as text
"""
context_parts = []
for doc in documents:
doc_name = doc.get("name", "Unnamed document")
context_parts.append(f"--- {doc_name} ---")
for content in doc.get("contents", []):
if content.get("metadata", {}).get("is_text", False):
context_parts.append(content.get("data", ""))
return "\n\n".join(context_parts)
def _generate_result_documents(self, code: str, output: str, execution_result: Any,
output_specs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Generate output documents based on execution results and specifications.
Args:
code: Executed code
output: Text output of the execution
execution_result: Result object from execution
output_specs: Output specifications
Returns:
List of generated document objects
"""
documents = []
# If no specific outputs requested
if not output_specs:
return documents
# Generate appropriate document for each requested output
for spec in output_specs:
output_label = spec.get("label", "")
output_description = spec.get("description", "")
# Determine output type based on file extension
format_type = self._determine_format_type(output_label)
# Generate document content based on format and output
if "code" in output_label.lower() or format_type in ["py", "js", "html", "css"]:
# Code document
documents.append({
"label": output_label,
"content": code
})
elif "output" in output_label.lower() or format_type == "txt":
# Output document
documents.append({
"label": output_label,
"content": output
})
elif format_type in ["json", "yml", "yaml"] and execution_result:
# JSON result document
if isinstance(execution_result, (dict, list)):
content = json.dumps(execution_result, indent=2)
else:
content = str(execution_result)
documents.append({
"label": output_label,
"content": content
})
else:
# Generic result document (fallback)
result_str = ""
if execution_result:
if isinstance(execution_result, (dict, list)):
result_str = json.dumps(execution_result, indent=2)
else:
result_str = str(execution_result)
documents.append({
"label": output_label,
"content": f"Code output:\n\n{output}\n\nResult:\n\n{result_str}"
})
return documents
def _determine_format_type(self, output_label: str) -> str:
"""
Determine the format type based on the filename.
Args:
output_label: Output filename
Returns:
Format type (py, js, json, txt, etc.)
"""
if '.' not in output_label:
return "txt" # Default format
extension = output_label.split('.')[-1].lower()
return extension
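Illustrative behaviour, assuming agent is an AgentCoder instance (hypothetical calls, not from the source):

assert agent._determine_format_type("report.JSON") == "json"  # extension, lowercased
assert agent._determine_format_type("notes") == "txt"         # no extension -> default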
async def _execute_with_auto_correction(
self,
initial_code: str,
requirements: List[str],
context: Dict[str, Any],
original_prompt: str
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
"""
Execute code with automatic error correction and retry attempts.
Args:
initial_code: The initial Python code
requirements: List of required packages
context: Additional context for execution
original_prompt: The original user request/prompt
Returns:
Tuple of (final execution result, list of attempt info dictionaries)
"""
# Initialize tracking data
current_code = initial_code
current_requirements = requirements.copy() if requirements else []
attempts_info = []
# Execute with correction loop
for attempt in range(1, self.max_correction_attempts + 1):
if attempt == 1:
logger.info(f"Executing code (attempt {attempt}/{self.max_correction_attempts})")
else:
logger.info(f"Executing corrected code (attempt {attempt}/{self.max_correction_attempts})")
# Execute current code version
result = await self._execute_code(current_code, current_requirements, context)
# Record attempt information
attempts_info.append({
"attempt": attempt,
"code": current_code,
"error": result.get("error", ""),
"success": result.get("success", False)
})
# Check if execution was successful
if result.get("success", False):
# Success! Return result and attempt info
return result, attempts_info
# Failed execution - check if max attempt limit reached
if attempt >= self.max_correction_attempts:
logger.warning(f"Maximum correction attempts ({self.max_correction_attempts}) reached")
break
# Correct code based on the error
error_message = result.get("error", "Unknown error")
logger.info(f"Attempting to fix code error: {error_message[:200]}...")
# Generate corrected code
corrected_code, new_requirements = await self._generate_code_correction(
current_code,
error_message,
original_prompt,
current_requirements
)
# Update for next attempt
if corrected_code:
current_code = corrected_code
# Add new requirements
if new_requirements:
for req in new_requirements:
if req not in current_requirements:
current_requirements.append(req)
logger.info(f"Added new requirement: {req}")
else:
# Correction couldn't be generated, end loop
logger.warning("Couldn't generate code correction")
break
# If we reach here, all attempts failed - return last result and attempt info
return result, attempts_info
async def _generate_code_correction(
self,
code: str,
error_message: str,
original_prompt: str,
current_requirements: List[str] = None
) -> Tuple[str, List[str]]:
"""
Generate a corrected version of code based on error messages.
Args:
code: The code that generated errors
error_message: The error message to fix
original_prompt: The original task/requirements
current_requirements: List of currently required packages
Returns:
Tuple of (corrected code, new requirements list)
"""
try:
# Create detailed prompt for code correction
correction_prompt = f"""You need to fix an error in Python code. The code was written for this task:
ORIGINAL TASK:
{original_prompt}
CURRENT CODE:
```python
{code}
```
ERROR MESSAGE:
```
{error_message}
```
CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"}
Your task is to analyze the error and provide a corrected version of the code.
Focus specifically on fixing the error while maintaining the original functionality.
Common fixes include:
- Fixing syntax errors (missing parentheses, indentation, etc.)
- Solving import errors by adding appropriate requirements
- Correcting file paths or handling "file not found" errors
- Adding error handling for specific edge cases
- Fixing logical errors in the code
FORMATTING GUIDELINES:
1. Provide ONLY the complete corrected Python code WITHOUT explanations
2. Do NOT use code block markers like ```python or ```
3. Do NOT explain what the code does before or after
4. Do NOT add any text that isn't valid Python code
5. Start your answer directly with valid Python code
6. End your answer with valid Python code
If you need to add new required packages, place them in a specially formatted comment at the beginning of your code as follows:
# REQUIREMENTS: package1,package2,package3
Your entire answer must be valid Python that can be executed without modifications.
"""
# Create messages for API
messages = [
{"role": "system", "content": "You are a Python debugging expert. You provide ONLY clean, error-free Python code, without explanations, markdown formatting, or text that isn't code."},
{"role": "user", "content": correction_prompt}
]
# Call API with very low temperature for deterministic corrections
generated_content = await self.ai_service.call_api(
messages,
temperature=0.1
)
# Clean up the generated content to ensure it's only valid Python code
fixed_code = self._clean_code(generated_content)
# Extract requirements from special comment at beginning of code
new_requirements = []
for line in fixed_code.split('\n'):
if line.strip().startswith("# REQUIREMENTS:"):
req_str = line.replace("# REQUIREMENTS:", "").strip()
new_requirements = [r.strip() for r in req_str.split(',') if r.strip()]
break
return fixed_code, new_requirements
except Exception as e:
logger.error(f"Error generating code correction: {str(e)}")
# Return None to indicate failure
return None, []
def _clean_code(self, code: str) -> str:
"""
Clean code by removing markdown code block markers and other formatting artifacts.
Args:
code: The code string to clean
Returns:
Cleaned code string
"""
# Remove code block markers at beginning/end
code = re.sub(r'^```(?:python)?\s*', '', code)
code = re.sub(r'```\s*$', '', code)
# Process lines in reverse order to start from the end
lines = code.split('\n')
clean_lines = []
in_trailing_markdown = False
for line in reversed(lines):
stripped = line.strip()
# Check if this line contains only backticks (``` or ` or ``)
if re.match(r'^`{1,3}$', stripped):
in_trailing_markdown = True
continue
# Once an actual code line is reached, stop skipping trailing markdown
if stripped:
in_trailing_markdown = False
# Add this line if it's not part of trailing markdown
if not in_trailing_markdown:
clean_lines.insert(0, line)
# Rejoin lines
clean_code = '\n'.join(clean_lines)
# Final cleanup for any remaining backticks
clean_code = re.sub(r'`{1,3}\s*', '', clean_code)
return clean_code.strip()
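As an illustration (a hypothetical call, not from the source), the cleaner reduces a fenced model reply to bare code:

# Assuming `agent` is an AgentCoder instance:
raw = "```python\nprint('hi')\n```"
assert agent._clean_code(raw) == "print('hi')"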
async def _generate_code_from_prompt(self, prompt: str, document_context: str) -> Tuple[str, List[str]]:
"""
Generate Python code from a prompt using the AI service.
Args:
prompt: The prompt to generate code from
document_context: Context extracted from documents
Returns:
Tuple of (generated Python code, required packages)
"""
try:
# Prepare prompt for code generation
ai_prompt = f"""Generate Python code to solve the following task:
TASK:
{prompt}
PROVIDED CONTEXT:
{document_context if document_context else "No additional context available."}
IMPORTANT REQUIREMENTS:
1. Your code MUST define a 'result' variable to store the final result.
2. At the end of your script, the result variable should be output.
3. Make your 'result' variable a dictionary or other JSON-serializable data structure containing all relevant outputs.
4. Comment your code well to explain important operations.
5. Make your code complete and self-contained.
6. Add appropriate error handling.
FORMATTING INSTRUCTIONS:
- Return ONLY the Python code, WITHOUT introduction, explanation, or conclusion text
- Do NOT use code block markers like ```python or ```
- Do NOT explain what the code does before or after
- Do NOT add any text that isn't valid Python code
- Start your answer directly with valid Python code
- End your answer with valid Python code
For required packages, place them in a specially formatted comment at the beginning of your code in one line as follows:
# REQUIREMENTS: pandas,numpy,matplotlib,requests
Your entire answer must be valid Python that can be executed without modifications.
"""
# Create messages for API
messages = [
{"role": "system", "content": "You are a Python code generator who provides ONLY clean, executable Python code with no explanations, markdown formatting, or non-code text."},
{"role": "user", "content": ai_prompt}
]
# Call API
logger.info("Calling AI API to generate code")
generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature)
# Clean up the generated content to ensure it's only valid Python code
code = self._clean_code(generated_content)
# Extract requirements from special comment at beginning of code
requirements = []
for line in code.split('\n'):
if line.strip().startswith("# REQUIREMENTS:"):
req_str = line.replace("# REQUIREMENTS:", "").strip()
requirements = [r.strip() for r in req_str.split(',') if r.strip()]
break
return code, requirements
except Exception as e:
logger.error(f"Error generating code with AI: {str(e)}")
# Return basic error handling code and no requirements
error_str = str(e).replace('"', '\\"')
return f"""
# Error in code generation
print(f"An error occurred during code generation: {error_str}")
# Return error result
result = {{"error": "Code generation failed", "message": "{error_str}"}}
""", []
async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Execute Python code in an isolated environment.
Args:
code: The Python code to execute
requirements: List of required packages
context: Additional context for execution
Returns:
Result of code execution
"""
# Use virtual code executor for isolated execution
try:
executor = SimpleCodeExecutor(
timeout=self.executor_timeout,
max_memory_mb=self.executor_memory_limit,
requirements=requirements,
ai_service=self.ai_service
)
# Prepare input data for the code
input_data = {"context": context} if context else {}
# Execute code
result = executor.execute_code(code, input_data)
# Clean up environment
executor.cleanup()
return result
except Exception as e:
error_message = f"Error during code execution: {str(e)}"
logger.error(error_message)
return {
"success": False,
"output": "",
"error": error_message,
"result": None
}
class SimpleCodeExecutor:
"""
A simplified executor that runs Python code in isolated virtual environments.
"""
def __init__(self,
timeout: int = 30,
max_memory_mb: int = 512,
requirements: List[str] = None,
ai_service = None):
"""
Initialize the SimpleCodeExecutor.
Args:
timeout: Maximum execution time in seconds
max_memory_mb: Maximum memory in MB
requirements: List of packages to install
ai_service: Optional - AI service for further processing
"""
self.timeout = timeout
self.max_memory_mb = max_memory_mb
self.temp_dir = None
self.requirements = requirements or []
self.blocked_packages = [
"cryptography", "flask", "django", "tornado", # Security risks
"tensorflow", "pytorch", "scikit-learn" # Resource-intensive packages
]
self.ai_service = ai_service
def _create_venv(self) -> str:
"""Create a virtual environment and return the path."""
# Create new environment
venv_parent_dir = tempfile.mkdtemp(prefix="code_exec_")
self.temp_dir = venv_parent_dir
venv_path = os.path.join(venv_parent_dir, "venv")
try:
# Create virtual environment
subprocess.run([sys.executable, "-m", "venv", venv_path],
check=True,
capture_output=True)
return venv_path
except subprocess.CalledProcessError as e:
logger.error(f"Error creating virtual environment: {e}")
raise RuntimeError(f"Virtual environment could not be created: {e}")
def _get_python_executable(self, venv_path: str) -> str:
"""Return the path to the Python executable in the virtual environment."""
if os.name == 'nt': # Windows
return os.path.join(venv_path, "Scripts", "python.exe")
else: # Unix/Linux
return os.path.join(venv_path, "bin", "python")
def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Execute Python code in an isolated environment.
Args:
code: Python code to execute
input_data: Optional input data for the code
Returns:
Dictionary with execution results
"""
logger.info("Executing code in isolated environment")
# Create virtual environment
venv_path = self._create_venv()
# Create file for the code
code_id = uuid.uuid4().hex[:8]
code_file = os.path.join(self.temp_dir, f"code_{code_id}.py")
# Write code
with open(code_file, "w", encoding="utf-8") as f:
f.write(code)
# Get Python executable
python_executable = self._get_python_executable(venv_path)
logger.info(f"Using Python executable: {python_executable}")
# Execute code
try:
# Execute code from root directory
working_dir = os.path.dirname(code_file)
process = subprocess.run(
[python_executable, code_file],
timeout=self.timeout,
capture_output=True,
text=True,
cwd=working_dir
)
# Process output
stdout = process.stdout
stderr = process.stderr
# Get result from stdout if available
result_data = None
if process.returncode == 0 and stdout:
try:
# Look for the last line that could be JSON
for line in reversed(stdout.strip().split('\n')):
line = line.strip()
if line and line[0] in '{[' and line[-1] in '}]':
try:
result_data = json.loads(line)
# Use successfully parsed JSON result
break
except json.JSONDecodeError:
# Not valid JSON, continue with next line
continue
except Exception as e:
logger.warning(f"Error parsing result from stdout: {str(e)}")
# Create result dictionary
execution_result = {
"success": process.returncode == 0,
"output": stdout,
"error": stderr if process.returncode != 0 else "",
"result": result_data,
"exit_code": process.returncode
}
except subprocess.TimeoutExpired:
logger.error(f"Execution timed out after {self.timeout} seconds")
execution_result = {
"success": False,
"output": "",
"error": f"Execution timed out (timeout after {self.timeout} seconds)",
"result": None,
"exit_code": -1
}
except Exception as e:
logger.error(f"Execution error: {str(e)}")
execution_result = {
"success": False,
"output": "",
"error": f"Execution error: {str(e)}",
"result": None,
"exit_code": -1
}
# Clean up temporary code file
try:
if os.path.exists(code_file):
os.remove(code_file)
except Exception as e:
logger.warning(f"Error cleaning up temporary code file: {e}")
return execution_result
def cleanup(self):
"""Clean up temporary resources."""
# Clean up temporary directory
if self.temp_dir and os.path.exists(self.temp_dir):
try:
shutil.rmtree(self.temp_dir)
logger.info(f"Temporary directory deleted: {self.temp_dir}")
except Exception as e:
logger.warning(f"Temporary directory {self.temp_dir} could not be deleted: {e}")
def __del__(self):
"""Cleanup during garbage collection."""
self.cleanup()
# Factory function for the Coder agent
def get_coder_agent():
"""
Factory function that returns an instance of the Coder agent.
Returns:
An instance of the Coder agent
"""
return AgentCoder()
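A minimal sketch of driving SimpleCodeExecutor directly (illustrative; the agent normally reaches it via _execute_code):

executor = SimpleCodeExecutor(timeout=30)
code = 'import json\nresult = {"ok": True}\nprint(json.dumps(result))'
outcome = executor.execute_code(code)
print(outcome["success"], outcome["result"])  # expected: True {'ok': True}
executor.cleanup()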

File diff suppressed because it is too large

View file

@@ -41,7 +41,7 @@ class ChatManager:
self.ai_service = ChatService()
self.lucy_interface = get_lucydom_interface(mandate_id, user_id)
self.agent_registry = get_agent_registry()
self.agent_registry.set_ai_service(self.ai_service)
### Chat Management
@@ -60,7 +60,7 @@ class ChatManager:
workflow = self.workflow_init(workflow_id)
# 2. Transform the user input into a message object and store it in the workflow
message_user = await self.chat_message_to_workflow("user", "", user_input, workflow)
# 3. Create the project manager prompt and analyze the response
project_manager_response = await self.chat_prompt(message_user, workflow)
@@ -88,7 +88,7 @@ class ChatManager:
obj_results.extend(task_results)
# 6. Create the final response with the relevant documents from obj_final_documents
final_message = await self.chat_final_message(obj_user_response, obj_final_documents, obj_results)
self.message_add(workflow, final_message)
# 7. Finalize the workflow
@@ -215,7 +215,7 @@ JSON_OUTPUT = {{
# Parse the JSON response
return self.parse_json_response(project_manager_output)
async def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
"""
Integrates user input into a message object, including files with their full contents.
@@ -240,7 +240,7 @@ JSON_OUTPUT = {{
# Process additional files with their full contents
additional_fileids = chat_message.get("list_file_id", [])
additional_files = await self.process_file_ids(additional_fileids)
# Create the message object
message_object = {
@@ -254,30 +254,20 @@ JSON_OUTPUT = {{
logger.debug(f"message_user = {self.parse_json2text(message_object)}.")
return message_object
async def chat_final_message(self, obj_user_response: str, obj_final_documents: List[Dict[str, Any]], obj_results: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Creates the final response message with a review of promised and delivered documents.
Args:
obj_user_response: Initial text response to the user
obj_final_documents: List of expected response documents
obj_results: List of generated result documents
Returns:
Complete message object with content and relevant documents
"""
# Find documents that match the obj_final_documents requirements
matching_documents = []
for answer_spec in obj_final_documents:
answer_label = answer_spec.get("label")
@@ -287,38 +277,58 @@ JSON_OUTPUT = {{
doc_name = self.get_filename(doc)
# Check if this document matches the answer specification
if doc_name == answer_label:
content_ref = []
for c in doc.get("contents", []):
content_ref.append(c.get("summary"))
doc_ref = {
"label": doc_name,
"content_summary": content_ref
}
matching_documents.append(doc_ref)
break
final_prompt = await self.ai_service.call_api([
{"role": "system", "content": "You are a project manager, who delivers results to a user."},
{"role": "user", "content": f"""
Give the user short final feedback that refers back to the initial statement (obj_user_response). Provide a list of the delivered files (files_delivered). If some files from the promised list (files_promised) are missing from the delivered list (files_delivered), briefly point this out; otherwise confirm that the task is completed.
Here is the data:
obj_user_response = {self.parse_json2text(obj_user_response)}
files_promised = {self.parse_json2text(obj_final_documents)}
files_delivered = {self.parse_json2text(matching_documents)}
"""
}
])
logger.debug(f"FINAL PROMPT = {self.parse_json2text(final_prompt)}.")
# Create basic message structure with proper fields
final_message = {
"role": "assistant",
"agent_name": "project_manager",
"content": final_prompt,
"documents": [] # Do NOT include the result documents; they were already delivered with the agent messages
}
logger.debug(f"FINAL MESSAGE = {self.parse_json2text(final_message)}.")
return final_message
### Workflow
def workflow_init(self, workflow_id: Optional[str] = None) -> Dict[str, Any]:
"""
Initializes a workflow or loads an existing one with round counting.
Args:
workflow_id: Optional - ID of the workflow to load
Returns:
Initialized workflow object
"""
current_time = datetime.now().isoformat()
if workflow_id is None or not self.lucy_interface.get_workflow(workflow_id):
# Create new workflow
new_workflow_id = str(uuid.uuid4()) if workflow_id is None else workflow_id
workflow = {
"id": new_workflow_id,
@@ -326,7 +336,8 @@ JSON_OUTPUT = {{
"user_id": self.user_id,
"name": f"Workflow {new_workflow_id[:8]}",
"started_at": current_time,
"messages": [], # Empty list - will be filled with references
"message_ids": [], # Initialize empty message_ids list
"logs": [],
"data_stats": {},
"current_round": 1,
@@ -335,28 +346,84 @@ JSON_OUTPUT = {{
"waiting_for_user": False
}
# Save to database - only the workflow metadata
workflow_db = {
"id": workflow["id"],
"mandate_id": workflow["mandate_id"],
"user_id": workflow["user_id"],
"name": workflow["name"],
"started_at": workflow["started_at"],
"status": workflow["status"],
"data_stats": workflow["data_stats"],
"current_round": workflow["current_round"],
"last_activity": workflow["last_activity"],
"waiting_for_user": workflow["waiting_for_user"],
"message_ids": workflow["message_ids"] # Include message_ids
}
self.lucy_interface.create_workflow(workflow_db)
return workflow
else:
# Load existing workflow
workflow = self.lucy_interface.load_workflow_state(workflow_id)
# Ensure message_ids exists
if "message_ids" not in workflow:
# Initialize from existing messages
workflow["message_ids"] = [msg["id"] for msg in workflow.get("messages", [])]
# Update in database
self.lucy_interface.update_workflow(workflow_id, {"message_ids": workflow["message_ids"]})
# Update status and increment round counter
workflow["status"] = "running"
workflow["last_activity"] = current_time
workflow["waiting_for_user"] = False
# Increment current_round if it exists, otherwise set it to 1
if "current_round" in workflow:
workflow["current_round"] += 1
else:
workflow["current_round"] = 1
# Update in database - only the relevant workflow fields
workflow_update = {
"status": workflow["status"],
"last_activity": workflow["last_activity"],
"waiting_for_user": workflow["waiting_for_user"],
"current_round": workflow["current_round"]
}
self.lucy_interface.update_workflow(workflow_id, workflow_update)
return workflow
def workflow_finish(self, workflow: Dict[str, Any]) -> Dict[str, Any]:
"""
Finalizes a workflow and sets the status to 'completed'.
Args:
workflow: Workflow object
Returns:
Updated workflow object
"""
# Prepare workflow update data
workflow_update = {
"status": "completed",
"last_activity": datetime.now().isoformat(),
"waiting_for_user": True
}
# Update the workflow object in memory
workflow["status"] = workflow_update["status"]
workflow["last_activity"] = workflow_update["last_activity"]
workflow["waiting_for_user"] = workflow_update["waiting_for_user"]
# Save workflow state to database - only relevant fields, not the messages list
self.lucy_interface.update_workflow(workflow["id"], workflow_update)
return workflow
async def workflow_summarize(self, workflow: Dict[str, Any], message_user: Dict[str, Any]) -> str:
"""
Creates a summary of the workflow without the current user message.
@@ -382,23 +449,6 @@ JSON_OUTPUT = {{
return "\n\n".join(summary_parts)
### Agents
@ -412,7 +462,7 @@ JSON_OUTPUT = {{
"""
return self.agent_registry.get_agent_infos()
async def agent_input_documents(self, doc_input_list: List[Dict[str, Any]], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Prepares input documents for an agent, sorted with newest first.
@ -440,14 +490,15 @@ JSON_OUTPUT = {{
# Search for the document in sorted workflow messages (newest first)
for message in sorted_messages:
for doc in message.get("documents", []):
if (doc_file_id!="" and doc_file_id==doc.get("file_id")) or (doc_filename!="" and self.get_filename(doc) == doc_filename):
found_doc = doc
break
if found_doc:
break
if found_doc:
# Process document for agent based on the specification
processed_doc = await self.process_document_for_agent(found_doc, doc_spec)
prepared_inputs.append(processed_doc)
else:
logger.warning(f"Document with label '{doc_filename}', file_id '{doc_file_id}' not found in workflow")
@ -482,7 +533,10 @@ JSON_OUTPUT = {{
# Get the data from the content
data = content.get("data", "")
processed_content = content.copy()
# Check if content data is base64 encoded
is_base64 = content.get("metadata", {}).get("base64_encoded", False)
try:
# Use the AI service to process the document content according to the prompt from the project manager for the document specification
summary = doc_spec.get("prompt", "Extract the relevant information from this document")
@ -506,13 +560,15 @@ JSON_OUTPUT = {{
{"role": "user", "content": ai_prompt}
])
# DO NOT change the original data field
# processed_content["data"] unchanged
processed_content["data_extracted"] = processed_data
processed_content["metadata"]["ai_processed"] = True
except Exception as e:
logger.error(f"Error processing document content with AI: {str(e)}")
# Fall back to original content if AI processing fails
processed_content["data_extracted"] = "(no information)"
processed_contents.append(processed_content)
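To make the contract explicit: the original payload in "data" is never overwritten, and the AI extraction travels alongside it in "data_extracted". A processed content item might look like this (placeholder values, assuming a text payload flagged as base64):

processed_item = {
    "sequence_nr": 1,
    "name": "report",
    "ext": "txt",
    "content_type": "text/plain",
    "data": "SGVsbG8gd29ybGQ=",  # original payload, untouched
    "metadata": {"base64_encoded": True, "ai_processed": True},
    "data_extracted": "Key facts pulled out by the AI call",
}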
@ -551,10 +607,7 @@ JSON_OUTPUT = {{
if not agent:
logger.error(f"Agent '{agent_name}' not found")
return []
# Prepare output document specifications
output_specs = []
for doc in task.get("output_documents", []):
@ -563,7 +616,10 @@ JSON_OUTPUT = {{
"description": doc.get("prompt", "")
}
output_specs.append(output_spec)
# Prepare input documents for the agent
input_documents = await self.agent_input_documents(task.get('input_documents', []), workflow)
# Create a standardized task object for the agent
agent_task = {
"task_id": str(uuid.uuid4()),
@ -577,12 +633,17 @@ JSON_OUTPUT = {{
"timestamp": datetime.now().isoformat()
}
}
# Execute the agent with the standardized task
try:
# Process the task using the agent's standardized interface
logger.debug("TASK: "+self.parse_json2text(agent_task))
logger.debug(f"Agent '{agent_name}' AI service available: {agent.ai_service is not None}")
agent_results = await agent.process_task(agent_task)
logger.debug(f"Agent '{agent_name}' completed task. RESULT: {self.parse_json2text(agent_results)}")
# Log the agent response
self.log_add(
workflow,
@ -596,18 +657,17 @@ JSON_OUTPUT = {{
}
# Create a message in the workflow with the agent's response
agent_message = await self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow)
logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.")
return agent_message.get("documents", [])
except Exception as e:
error_msg = f"Error executing agent '{agent_name}': {str(e)}"
logger.error(error_msg, exc_info=True) # Add exc_info=True to get full traceback
self.log_add(workflow, error_msg, level="error")
return []
def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]:
"""
Saves all documents from agent results as files and returns a list of file IDs.
@ -641,25 +701,9 @@ JSON_OUTPUT = {{
# Determine if content is base64 encoded
is_base64 = False
if isinstance(content, dict) and content.get("metadata", {}).get("base64_encoded", False):
is_base64 = True
content = content.get("data", "")
# Convert content to bytes
if isinstance(content, str):
@ -699,46 +743,68 @@ JSON_OUTPUT = {{
def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]:
"""
Adds a message to the workflow and updates last_activity.
Saves the message in the database and updates the workflow with references.
Args:
workflow: Workflow object
message: Message to be saved
Returns:
Added message
"""
current_time = datetime.now().isoformat()
# Ensure messages list exists
if "messages" not in workflow:
workflow["messages"] = []
# Generate new message ID if not present
if "id" not in message:
message["id"] = f"msg_{str(uuid.uuid4())}"
# Add workflow ID and timestamps
message["workflow_id"] = workflow["id"]
message["started_at"] = current_time
message["finished_at"] = current_time
# Set sequence number
message["sequence_no"] = len(workflow["messages"]) + 1
# Ensure required fields are present
if "role" not in message:
# Set a default role based on agent_name
message["role"] = "assistant" if message.get("agent_name") else "user"
if "agent_name" not in message:
message["agent_name"] = ""
# Set status
message["status"] = "completed"
# Add message to workflow
workflow["messages"].append(message)
# Ensure message_ids list exists
if "message_ids" not in workflow:
workflow["message_ids"] = []
# Add message ID to the message_ids list
workflow["message_ids"].append(message["id"])
# Update workflow status
workflow["last_activity"] = current_time
# Save to database - first the message itself
self.lucy_interface.create_workflow_message(message)
# Then save the workflow with updated references
workflow_update = {
"last_activity": current_time,
"message_ids": workflow["message_ids"] # Update the message_ids field
}
self.lucy_interface.update_workflow(workflow["id"], workflow_update)
return message
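Note the write order here: the message row is created first, and only then is the workflow row patched with the new reference, so a failure between the two writes can at worst leave an unreferenced message, never a message_ids entry pointing nowhere. The two payloads, sketched with illustrative values:

message_row = {
    "id": "msg_123",
    "workflow_id": "wf_456",
    "sequence_no": 3,
    "role": "user",
    "agent_name": "",
    "status": "completed",
}
workflow_patch = {
    "last_activity": "2025-04-21T17:44:28",
    "message_ids": ["msg_121", "msg_122", "msg_123"],  # now includes the new row
}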
async def message_summarize(self, message: Dict[str, Any]) -> str:
@ -773,7 +839,7 @@ JSON_OUTPUT = {{
doc_name = self.get_filename(doc)
docs_list.append(doc_name)
if docs_list:
docs_summary = "\nDocuments:" + "\n- ".join(docs_list)
return f"[{role} {agent_name}]: {content_summary}{docs_summary}"
@ -832,7 +898,13 @@ JSON_OUTPUT = {{
if file.get("mandate_id") != self.mandate_id:
logger.warning(f"File {file_id} does not belong to mandate {self.mandate_id}")
continue
# Load file content
file_content = self.lucy_interface.get_file_data(file_id)
if file_content is None:
logger.warning(f"No content found for file with ID {file_id}")
continue
# Create document
file_name_ext = file.get("name")
document = {
@ -840,26 +912,22 @@ JSON_OUTPUT = {{
"file_id": file_id,
"name": os.path.splitext(file_name_ext)[0] if os.path.splitext(file_name_ext)[0] else "noname",
"ext": os.path.splitext(file_name_ext)[1][1:] if os.path.splitext(file_name_ext)[1] else "bin",
"data": base64.b64encode(file_content).decode('utf-8'), # Add file data as base64
"contents": []
}
# Extract contents
contents = get_document_contents(file, file_content)
# Add summaries to each content item
for content in contents:
content["summary"] = await self.message_summarize_content(content)
document["contents"] = contents
logger.info(f"File {file.get('name', 'unnamed')} (ID: {file_id}) loaded with {len(contents)} contents and summaries")
documents.append(document)
except Exception as e:
logger.error(f"Error processing file {file_id}: {str(e)}")
# Continue with remaining files instead of failing
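After this loader runs, every document handed to the workflow carries the raw file in "data" (base64) plus its extracted contents with AI summaries. An illustrative, shortened shape (placeholder values):

document = {
    "id": "doc_1",
    "file_id": 42,
    "name": "report",
    "ext": "txt",
    "data": "VGhpcyBpcyBhIHRlc3Q=",  # full file content, base64-encoded
    "contents": [
        {
            "sequence_nr": 1,
            "name": "report",
            "ext": "txt",
            "content_type": "text/plain",
            "data": "VGhpcyBpcyBhIHRlc3Q=",
            "metadata": {"base64_encoded": True},
            "summary": "One-paragraph AI summary of the text",
        },
    ],
}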

View file

@ -40,7 +40,11 @@ class AgentAnalyst(AgentBase):
self.default_figsize = (10, 6)
self.chart_dpi = 100
plt.style.use(self.plt_style)
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a standardized task structure and perform data analysis.

File diff suppressed because it is too large

View file

@ -26,7 +26,11 @@ class AgentCreative(AgentBase):
"document_generation",
"question_answering"
]
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a standardized task structure and generate creative or knowledge-based content.
@ -135,7 +139,7 @@ class AgentCreative(AgentBase):
"feedback": f"An error occurred while creating creative content: {str(e)}",
"documents": []
}
def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
"""
Extract context from input documents.

View file

@ -26,7 +26,11 @@ class AgentDocumentation(AgentBase):
"technical_writing",
"knowledge_organization"
]
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Process a standardized task structure and create documentation.

View file

@ -36,10 +36,14 @@ class AgentWebcrawler(AgentBase):
]
# Web crawling configuration
self.max_url = int(APP_CONFIG.get("Agent_Webcrawler_MAX_URLS", "5"))
self.max_key = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_KEYWORDS", "3"))
self.max_result = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_RESULTS", "5"))
self.timeout = int(APP_CONFIG.get("Agent_Webcrawler_TIMEOUT", "30"))
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
@ -542,7 +546,7 @@ class AgentWebcrawler(AgentBase):
List of search results
"""
formatted_query = quote_plus(query)
url = f"{APP_CONFIG.get('Agent_Webcrawler_SEARCH_ENGINE', 'https://html.duckduckgo.com/html/?q=')}{formatted_query}"
search_results_soup = self._read_url(url)
if not isinstance(search_results_soup, BeautifulSoup) or not search_results_soup.select('.result'):
@ -614,7 +618,7 @@ class AgentWebcrawler(AgentBase):
BeautifulSoup object with the content or empty on errors
"""
headers = {
'User-Agent': APP_CONFIG.get("Agent_Webcrawler_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"),
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}

View file

@ -98,14 +98,16 @@ def get_document_contents(file_metadata: Dict[str, Any], file_content: bytes) ->
}
})
# Add generic attributes for all documents
for content in contents:
if isinstance(content.get("data"), bytes):
content["data"] = base64.b64encode(content["data"]).decode('utf-8')
# Add base64 flag
if "metadata" not in content:
content["metadata"] = {}
content["metadata"]["base64_encoded"] = True
logger.info(f"Successfully extracted {len(contents)} contents from file '{file_name}'")
return contents

View file

@ -180,7 +180,11 @@ class AgentRegistry:
Agent instance or None if not found
"""
if agent_identifier in self.agents:
agent = self.agents[agent_identifier]
# Ensure the agent has the AI service
if hasattr(agent, 'set_dependencies') and self.ai_service:
agent.set_dependencies(ai_service=self.ai_service)
return agent
logger.error(f"Agent with identifier '{agent_identifier}' not found")
return None
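The lookup above performs setter injection on every access: any agent that exposes set_dependencies receives the registry's shared AI service just in time. A minimal, self-contained sketch of the contract (class and service objects are hypothetical stand-ins):

class ExampleAgent:
    """Hypothetical agent following the set_dependencies contract."""
    def __init__(self):
        self.ai_service = None  # filled in late by the registry

    def set_dependencies(self, ai_service=None):
        self.ai_service = ai_service

agent = ExampleAgent()
shared_service = object()  # stand-in for the real AI service
if hasattr(agent, "set_dependencies") and shared_service:
    agent.set_dependencies(ai_service=shared_service)
assert agent.ai_service is shared_service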

View file

@ -1,3 +1,4 @@
import os
import logging
import uuid
from datetime import datetime
@ -578,12 +579,15 @@ class LucyDOMInterface:
# 2. Save binary data as a base64 string in the 'file_data' table
logger.info(f"Saving file content to database for file: {file_name}")
self.create_file_data(db_file["id"], file_content)
# Debug: Export file to static folder
if logger.isEnabledFor(logging.DEBUG): self._export_file_to_static(file_content, db_file["id"], file_name)
# Debug: Verify database record was created
if not db_file:
logger.warning(f"Database record for file {file_name} was not created properly")
else:
logger.debug(f"Database record created for file {file_name}")
logger.info(f"File upload process completed for: {file_name}")
return db_file
@ -629,6 +633,10 @@ class LucyDOMInterface:
logger.error(f"Error downloading file {file_id}: {str(e)}")
raise FileError(f"Error downloading file: {str(e)}")
def _export_file_to_static(self, file_content: bytes, file_id: int, file_name: str):
debug_filename = f"{file_id}_{file_name}"
with open(f"./static/{debug_filename}", 'wb') as f:
f.write(file_content)
# Workflow methods
@ -664,10 +672,6 @@ class LucyDOMInterface:
if "last_activity" not in workflow_data:
workflow_data["last_activity"] = current_time
return self.db.record_create("workflows", workflow_data)
def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
@ -723,70 +727,93 @@ class LucyDOMInterface:
return self.db.get_recordset("workflow_messages", record_filter={"workflow_id": workflow_id})
def create_workflow_message(self, message_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Creates a new message for a workflow.
Args:
message_data: The message data
Returns:
The created message or None on error
"""
try:
# Check if required fields are present
required_fields = ["id", "workflow_id"]
for field in required_fields:
if field not in message_data:
logger.error(f"Required field '{field}' missing in message_data")
raise ValueError(f"Required field '{field}' missing in message data")
# Validate that ID is not None
if message_data["id"] is None:
message_data["id"] = f"msg_{uuid.uuid4()}"
logger.warning(f"Automatically generated ID for workflow message: {message_data['id']}")
# Ensure required fields are present
if "started_at" not in message_data and "created_at" not in message_data:
message_data["started_at"] = self._get_current_timestamp()
# If "created_at" is present, transfer it to "started_at"
if "created_at" in message_data and "started_at" not in message_data:
message_data["started_at"] = message_data["created_at"]
del message_data["created_at"]
# Set status if not present
if "status" not in message_data:
message_data["status"] = "completed"
# Set sequence number if not present
if "sequence_no" not in message_data:
# Get current messages to determine next sequence number
existing_messages = self.get_workflow_messages(message_data["workflow_id"])
message_data["sequence_no"] = len(existing_messages) + 1
# Ensure role and agent_name are present
if "role" not in message_data:
message_data["role"] = "assistant" if message_data.get("agent_name") else "user"
if "agent_name" not in message_data:
message_data["agent_name"] = ""
# Debug log for data to create
logger.debug(f"Creating workflow message with data: {message_data}")
# Create message in database
created_message = self.db.record_create("workflow_messages", message_data)
# Update workflow's message_ids if this is a new message
if created_message:
workflow_id = message_data["workflow_id"]
workflow = self.get_workflow(workflow_id)
if workflow:
# Get current message_ids or initialize empty list
message_ids = workflow.get("message_ids", [])
# Add the new message ID if not already in the list
if created_message["id"] not in message_ids:
message_ids.append(created_message["id"])
self.update_workflow(workflow_id, {"message_ids": message_ids})
return created_message
except Exception as e:
logger.error(f"Error creating workflow message: {str(e)}")
# Return None instead of raising to avoid cascading failures
return None
def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Updates an existing workflow message in the database.
Args:
message_id: ID of the message
message_data: Data to update
Returns:
The updated message object or None on error
"""
try:
# Debug info
logger.debug(f"Updating message {message_id} in database")
# Ensure message_id is provided
if not message_id:
@ -806,39 +833,19 @@ class LucyDOMInterface:
logger.error(f"Workflow ID missing for new message {message_id}")
return None
# Update existing message
existing_message = messages[0]
# Log the update data size for debugging
update_data_size = len(str(message_data))
logger.debug(f"Update data size: {update_data_size} bytes")
# Ensure required fields present
for key in ["role", "agent_name"]:
if key not in message_data and key not in existing_message:
message_data[key] = "assistant" if key == "role" else ""
# Ensure ID is in the dataset
if 'id' not in message_data:
message_data['id'] = message_id
# Convert created_at to started_at if needed
if "created_at" in message_data and "started_at" not in message_data:
message_data["started_at"] = message_data["created_at"]
del message_data["created_at"]
@ -1005,23 +1012,24 @@ class LucyDOMInterface:
def save_workflow_state(self, workflow: Dict[str, Any], save_messages: bool = True, save_logs: bool = True) -> bool:
"""
Saves the state of a workflow to the database.
Workflow data is updated, but messages are stored separately.
Args:
workflow: The workflow object
save_messages: Flag to determine if messages should be saved
save_logs: Flag to determine if logs should be saved
Returns:
True on success, False on failure
"""
try:
workflow_id = workflow.get("id")
if not workflow_id:
return False
# Extract only the database-relevant workflow fields
# IMPORTANT: Don't store messages in the workflow table!
workflow_db_data = {
"id": workflow_id,
"mandate_id": workflow.get("mandate_id", self.mandate_id),
@ -1030,58 +1038,56 @@ class LucyDOMInterface:
"status": workflow.get("status", "unknown"),
"started_at": workflow.get("started_at", self._get_current_timestamp()),
"last_activity": workflow.get("last_activity", self._get_current_timestamp()),
"last_message_id": workflow.get("last_message_id", ""),
"data_stats": workflow.get("data_stats", {})
}
# Check if workflow already exists
existing_workflow = self.get_workflow(workflow_id)
if existing_workflow:
self.update_workflow(workflow_id, workflow_db_data)
else:
self.create_workflow(workflow_db_data)
# Save messages
if save_messages and "messages" in workflow:
for message in workflow["messages"]:
message_id = message.get("id")
if not message_id:
continue
# Since each message is already saved with create_workflow_message,
# we only need to check if updates are necessary
# First, get existing message from database
existing_messages = self.get_workflow_messages(workflow_id)
existing_message = next((m for m in existing_messages if m.get("id") == message_id), None)
# Debug logging for documents
doc_count = len(message.get("documents", []))
if doc_count > 0:
logger.info(f"Message {message_id} has {doc_count} documents to save")
if existing_message:
# Check if updates are needed
has_changes = False
for key in ["role", "agent_name", "content", "status", "documents"]:
if key in message and message.get(key) != existing_message.get(key):
has_changes = True
break
if has_changes:
# Extract only relevant data for the database
message_data = {
"role": message.get("role", existing_message.get("role", "unknown")),
"content": message.get("content", existing_message.get("content", "")),
"agent_name": message.get("agent_name", existing_message.get("agent_name", "")),
"status": message.get("status", existing_message.get("status", "completed")),
"documents": message.get("documents", existing_message.get("documents", []))
}
self.update_workflow_message(message_id, message_data)
else:
# Message doesn't exist in database yet
# It should have been saved via create_workflow_message
# If not, log a warning
logger.warning(f"Message {message_id} in workflow {workflow_id} not found in database")
# Save logs
if save_logs and "logs" in workflow:
# Get existing logs
existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)}
for log in workflow["logs"]:
@ -1089,7 +1095,7 @@ class LucyDOMInterface:
if not log_id:
continue
# Extract only relevant data for the database
log_data = {
"id": log_id,
"workflow_id": workflow_id,
@ -1100,7 +1106,7 @@ class LucyDOMInterface:
"agent_name": log.get("agent_name")
}
# Create or update log
if log_id in existing_logs:
self.db.record_modify("workflow_logs", log_id, log_data)
else:
@ -1108,22 +1114,22 @@ class LucyDOMInterface:
return True
except Exception as e:
logger.error(f"Error saving workflow state: {str(e)}")
return False
def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]:
"""
Loads the complete state of a workflow from the database.
This includes the workflow itself, messages, and logs.
Args:
workflow_id: ID of the workflow to load
Returns:
The complete workflow object or None on error
"""
try:
# Load base workflow
workflow = self.get_workflow(workflow_id)
if not workflow:
return None
@ -1131,41 +1137,46 @@ class LucyDOMInterface:
# Log the workflow base retrieval
logger.debug(f"Loaded base workflow {workflow_id} from database")
# Load messages
messages = self.get_workflow_messages(workflow_id)
# Sort by sequence number
messages.sort(key=lambda x: x.get("sequence_no", 0))
# Debug log for messages and document counts
message_count = len(messages)
logger.debug(f"Loaded {message_count} messages for workflow {workflow_id}")
# Check if message_ids exists and is valid
message_ids = workflow.get("message_ids", [])
if not message_ids or len(message_ids) != len(messages):
# Rebuild message_ids from messages
message_ids = [msg.get("id") for msg in messages]
# Update in database
self.update_workflow(workflow_id, {"message_ids": message_ids})
logger.info(f"Rebuilt message_ids for workflow {workflow_id}")
# Log document counts for each message
for msg in messages:
doc_count = len(msg.get("documents", []))
if doc_count > 0:
logger.info(f"Message {msg.get('id')} has {doc_count} documents loaded from database")
# Log document details for debugging
for i, doc in enumerate(msg.get("documents", [])):
file_id = doc.get("file_id", "unknown")
logger.debug(f"Document {i+1}: file_id={file_id}")
# Load logs
logs = self.get_workflow_logs(workflow_id)
# Sort by timestamp
logs.sort(key=lambda x: x.get("timestamp", ""))
# Assemble complete workflow object
complete_workflow = workflow.copy()
complete_workflow["messages"] = messages
complete_workflow["message_ids"] = message_ids # Ensure message_ids is included
complete_workflow["logs"] = logs
return complete_workflow
except Exception as e:
logger.error(f"Error loading workflow state: {str(e)}")
return None
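Taken together, save_workflow_state and load_workflow_state treat the workflow_messages table as the source of truth and message_ids as derived data: the loader rebuilds the list whenever its length disagrees with the stored messages. The repair rule in isolation (pure data, no database involved):

messages = [{"id": "msg_a", "sequence_no": 1}, {"id": "msg_b", "sequence_no": 2}]
stored_ids = ["msg_a"]  # stale reference list
if not stored_ids or len(stored_ids) != len(messages):
    stored_ids = [m.get("id") for m in messages]  # rebuilt, as in load_workflow_state
assert stored_ids == ["msg_a", "msg_b"]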
# Singleton factory for LucyDOMInterface instances per context
_lucydom_interfaces = {}

View file

@ -3,143 +3,137 @@ from typing import List, Dict, Any, Optional
class Label(BaseModel):
"""Label for an attribute or a class with support for multiple languages"""
default: str
translations: Dict[str, str] = {}
def get_label(self, language: str = None):
"""Returns the label in the specified language, or the default value if not available"""
if language and language in self.translations:
return self.translations[language]
return self.default
class Prompt(BaseModel):
"""Data model for a prompt"""
id: int = Field(description="Unique ID of the prompt")
mandate_id: int = Field(description="ID of the associated mandate")
user_id: int = Field(description="ID of the creator")
content: str = Field(description="Content of the prompt")
name: str = Field(description="Display name of the prompt")
label: Label = Field(
default=Label(default="Prompt", translations={"en": "Prompt", "fr": "Invite"}),
description="Label for the class"
)
# Labels for attributes
field_labels: Dict[str, Label] = {
"id": Label(default="ID", translations={}),
"mandate_id": Label(default="Mandate ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
"user_id": Label(default="User ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
"content": Label(default="Content", translations={"en": "Content", "fr": "Contenu"}),
"name": Label(default="Name", translations={"en": "Label", "fr": "Nom"}),
}
class FileItem(BaseModel):
"""Data model for a file"""
id: int = Field(description="Unique ID of the data object")
mandate_id: int = Field(description="ID of the associated mandate")
user_id: int = Field(description="ID of the creator")
name: str = Field(description="Name of the data object")
mime_type: str = Field(description="Type of the data object MIME type")
size: Optional[int] = Field(None, description="Size of the data object in bytes")
file_hash: str = Field(description="Hash code for deduplication")
creation_date: Optional[str] = Field(None, description="Upload date")
workflow_id: Optional[str] = Field(None, description="ID of the associated workflow, if any")
label: Label = Field(
default=Label(default="Data Object", translations={"en": "Data Object", "fr": "Objet de données"}),
description="Label for the class"
)
# Labels for attributes
field_labels: Dict[str, Label] = {
"id": Label(default="ID", translations={}),
"mandate_id": Label(default="Mandate ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
"user_id": Label(default="User ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
"name": Label(default="Name", translations={"en": "Name", "fr": "Nom"}),
"mime_type": Label(default="Type", translations={"en": "Type", "fr": "Type"}),
"size": Label(default="Size", translations={"en": "Size", "fr": "Taille"}),
"file_hash": Label(default="File Hash", translations={"en": "Hash", "fr": "Hash"}),
"creation_date": Label(default="Upload date", translations={"en": "Upload date", "fr": "Date de téléchargement"}),
"workflow_id": Label(default="Workflow ID", translations={"en": "Workflow ID", "fr": "ID du workflow"})
}
class FileData(BaseModel):
"""Data model for file content"""
id: int = Field(description="Unique ID of the data object")
data: str = Field(description="Binary content of the file as base64 string")
# Workflow model classes
class DocumentContent(BaseModel):
"""Content of a document in the workflow"""
sequence_nr: int = Field(1, description="Sequence number of the content in the source document")
name: str = Field(description="Designation")
ext: str = Field(description="Content extension for export: txt, csv, json, jpg, png")
content_type: str = Field(description="MIME type")
summary: str = Field(description="Summary of the file content")
metadata: Dict[str, Any] = Field(default_factory=dict, description="Metadata about the content, such as is_text flag, format information, encoding, etc.")
class Document(BaseModel):
"""Document in the workflow - References a file directly in the database"""
id: str = Field(description="Unique ID of the document")
name: str = Field(description="Name of the data object")
ext: str = Field(description="Extension of the data object")
file_id: int = Field(description="ID of the referenced file in the database")
data: str = Field(description="Content of the data as base64 string")
contents: List[DocumentContent] = Field(description="Document contents")
class DataStats(BaseModel):
"""Statistics for performance and data usage"""
processing_time: Optional[float] = Field(None, description="Processing time in seconds")
token_count: Optional[int] = Field(None, description="Token count (for AI models)")
bytes_sent: Optional[int] = Field(None, description="Bytes sent")
bytes_received: Optional[int] = Field(None, description="Bytes received")
class Message(BaseModel):
"""Message object in the workflow"""
id: str = Field(description="Unique ID of the message")
workflow_id: str = Field(description="Reference to the parent workflow")
parent_message_id: Optional[str] = Field(None, description="Reference to the replied message")
started_at: str = Field(description="Timestamp for message creation")
finished_at: Optional[str] = Field(None, description="Timestamp for message completion")
sequence_no: int = Field(description="Sequence number for sorting")
status: str = Field(description="Status of the message ('processing', 'completed')")
role: str = Field(description="Role of the sender ('system', 'user', 'assistant')")
data_stats: Optional[DataStats] = Field(None, description="Statistics")
documents: Optional[List[Document]] = Field(None, description="Documents in this message (references to files in the database)")
content: Optional[str] = Field(None, description="Text content of the message")
agent_name: Optional[str] = Field(None, description="Name of the agent used")
class Workflow(BaseModel):
"""Workflow object for multi-agent system"""
id: str = Field(description="Unique ID of the workflow")
name: Optional[str] = Field(None, description="Name of the workflow")
mandate_id: int = Field(description="ID of the mandate")
user_id: int = Field(description="ID of the user")
status: str = Field(description="Status of the workflow ('running', 'failed', 'stopped')")
started_at: str = Field(description="Start timestamp")
last_activity: str = Field(description="Timestamp of the last activity")
message_ids: List[str] = Field(default=[], description="List of message IDs in this workflow")
data_stats: Optional[Dict[str, Any]] = Field(None, description="Total statistics")
messages: List[Message] = Field(default=[], description="Message history")
logs: List[Dict[str, Any]] = Field(default=[], description="Log entries")
# Request models for the API
class UserInputRequest(BaseModel):
"""Request for user input to a running workflow"""
prompt: str = Field(description="Message from the user")
list_file_id: List[int] = Field(default=[], description="List of FileItem IDs")
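For clarity, a request body for the renamed fields validates like this (placeholder values; .dict() is the pydantic v1 accessor, on v2 use .model_dump()):

req = UserInputRequest(prompt="Summarize the attached files", list_file_id=[12, 13])
print(req.dict())  # {'prompt': 'Summarize the attached files', 'list_file_id': [12, 13]}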

View file

@ -1,19 +1,32 @@
....................... TASKS
please revise all chat_agents* modules:
- all comments, logs and outputs in english language
- all ai answers in the language of the user
- no language specific features like analysis of words. a prompt in japanese would not work with this! i need it generically.
- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section.
documentation agent:
- why try to determine the document type, when the "label" of the delivered files ALWAYS includes the extension (e.g. .docx, .csv, etc.)? Please revise, this can be shortened and simplified considerably
can you do the following adaptations
everywhere:
- to remove base64 checks or tests. only to use the base64_encoded attribute
- to use the enhanced attributes for document ("data" containing file data in base64 format) and content ("data", "base64_encoded", "data_extracted")
please tell me where to adapt what in the code. I do not need fully new code.
German comments in logs and prompts to be translated to English. where to adapt what?
can you enhance all ai prompts to include that the output is delivered in the language of the user?
An option to have a global variable for this, which is also transferred with the task to the agents?
streamline self.log_add --> to use in a standardized format and to reduce messages to relevant steps
add connector to myoutlook
webcrawler_agent:
- there is a try - except mapping problem in the code. please also fix this
-
also attached chat.py and chat_content_extraction (centralized), so that you can see the structure of the passed parameters.
----------------------- OPEN
@ -41,6 +54,47 @@ frontend: no labels definition
----------------------- DONE
can you do the following adaptations
for document class:
- class Document to have a "data" attribute, where the file-data is stored in base64 format
based on this:
- task object for agents to enhance with this attribute
for content in contents in documents, when adding a file to a document object:
- to set "base64_encoded" if encoded. this should already be the case; to be checked
when building task for the agents:
- ensure attribute "data" is integrated, containing filedata base64 encoded
- in each content to deliver "data" as it is, optional "base64_encoded" attribute depending on data format, to add attribute "data_extracted" and to store here the extracted data from ai call
everywhere:
- to remove base64 checks or tests. only to use the base64_encoded attribute
- to use the enhanced attributes for document ("data" containing file data in base64 format) and content ("data", "base64_encoded", "data_extracted")
please tell me where to adapt what in the code. I do not need fully new code.
please revise all chat_agents* modules:
- all comments, logs and outputs in english language
- all ai answers in the language of the user
- no language specific features like analysis of words. a prompt in japanese would not work with this! i need it generically.
- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section.
documentation agent:
- why try to determine the document type, when the "label" of the delivered files ALWAYS includes the extension (e.g. .docx, .csv, etc.)? Please revise, this can be shortened and simplified considerably
webcrawler_agent:
- there is a try - except mapping problem in the code. please also fix this
-
also attached chat.py and chat_content_extraction (centralized), so that you can see the structure of the passed parameters.
replace all explicit prompts.

result.txt Normal file
View file

@ -0,0 +1 @@
{'total_pixels': None, 'total_characters': None}

View file

@ -70,7 +70,7 @@ async def list_workflows(current_user: Dict[str, Any] = Depends(get_current_acti
@router.post("/{workflow_id}/user-input", response_model=Dict[str, Any])
async def submit_user_input(
workflow_id: Optional[str] = Path(None, description="ID of the workflow (optional)"),
user_input: lucydom_model.UserInputRequest = Body(...),
current_user: Dict[str, Any] = Depends(get_current_active_user)
):
"""
@ -84,7 +84,11 @@ async def submit_user_input(
try:
# Continue or restart the workflow with the chat manager
user_input_dict = {
"prompt": user_input.prompt,
"list_file_id": user_input.list_file_id
}
workflow = await context.interface_chat.chat_run(user_input_dict, workflow_id)
if not workflow:
raise HTTPException(
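From the client side, the endpoint now rejects bodies that do not match UserInputRequest. A minimal call sketch (host, route prefix, and token are placeholders, not taken from this repository):

import requests

resp = requests.post(
    "http://localhost:8000/workflows/wf_456/user-input",  # assumed host and router prefix
    json={"prompt": "Please analyze the uploaded files", "list_file_id": [1, 2]},
    headers={"Authorization": "Bearer <token>"},  # auth enforced by get_current_active_user
)
print(resp.status_code, resp.json())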

View file

@ -0,0 +1,10 @@
This is a test text file for the ChatManager workflow.
It contains some information for testing document processing.
The ChatManager should be able to process this file
and extract relevant information from it.
This file serves as an example for text-based documents that can be
used in a chat workflow.

BIN
static/2_test_image.png Normal file

Binary file not shown.


View file

@ -0,0 +1,52 @@
# REQUIREMENTS: Pillow
from PIL import Image
def calculate_image_pixels(image_path):
try:
with Image.open(image_path) as img:
width, height = img.size
total_pixels = width * height
return total_pixels
except Exception as e:
print(f"Error calculating image pixels: {e}")
return None
def calculate_text_characters(text_path):
try:
with open(text_path, 'r', encoding='utf-8') as file:
text = file.read()
total_characters = len(text)
return total_characters
except Exception as e:
print(f"Error calculating text characters: {e}")
return None
def main():
image_path = 'test_image'
text_path = 'test_document'
# Calculate total pixels in the image
total_pixels = calculate_image_pixels(image_path)
# Calculate total characters in the text document
total_characters = calculate_text_characters(text_path)
# Prepare the result dictionary
result = {
'total_pixels': total_pixels,
'total_characters': total_characters
}
# Write the result to a text file
try:
with open('result.txt', 'w') as result_file:
result_file.write(str(result))
except Exception as e:
print(f"Error writing result to file: {e}")
# Output the result
print(result)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,6 @@
Execution error:
Traceback (most recent call last):
File "C:\Users\pmots\AppData\Local\Temp\code_exec_itmq0xhw\code_9cc3911d.py", line 3, in <module>
from PIL import Image
ModuleNotFoundError: No module named 'PIL'

test2.py Normal file
View file

@ -0,0 +1,50 @@
from PIL import Image
def calculate_image_pixels(image_path):
try:
with Image.open(image_path) as img:
width, height = img.size
total_pixels = width * height
return total_pixels
except Exception as e:
print(f"Error calculating image pixels: {e}")
return None
def calculate_text_characters(text_path):
try:
with open(text_path, 'r', encoding='utf-8') as file:
text = file.read()
total_characters = len(text)
return total_characters
except Exception as e:
print(f"Error calculating text characters: {e}")
return None
def main():
image_path = 'test_image'
text_path = 'test_document'
# Calculate total pixels in the image
total_pixels = calculate_image_pixels(image_path)
# Calculate total characters in the text document
total_characters = calculate_text_characters(text_path)
# Prepare the result dictionary
result = {
'total_pixels': total_pixels,
'total_characters': total_characters
}
# Write the result to a text file
try:
with open('result.txt', 'w') as result_file:
result_file.write(str(result))
except Exception as e:
print(f"Error writing result to file: {e}")
# Output the result
print(result)
if __name__ == "__main__":
main()

View file

@ -1,6 +1,6 @@
"""
Test script for ChatManager workflow with simulated file uploads.
Demonstrates the complete workflow from file upload to chat execution.
"""
import asyncio
@ -11,7 +11,7 @@ import sys
from typing import Dict, Any, List, Tuple
from datetime import datetime
# Configure logging
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
@ -19,43 +19,43 @@ logging.basicConfig(
)
logger = logging.getLogger("test_workflow")
# Add project directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Import modules
from modules.lucydom_interface import get_lucydom_interface
from modules.chat import get_chat_manager
async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
"""
Creates a text file and an image for testing and uploads them to the database.
Args:
mandate_id: ID of the mandate
user_id: ID of the user
Returns:
Tuple with (text_file_id, image_file_id)
"""
logger.info("Creating test files...")
lucy_interface = get_lucydom_interface(mandate_id, user_id)
# Create text file
text_content = """
This is a test text file for the ChatManager workflow.
It contains some information for testing document processing.
The ChatManager should be able to process this file
and extract relevant information from it.
This file serves as an example for text-based documents that can be
used in a chat workflow.
"""
text_file_bytes = text_content.encode('utf-8')
text_file = lucy_interface.save_uploaded_file(text_file_bytes, "test_document.txt")
text_file_id = text_file["id"]
logger.info(f"Text file created with ID: {text_file_id}")
# Create a simple test image using PIL
try:
@ -73,7 +73,7 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
# Upload to database
image_file = lucy_interface.save_uploaded_file(img_bytes, "test_image.png")
image_file_id = image_file["id"]
logger.info(f"Image file created with ID: {image_file_id}")
except ImportError:
# Fallback to the original method if PIL is not available
@ -87,7 +87,7 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
image_file = lucy_interface.save_uploaded_file(png_data, "test_image.png")
image_file_id = image_file["id"]
logger.info(f"Image file created with ID: {image_file_id}")
return text_file_id, image_file_id
@ -95,72 +95,73 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
async def run_chat_workflow(mandate_id: int, user_id: int, file_ids: List[int]) -> Dict[str, Any]:
"""
Executes a chat workflow with given file IDs.
Args:
mandate_id: ID of the mandate
user_id: ID of the user
file_ids: List of file IDs
Returns:
The workflow result
"""
logger.info(f"Starting chat workflow with files: {file_ids}")
# Initialize ChatManager
chat_manager = get_chat_manager(mandate_id, user_id)
# Create user request
user_input = {
"prompt": "Bitte zähle mir zusammen wieviele Pixel das Bild hat und wieviele Zeichen der Text der Dokumente hat",
"list_file_id": file_ids
}
# Execute chat workflow
workflow_result = await chat_manager.chat_run(user_input)
logger.info(f"Workflow completed with ID: {workflow_result['id']}")
return workflow_result
def analyze_workflow_result(workflow: Dict[str, Any]) -> None:
"""
Analyzes and outputs information about the workflow result.
Args:
workflow: The workflow result
"""
logger.info("Analyzing workflow result:")
logger.info(f"Workflow ID: {workflow['id']}")
logger.info(f"Status: {workflow['status']}")
logger.info(f"Number of messages: {len(workflow.get('messages', []))}")
for i, message in enumerate(workflow.get('messages', [])):
logger.info(f"Message {i+1}:")
logger.info(f" Role: {message.get('role', 'unknown')}")
# Show only the first 100 characters of content
content = message.get('content', '')
content_preview = content[:100] + '...' if len(content) > 100 else content
logger.info(f" Content: {content_preview}")
# Show documents in the message
documents = message.get('documents', [])
logger.info(f" Documents: {len(documents)}")
for j, doc in enumerate(documents):
doc_id = doc.get('id', 'no ID')
file_id = doc.get('file_id', 'no file_id')
logger.info(f" Document {j+1}: ID={doc_id}, File-ID={file_id}")
# Informationen über Inhalte
# Information about contents
contents = doc.get('contents', [])
for k, content in enumerate(contents):
content_name = content.get('name', 'kein Name')
content_type = content.get('content_type', 'unbekannt')
logger.info(f" Inhalt {k+1}: {content_name} ({content_type})")
content_name = content.get('name', 'no name')
content_type = content.get('content_type', 'unknown')
logger.info(f" Content {k+1}: {content_name} ({content_type})")
# Log-Einträge anzeigen
logger.info(f"Logs: {len(workflow.get('logs', []))}")
for i, log in enumerate(workflow.get('logs', []))[:10]: # Begrenzung auf 10 Logs
logs = workflow.get('logs', [])
logger.info(f"Logs: {len(logs)}")
# Get only the first 10 logs
for i, log in enumerate(logs[:10]): # Apply the slice to logs, not enumerate
log_type = log.get('type', 'info')
log_message = log.get('message', '')
log_message_preview = log_message[:100] + '...' if len(log_message) > 100 else log_message
@ -168,14 +169,14 @@ def analyze_workflow_result(workflow: Dict[str, Any]) -> None:
async def cleanup_test_files(mandate_id: int, user_id: int, file_ids: List[int]) -> None:
"""
Bereinigt die erstellten Testdateien.
Cleans up the created test files.
Args:
mandate_id: ID des Mandanten
user_id: ID des Benutzers
file_ids: Liste der zu löschenden Datei-IDs
mandate_id: ID of the mandate
user_id: ID of the user
file_ids: List of file IDs to delete
"""
logger.info("Beginne Bereinigung der Testdateien...")
logger.info("Starting cleanup of test files...")
lucy_interface = get_lucydom_interface(mandate_id, user_id)
@ -183,47 +184,47 @@ async def cleanup_test_files(mandate_id: int, user_id: int, file_ids: List[int])
try:
success = lucy_interface.delete_file(file_id)
if success:
logger.info(f"Datei mit ID {file_id} erfolgreich gelöscht")
logger.info(f"File with ID {file_id} successfully deleted")
else:
logger.warning(f"Fehler beim Löschen der Datei mit ID {file_id}")
logger.warning(f"Error deleting file with ID {file_id}")
except Exception as e:
logger.error(f"Fehler beim Löschen der Datei mit ID {file_id}: {str(e)}")
logger.error(f"Error deleting file with ID {file_id}: {str(e)}")
logger.info("Bereinigung abgeschlossen")
logger.info("Cleanup completed")
async def main():
"""
Hauptfunktion, die den gesamten Testprozess steuert.
Main function that controls the entire test process.
"""
# Testparameter
MANDATE_ID = 1 # Test-Mandanten-ID
USER_ID = 1 # Test-Benutzer-ID
CLEANUP = True # Bereinigung nach dem Test
# Test parameters
MANDATE_ID = 1 # Test mandate ID
USER_ID = 1 # Test user ID
CLEANUP = True # Cleanup after test
try:
logger.info("=== Test-Workflow für ChatManager gestartet ===")
logger.info("=== ChatManager test workflow started ===")
# Schritt 1: Testdateien erstellen
# Step 1: Create test files
text_file_id, image_file_id = await create_test_files(MANDATE_ID, USER_ID)
file_ids = [text_file_id, image_file_id]
# Schritt 2: Chat-Workflow ausführen
# Step 2: Execute chat workflow
workflow_result = await run_chat_workflow(MANDATE_ID, USER_ID, file_ids)
# Schritt 3: Ergebnis analysieren
# Step 3: Analyze result
analyze_workflow_result(workflow_result)
# Schritt 4: Optional bereinigen
# Step 4: Optional cleanup
if CLEANUP:
await cleanup_test_files(MANDATE_ID, USER_ID, file_ids)
logger.info("=== Test-Workflow erfolgreich abgeschlossen ===")
logger.info("=== Test workflow successfully completed ===")
except Exception as e:
logger.error(f"Fehler im Test-Workflow: {str(e)}", exc_info=True)
logger.info("=== Test-Workflow mit Fehler beendet ===")
logger.error(f"Error in test workflow: {str(e)}", exc_info=True)
logger.info("=== Test workflow ended with error ===")
if __name__ == "__main__":
# Event-Loop für asyncio erstellen und Hauptfunktion ausführen
# Create event loop for asyncio and execute main function
loop = asyncio.get_event_loop()
loop.run_until_complete(main())

View file

@ -1,373 +0,0 @@
"""
Extended test script for the ChatManager workflow with simulated file uploads.
Provides additional configuration options and more detailed tests.
"""
import asyncio
import logging
import os
import sys
import argparse
import json
from typing import Dict, Any, List, Tuple, Optional
from datetime import datetime
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
handlers=[logging.StreamHandler()]
)
logger = logging.getLogger("test_workflow")
# Add the project directory to the module search path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Import project modules
from modules.lucydom_interface import get_lucydom_interface
from modules.chat import get_chat_manager
class TestConfig:
"""Konfigurationsklasse für Testparameter"""
def __init__(self):
self.mandate_id = 1
self.user_id = 1
self.cleanup = True
self.save_results = True
self.results_dir = "test_results"
self.test_message = "Analysiere bitte die hochgeladenen Dateien und erkläre mir deren Inhalt."
self.text_file_content = """
Dies ist eine Test-Textdatei für den ChatManager-Workflow.
Sie enthält einige Informationen zum Testen der Dokumentverarbeitung.
Der ChatManager sollte in der Lage sein, diese Datei zu verarbeiten
und daraus relevante Informationen zu extrahieren.
Diese Datei dient als Beispiel für Text-basierte Dokumente, die in einem
Chat-Workflow verwendet werden können.
"""
def parse_args() -> TestConfig:
"""Parst Kommandozeilenargumente"""
parser = argparse.ArgumentParser(description="Test für ChatManager-Workflow")
parser.add_argument("--mandate-id", type=int, default=1, help="ID des Mandanten")
parser.add_argument("--user-id", type=int, default=1, help="ID des Benutzers")
parser.add_argument("--no-cleanup", action="store_true", help="Testdateien nicht löschen")
parser.add_argument("--no-save", action="store_true", help="Ergebnisse nicht speichern")
parser.add_argument("--results-dir", type=str, default="test_results", help="Verzeichnis für Ergebnisse")
parser.add_argument("--message", type=str, help="Benutzernachricht für den Test")
args = parser.parse_args()
config = TestConfig()
config.mandate_id = args.mandate_id
config.user_id = args.user_id
config.cleanup = not args.no_cleanup
config.save_results = not args.no_save
config.results_dir = args.results_dir
if args.message:
config.test_message = args.message
return config
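# Example invocations (the script file name is an assumption here):
#
#   python test_chat_workflow_extended.py --mandate-id 2 --user-id 7 --no-cleanup
#   python test_chat_workflow_extended.py --message "Summarize the uploaded files."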
async def create_test_files(config: TestConfig) -> Tuple[int, int]:
"""
Creates a text file and an image for tests and uploads them to the database.
Args:
config: Test configuration
Returns:
Tuple of (text_file_id, image_file_id)
"""
logger.info("Creating test files...")
lucy_interface = get_lucydom_interface(config.mandate_id, config.user_id)
# Create text file
text_content = config.text_file_content
text_file_bytes = text_content.encode('utf-8')
text_file = lucy_interface.save_uploaded_file(text_file_bytes, "test_document.txt")
text_file_id = text_file["id"]
logger.info(f"Textdatei erstellt mit ID: {text_file_id}")
# Create image file (simple 1x1 PNG)
# Hex-encoded 1x1 PNG
png_data = bytes.fromhex(
"89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
"89000000017352474200aece1ce90000000467414d410000b18f0bfc61050000"
"000970485973000016250000162501495224f00000001974455874536f667477"
"617265007777772e696e6b73636170652e6f72679bee3c1a0000000c49444154"
"08d763f8ffff3f0005fe02fec1cd59830000000049454e44ae426082"
)
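# Optional sanity check: every PNG file starts with the 8-byte signature below,
# so a failing assert would mean the hex blob above was corrupted.
# assert png_data[:8] == b"\x89PNG\r\n\x1a\n"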
image_file = lucy_interface.save_uploaded_file(png_data, "test_image.png")
image_file_id = image_file["id"]
logger.info(f"Bilddatei erstellt mit ID: {image_file_id}")
return text_file_id, image_file_id
async def verify_uploaded_files(mandate_id: int, user_id: int, file_ids: List[int]) -> bool:
"""
Verifies that the uploaded files were stored correctly in the database.
Args:
mandate_id: ID of the mandate
user_id: ID of the user
file_ids: List of file IDs
Returns:
True if all files are available
"""
logger.info("Verifying uploaded files...")
lucy_interface = get_lucydom_interface(mandate_id, user_id)
all_files_available = True
for file_id in file_ids:
file = lucy_interface.get_file(file_id)
if file:
file_data = lucy_interface.get_file_data(file_id)
if file_data:
logger.info(f"Datei {file_id} ({file.get('name', 'Unbekannt')}, {file.get('mime_type', 'Unbekannt')}) ist verfügbar")
logger.info(f" Größe: {len(file_data)} Bytes")
else:
logger.error(f"Datei {file_id} hat keine Binärdaten")
all_files_available = False
else:
logger.error(f"Datei mit ID {file_id} nicht in der Datenbank gefunden")
all_files_available = False
return all_files_available
async def run_chat_workflow(config: TestConfig, file_ids: List[int]) -> Dict[str, Any]:
"""
Executes a chat workflow with given file IDs.
Args:
config: Test configuration
file_ids: List of file IDs
Returns:
The workflow result
"""
logger.info(f"Starting chat workflow with files: {file_ids}")
# Initialize ChatManager
chat_manager = get_chat_manager(config.mandate_id, config.user_id)
# Create user request
user_input = {
"message": config.test_message,
"additional_fileids": file_ids
}
# Record start time
start_time = datetime.now()
# Execute chat workflow
workflow_result = await chat_manager.chat_run(user_input)
# Compute end time and duration
end_time = datetime.now()
duration = (end_time - start_time).total_seconds()
logger.info(f"Workflow abgeschlossen mit ID: {workflow_result['id']}")
logger.info(f"Dauer: {duration:.2f} Sekunden")
return workflow_result
def analyze_workflow_result(workflow: Dict[str, Any]) -> Dict[str, Any]:
"""
Analyzes the workflow result and returns statistics.
Args:
workflow: The workflow result
Returns:
Dictionary with analysis results
"""
logger.info("Analyzing workflow result:")
# Basic information
analysis = {
"workflow_id": workflow.get("id"),
"status": workflow.get("status"),
"message_count": len(workflow.get("messages", [])),
"log_count": len(workflow.get("logs", [])),
"document_count": 0,
"roles": {},
"document_types": {},
"response_sizes": []
}
# Analyze messages
for message in workflow.get("messages", []):
# Count roles
role = message.get("role", "unknown")
if role not in analysis["roles"]:
analysis["roles"][role] = 0
analysis["roles"][role] += 1
# Content size of assistant responses
if role == "assistant":
content = message.get("content", "")
analysis["response_sizes"].append(len(content))
# Count and analyze documents
documents = message.get("documents", [])
analysis["document_count"] += len(documents)
for doc in documents:
contents = doc.get("contents", [])
for content in contents:
content_type = content.get("content_type", "unknown")
if content_type not in analysis["document_types"]:
analysis["document_types"][content_type] = 0
analysis["document_types"][content_type] += 1
# Output for the log
logger.info(f"Workflow ID: {analysis['workflow_id']}")
logger.info(f"Status: {analysis['status']}")
logger.info(f"Number of messages: {analysis['message_count']}")
logger.info(f"Number of documents: {analysis['document_count']}")
logger.info(f"Role distribution: {analysis['roles']}")
logger.info(f"Document types: {analysis['document_types']}")
if analysis["response_sizes"]:
avg_size = sum(analysis["response_sizes"]) / len(analysis["response_sizes"])
logger.info(f"Average response size: {avg_size:.2f} characters")
# Detailed message information
for i, message in enumerate(workflow.get("messages", [])[:5]): # Limit to 5 messages
logger.info(f"Message {i+1}:")
logger.info(f" Role: {message.get('role', 'unknown')}")
# Show only the first 100 characters of content
content = message.get("content", "")
content_preview = content[:100] + "..." if len(content) > 100 else content
logger.info(f" Inhalt: {content_preview}")
# Show documents in the message
documents = message.get("documents", [])
if documents:
logger.info(f" Dokumente: {len(documents)}")
for j, doc in enumerate(documents):
file_id = doc.get("file_id", "keine file_id")
logger.info(f" Dokument {j+1}: File-ID={file_id}")
return analysis
def save_test_results(config: TestConfig, workflow: Dict[str, Any], analysis: Dict[str, Any]) -> None:
"""
Saves the test results to a file.
Args:
config: Test configuration
workflow: The complete workflow result
analysis: The analysis results
"""
if not config.save_results:
return
# Create the results directory if it does not exist
os.makedirs(config.results_dir, exist_ok=True)
# Timestamp for unique file names
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Save the analysis
analysis_file = os.path.join(config.results_dir, f"analysis_{timestamp}.json")
with open(analysis_file, "w", encoding="utf-8") as f:
json.dump(analysis, f, indent=2, ensure_ascii=False)
logger.info(f"Analyse gespeichert in: {analysis_file}")
# Save the complete workflow (without large binary data)
workflow_copy = workflow.copy()
# Remove binary data from the export to reduce the file size
for message in workflow_copy.get("messages", []):
if "documents" in message:
for doc in message.get("documents", []):
if "contents" in doc:
for content in doc.get("contents", []):
if "data" in content and isinstance(content["data"], bytes) and len(content["data"]) > 1000:
content["data"] = f"[{len(content['data'])} Bytes]"
workflow_file = os.path.join(config.results_dir, f"workflow_{timestamp}.json")
with open(workflow_file, "w", encoding="utf-8") as f:
# Convert bytes to strings for JSON serialization
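# Note: errors="replace" below is a defensive assumption; non-UTF-8 bytes that
# slip past the size filter above become replacement characters instead of
# raising UnicodeDecodeError during the dump.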
json.dump(workflow_copy, f, indent=2, ensure_ascii=False, default=lambda o:
o.decode("utf-8", errors="replace") if isinstance(o, bytes) else str(o))
logger.info(f"Workflow gespeichert in: {workflow_file}")
async def cleanup_test_files(config: TestConfig, file_ids: List[int]) -> None:
"""
Cleans up the created test files.
Args:
config: Test configuration
file_ids: List of file IDs to delete
"""
if not config.cleanup:
logger.info("Cleanup skipped (--no-cleanup)")
return
logger.info("Starting cleanup of test files...")
lucy_interface = get_lucydom_interface(config.mandate_id, config.user_id)
for file_id in file_ids:
try:
success = lucy_interface.delete_file(file_id)
if success:
logger.info(f"Datei mit ID {file_id} erfolgreich gelöscht")
else:
logger.warning(f"Fehler beim Löschen der Datei mit ID {file_id}")
except Exception as e:
logger.error(f"Fehler beim Löschen der Datei mit ID {file_id}: {str(e)}")
logger.info("Bereinigung abgeschlossen")
async def main():
"""
Main function that controls the entire test process.
"""
# Load configuration
config = parse_args()
try:
logger.info("=== Test-Workflow für ChatManager gestartet ===")
logger.info(f"Mandate-ID: {config.mandate_id}, User-ID: {config.user_id}")
# Step 1: Create test files
text_file_id, image_file_id = await create_test_files(config)
file_ids = [text_file_id, image_file_id]
# Step 2: Verify uploaded files
files_ok = await verify_uploaded_files(config.mandate_id, config.user_id, file_ids)
if not files_ok:
logger.error("Fehler bei den hochgeladenen Dateien, Test wird abgebrochen")
return
# Step 3: Execute chat workflow
workflow_result = await run_chat_workflow(config, file_ids)
# Step 4: Analyze result
analysis = analyze_workflow_result(workflow_result)
# Step 5: Save results
save_test_results(config, workflow_result, analysis)
# Step 6: Clean up
await cleanup_test_files(config, file_ids)
logger.info("=== Test-Workflow erfolgreich abgeschlossen ===")
except Exception as e:
logger.error(f"Fehler im Test-Workflow: {str(e)}", exc_info=True)
logger.info("=== Test-Workflow mit Fehler beendet ===")
if __name__ == "__main__":
# Create event loop for asyncio and execute main function
loop = asyncio.get_event_loop()
loop.run_until_complete(main())