basic set for integration testing

This commit is contained in:
ValueOn AG 2025-04-21 17:44:28 +02:00
parent 8b234a9a30
commit f90483e3fd
24 changed files with 3303 additions and 1684 deletions

View file

@@ -19,18 +19,6 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-opus-20240229
Connector_AiAnthropic_TEMPERATURE = 0.2 Connector_AiAnthropic_TEMPERATURE = 0.2
Connector_AiAnthropic_MAX_TOKENS = 2000 Connector_AiAnthropic_MAX_TOKENS = 2000
# Web scraping configuration
Connector_AiWebscraping_TIMEOUT = 10
Connector_AiWebscraping_MAX_URLS = 3
Connector_AiWebscraping_MAX_TOKENS = 30000
Connector_AiWebscraping_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36
Connector_AiWebscraping_SEARCH_ENGINE = https://html.duckduckgo.com/html/?q=
Connector_AiWebscraping_MAX_SEARCH_KEYWORDS = 3
Connector_AiWebscraping_MAX_SEARCH_RESULTS = 5
# AgentService configuration
Module_AgentserviceInterface_UPLOAD_DIR = ./_uploads
# File management configuration # File management configuration
File_Management_MAX_UPLOAD_SIZE_MB = 50 File_Management_MAX_UPLOAD_SIZE_MB = 50
File_Management_CLEANUP_INTERVAL = 240 File_Management_CLEANUP_INTERVAL = 240
@ -51,3 +39,16 @@ Security_PASSWORD_REQUIRE_NUMBERS = True
Security_PASSWORD_REQUIRE_SPECIAL = True Security_PASSWORD_REQUIRE_SPECIAL = True
Security_FAILED_LOGIN_LIMIT = 5 Security_FAILED_LOGIN_LIMIT = 5
Security_LOCK_DURATION_MINUTES = 30 Security_LOCK_DURATION_MINUTES = 30
# Agent Webcrawler configuration
Agent_Webcrawler_TIMEOUT = 10
Agent_Webcrawler_MAX_URLS = 3
Agent_Webcrawler_MAX_TOKENS = 30000
Agent_Webcrawler_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36
Agent_Webcrawler_SEARCH_ENGINE = https://html.duckduckgo.com/html/?q=
Agent_Webcrawler_MAX_SEARCH_KEYWORDS = 3
Agent_Webcrawler_MAX_SEARCH_RESULTS = 5
# Agent Coder configuration
Agent_Coder_INSTALL_TIMEOUT = 180
Agent_Coder_EXECUTION_TIMEOUT = 60

View file

@@ -10,76 +10,76 @@ logger = logging.getLogger(__name__)
class DatabaseConnector: class DatabaseConnector:
""" """
Ein Konnektor für JSON-basierte Datenspeicherung. A connector for JSON-based data storage.
Stellt generische Datenbankoperationen bereit. Provides generic database operations.
""" """
def __init__(self, db_host: str, db_database: str, db_user: str = None, db_password: str = None, mandate_id: int = None, user_id: int = None): def __init__(self, db_host: str, db_database: str, db_user: str = None, db_password: str = None, mandate_id: int = None, user_id: int = None):
""" """
Initialisiert den JSON-Datenbankkonnektor. Initializes the JSON database connector.
Args: Args:
db_host: Verzeichnis für die JSON-Dateien db_host: Directory for the JSON files
db_database = Datenbankname db_database = Database name
db_user: Benutzername für die Authentifizierung (optional) db_user: Username for authentication (optional)
db_password: API-Schlüssel für die Authentifizierung (optional) db_password: API key for authentication (optional)
mandate_id: Kontext-Parameter für den Mandanten mandate_id: Context parameter for the tenant
user_id: Kontext-Parameter für den Benutzer user_id: Context parameter for the user
""" """
# Speichere die Eingabeparameter # Store the input parameters
self.db_host = db_host self.db_host = db_host
self.db_database = db_database self.db_database = db_database
self.db_user = db_user self.db_user = db_user
self.db_password = db_password self.db_password = db_password
# Prüfe, ob Kontext-Parameter gesetzt sind # Check if context parameters are set
if mandate_id is None or user_id is None: if mandate_id is None or user_id is None:
raise ValueError("mandate_id und user_id müssen gesetzt sein") raise ValueError("mandate_id and user_id must be set")
# Stelle sicher, dass das Datenbankverzeichnis existiert # Ensure the database directory exists
self.db_folder=os.path.join(self.db_host,self.db_database) self.db_folder=os.path.join(self.db_host,self.db_database)
os.makedirs(self.db_folder, exist_ok=True) os.makedirs(self.db_folder, exist_ok=True)
# Cache für geladene Daten # Cache for loaded data
self._tables_cache = {} self._tables_cache = {}
# System-Tabelle initialisieren # Initialize system table
self._system_table_name = "_system" self._system_table_name = "_system"
self._initialize_system_table() self._initialize_system_table()
# Temporär mandate_id und user_id speichern # Temporarily store mandate_id and user_id
self._mandate_id = mandate_id self._mandate_id = mandate_id
self._user_id = user_id self._user_id = user_id
# Wenn mandate_id oder user_id 0 sind, versuche die initialen IDs zu verwenden # If mandate_id or user_id are 0, try to use the initial IDs
if mandate_id == 0: if mandate_id == 0:
initial_mandate_id = self.get_initial_id("mandates") initial_mandate_id = self.get_initial_id("mandates")
if initial_mandate_id is not None: if initial_mandate_id is not None:
self._mandate_id = initial_mandate_id self._mandate_id = initial_mandate_id
logger.info(f"Verwende initiale mandate_id: {initial_mandate_id} statt 0") logger.info(f"Using initial mandate_id: {initial_mandate_id} instead of 0")
if user_id == 0: if user_id == 0:
initial_user_id = self.get_initial_id("users") initial_user_id = self.get_initial_id("users")
if initial_user_id is not None: if initial_user_id is not None:
self._user_id = initial_user_id self._user_id = initial_user_id
logger.info(f"Verwende initiale user_id: {initial_user_id} statt 0") logger.info(f"Using initial user_id: {initial_user_id} instead of 0")
# Setze die effektiven IDs als Eigenschaften # Set the effective IDs as properties
self.mandate_id = self._mandate_id self.mandate_id = self._mandate_id
self.user_id = self._user_id self.user_id = self._user_id
logger.info(f"DatabaseConnector initialisiert für Verzeichnis: {self.db_folder}") logger.info(f"DatabaseConnector initialized for directory: {self.db_folder}")
logger.debug(f"Kontext: mandate_id={self.mandate_id}, user_id={self.user_id}") logger.debug(f"Context: mandate_id={self.mandate_id}, user_id={self.user_id}")
def _initialize_system_table(self): def _initialize_system_table(self):
"""Initialisiert die System-Tabelle, falls sie noch nicht existiert.""" """Initializes the system table if it doesn't exist yet."""
system_table_path = self._get_table_path(self._system_table_name) system_table_path = self._get_table_path(self._system_table_name)
if not os.path.exists(system_table_path): if not os.path.exists(system_table_path):
empty_system_table = {} empty_system_table = {}
self._save_system_table(empty_system_table) self._save_system_table(empty_system_table)
logger.info(f"System-Tabelle initialisiert in {system_table_path}") logger.info(f"System table initialized in {system_table_path}")
def _load_system_table(self) -> Dict[str, int]: def _load_system_table(self) -> Dict[str, int]:
"""Lädt die System-Tabelle mit den initialen IDs.""" """Loads the system table with the initial IDs."""
system_table_path = self._get_table_path(self._system_table_name) system_table_path = self._get_table_path(self._system_table_name)
try: try:
if os.path.exists(system_table_path): if os.path.exists(system_table_path):
@ -88,66 +88,66 @@ class DatabaseConnector:
else: else:
return {} return {}
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Laden der System-Tabelle: {e}") logger.error(f"Error loading the system table: {e}")
return {} return {}
def _save_system_table(self, data: Dict[str, int]) -> bool: def _save_system_table(self, data: Dict[str, int]) -> bool:
"""Speichert die System-Tabelle mit den initialen IDs.""" """Saves the system table with the initial IDs."""
system_table_path = self._get_table_path(self._system_table_name) system_table_path = self._get_table_path(self._system_table_name)
try: try:
with open(system_table_path, 'w', encoding='utf-8') as f: with open(system_table_path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False) json.dump(data, f, indent=2, ensure_ascii=False)
return True return True
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Speichern der System-Tabelle: {e}") logger.error(f"Error saving the system table: {e}")
return False return False
def _get_table_path(self, table: str) -> str: def _get_table_path(self, table: str) -> str:
"""Gibt den vollständigen Pfad zu einer Tabellendatei zurück""" """Returns the full path to a table file"""
return os.path.join(self.db_folder, f"{table}.json") return os.path.join(self.db_folder, f"{table}.json")
def _load_table(self, table: str) -> List[Dict[str, Any]]: def _load_table(self, table: str) -> List[Dict[str, Any]]:
"""Lädt eine Tabelle aus der entsprechenden JSON-Datei""" """Loads a table from the corresponding JSON file"""
path = self._get_table_path(table) path = self._get_table_path(table)
# Wenn die Tabelle die System-Tabelle ist, lade sie direkt # If the table is the system table, load it directly
if table == self._system_table_name: if table == self._system_table_name:
return [] # Die System-Tabelle wird nicht wie normale Tabellen behandelt return [] # The system table is not treated like normal tables
# Wenn die Tabelle bereits im Cache ist, verwende den Cache # If the table is already in the cache, use the cache
if table in self._tables_cache: if table in self._tables_cache:
# logger.info(f"Lade Tabelle {table} aus Cache") # logger.info(f"Loading table {table} from cache")
return self._tables_cache[table] return self._tables_cache[table]
# Ansonsten lade die Datei # Otherwise load the file
try: try:
if os.path.exists(path): if os.path.exists(path):
# logger.info(f"Lade Tabelle {table} aus JSON {path}") # logger.info(f"Loading table {table} from JSON {path}")
with open(path, 'r', encoding='utf-8') as f: with open(path, 'r', encoding='utf-8') as f:
data = json.load(f) data = json.load(f)
self._tables_cache[table] = data self._tables_cache[table] = data
# Wenn Daten geladen wurden und noch keine initiale ID registriert ist, # If data was loaded and no initial ID is registered yet,
# registriere die ID des ersten Datensatzes (falls vorhanden) # register the ID of the first record (if available)
if data and not self.has_initial_id(table): if data and not self.has_initial_id(table):
if "id" in data[0]: if "id" in data[0]:
self.register_initial_id(table, data[0]["id"]) self._register_initial_id(table, data[0]["id"])
logger.info(f"Initiale ID {data[0]['id']} für Tabelle {table} nachträglich registriert") logger.info(f"Initial ID {data[0]['id']} for table {table} retroactively registered")
return data return data
else: else:
# Wenn die Datei nicht existiert, erstelle eine leere Tabelle # If the file doesn't exist, create an empty table
logger.info(f"Neue Tabelle {table}") logger.info(f"New table {table}")
self._tables_cache[table] = [] self._tables_cache[table] = []
self._save_table(table, []) self._save_table(table, [])
return [] return []
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Laden der Tabelle {table}: {e}") logger.error(f"Error loading table {table}: {e}")
return [] return []
def _save_table(self, table: str, data: List[Dict[str, Any]]) -> bool: def _save_table(self, table: str, data: List[Dict[str, Any]]) -> bool:
"""Speichert eine Tabelle in der entsprechenden JSON-Datei""" """Saves a table to the corresponding JSON file"""
# Die System-Tabelle wird speziell behandelt # The system table is handled specially
if table == self._system_table_name: if table == self._system_table_name:
return False return False
@ -156,43 +156,44 @@ class DatabaseConnector:
with open(path, 'w', encoding='utf-8') as f: with open(path, 'w', encoding='utf-8') as f:
json.dump(data, f, indent=2, ensure_ascii=False) json.dump(data, f, indent=2, ensure_ascii=False)
# Aktualisiere den Cache # Update the cache
self._tables_cache[table] = data self._tables_cache[table] = data
return True return True
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Speichern der Tabelle {table}: {e}") logger.error(f"Error saving table {table}: {e}")
return False return False
def _filter_by_context(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]: def _filter_by_context(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
""" """
Filtert Datensätze nach dem Mandanten- und Benutzerkontext, Filters records by tenant and user context,
sofern diese Felder im Datensatz existieren. if these fields exist in the record.
""" """
filtered_records = [] filtered_records = []
for record in records: for record in records:
# Prüfe, ob mandate_id im Datensatz existiert und nicht null ist # Check if mandate_id exists in the record and is not null
has_mandate = "mandate_id" in record and record["mandate_id"] is not None and record["mandate_id"] != "" has_mandate = "mandate_id" in record and record["mandate_id"] is not None and record["mandate_id"] != ""
# Prüfe, ob user_id im Datensatz existiert und nicht null ist # Check if user_id exists in the record and is not null
has_user = "user_id" in record and record["user_id"] is not None and record["user_id"] != "" has_user = "user_id" in record and record["user_id"] is not None and record["user_id"] != ""
# Wenn beides existiert, filtere entsprechend # If both exist, filter accordingly
if has_mandate and has_user: if has_mandate and has_user:
if record["mandate_id"] == self.mandate_id: if record["mandate_id"] == self.mandate_id:
filtered_records.append(record) filtered_records.append(record)
# Wenn nur mandate_id existiert # If only mandate_id exists
elif has_mandate and not has_user: elif has_mandate and not has_user:
if record["mandate_id"] == self.mandate_id: if record["mandate_id"] == self.mandate_id:
filtered_records.append(record) filtered_records.append(record)
# Wenn weder mandate_id noch user_id existieren, füge den Datensatz hinzu # If neither mandate_id nor user_id exist, add the record
elif not has_mandate and not has_user: elif not has_mandate and not has_user:
filtered_records.append(record) filtered_records.append(record)
return filtered_records return filtered_records
def _apply_record_filter(self, records: List[Dict[str, Any]], record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]: def _apply_record_filter(self, records: List[Dict[str, Any]], record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
"""Wendet einen Datensatzfilter auf die Datensätze an""" """Applies a record filter to the records"""
if not record_filter: if not record_filter:
return records return records
@ -202,17 +203,17 @@ class DatabaseConnector:
match = True match = True
for field, value in record_filter.items(): for field, value in record_filter.items():
# Prüfen, ob das Feld existiert # Check if the field exists
if field not in record: if field not in record:
match = False match = False
break break
# Wenn der Filterwert ein Integer-String ist und das Datensatzfeld ein Integer # If the filter value is an integer string and the record field is an integer
if isinstance(value, str) and value.isdigit() and isinstance(record[field], int): if isinstance(value, str) and value.isdigit() and isinstance(record[field], int):
if record[field] != int(value): if record[field] != int(value):
match = False match = False
break break
# Sonst direkter Vergleich # Otherwise direct comparison
elif record[field] != value: elif record[field] != value:
match = False match = False
break break
@ -222,66 +223,120 @@ class DatabaseConnector:
return filtered_records return filtered_records
def _register_initial_id(self, table: str, initial_id: int) -> bool:
"""
Registers the initial ID for a table.
Args:
table: Name of the table
initial_id: The initial ID
Returns:
True on success, False on error
"""
try:
# Load the current system table
system_data = self._load_system_table()
# Only register if not already present
if table not in system_data:
system_data[table] = initial_id
success = self._save_system_table(system_data)
if success:
logger.info(f"Initial ID {initial_id} for table {table} registered")
return success
return True # If already present, this is not an error
except Exception as e:
logger.error(f"Error registering the initial ID for table {table}: {e}")
return False
def _remove_initial_id(self, table: str) -> bool:
"""
Removes the initial ID for a table from the system table.
Args:
table: Name of the table
Returns:
True on success, False on error
"""
try:
# Load the current system table
system_data = self._load_system_table()
# Remove the entry if it exists
if table in system_data:
del system_data[table]
success = self._save_system_table(system_data)
if success:
logger.info(f"Initial ID for table {table} removed from system table")
return success
return True # If not present, this is not an error
except Exception as e:
logger.error(f"Error removing initial ID for table {table}: {e}")
return False
# Public API # Public API
def get_tables(self, filter_criteria: Dict[str, Any] = None) -> List[str]: def get_tables(self, filter_criteria: Dict[str, Any] = None) -> List[str]:
""" """
Gibt eine Liste aller verfügbaren Tabellen zurück. Returns a list of all available tables.
Args: Args:
filter_criteria: Optionale Filterkriterien (nicht implementiert) filter_criteria: Optional filter criteria (not implemented)
Returns: Returns:
Liste der Tabellennamen List of table names
""" """
tables = [] tables = []
try: try:
for filename in os.listdir(self.db_folder): for filename in os.listdir(self.db_folder):
if filename.endswith('.json') and not filename.startswith('_'): if filename.endswith('.json') and not filename.startswith('_'):
table_name = filename[:-5] # Entferne die .json-Endung table_name = filename[:-5] # Remove the .json extension
tables.append(table_name) tables.append(table_name)
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Lesen des Datenbankverzeichnisses: {e}") logger.error(f"Error reading the database directory: {e}")
return tables return tables
def get_fields(self, table: str, filter_criteria: Dict[str, Any] = None) -> List[str]: def get_fields(self, table: str, filter_criteria: Dict[str, Any] = None) -> List[str]:
""" """
Gibt eine Liste aller Felder einer Tabelle zurück. Returns a list of all fields in a table.
Args: Args:
table: Name der Tabelle table: Name of the table
filter_criteria: Optionale Filterkriterien (nicht implementiert) filter_criteria: Optional filter criteria (not implemented)
Returns: Returns:
Liste der Feldnamen List of field names
""" """
# Lade die Tabellendaten # Load the table data
data = self._load_table(table) data = self._load_table(table)
if not data: if not data:
return [] return []
# Nehme den ersten Datensatz als Referenz für die Felder # Take the first record as a reference for the fields
fields = list(data[0].keys()) if data else [] fields = list(data[0].keys()) if data else []
return fields return fields
def get_schema(self, table: str, language: str = None, filter_criteria: Dict[str, Any] = None) -> Dict[str, Dict[str, Any]]: def get_schema(self, table: str, language: str = None, filter_criteria: Dict[str, Any] = None) -> Dict[str, Dict[str, Any]]:
""" """
Gibt ein Schema-Objekt für eine Tabelle zurück mit Datentypen und Labels. Returns a schema object for a table with data types and labels.
Args: Args:
table: Name der Tabelle table: Name of the table
language: Sprache für die Labels (optional) language: Language for the labels (optional)
filter_criteria: Optionale Filterkriterien (nicht implementiert) filter_criteria: Optional filter criteria (not implemented)
Returns: Returns:
Schema-Objekt mit Feldern, Datentypen und Labels Schema object with fields, data types and labels
""" """
# Lade die Tabellendaten # Load the table data
data = self._load_table(table) data = self._load_table(table)
schema = {} schema = {}
@ -289,18 +344,18 @@ class DatabaseConnector:
if not data: if not data:
return schema return schema
# Nehme den ersten Datensatz als Referenz für die Felder und Datentypen # Take the first record as a reference for the fields and data types
first_record = data[0] first_record = data[0]
for field, value in first_record.items(): for field, value in first_record.items():
# Bestimme den Datentyp # Determine the data type
data_type = type(value).__name__ data_type = type(value).__name__
# Label erstellen (Standardwert ist der Feldname) # Create label (default is the field name)
label = field label = field
# Wenn model_info verfügbar ist, versuche das Label aus dem Modell zu holen # If model_info is available, try to get the label from the model
# Implementierung hängt vom tatsächlichen Modell ab # Implementation depends on the actual model
schema[field] = { schema[field] = {
"type": data_type, "type": data_type,
@ -311,27 +366,27 @@ class DatabaseConnector:
def get_recordset(self, table: str, field_filter: Dict[str, Any] = None, record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]: def get_recordset(self, table: str, field_filter: Dict[str, Any] = None, record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
""" """
Gibt eine Liste von Datensätzen aus einer Tabelle zurück, gefiltert nach Kriterien. Returns a list of records from a table, filtered by criteria.
Args: Args:
table: Name der Tabelle table: Name of the table
field_filter: Filter für Felder (welche Felder zurückgegeben werden sollen) field_filter: Filter for fields (which fields should be returned)
record_filter: Filter für Datensätze (welche Datensätze zurückgegeben werden sollen) record_filter: Filter for records (which records should be returned)
Returns: Returns:
Liste der gefilterten Datensätze List of filtered records
""" """
# Lade die Tabellendaten # Load the table data
data = self._load_table(table) data = self._load_table(table)
# Filtere nach Mandanten- und Benutzerkontext # Filter by tenant and user context
filtered_data = self._filter_by_context(data) filtered_data = self._filter_by_context(data)
# Wende record_filter an, wenn vorhanden # Apply record_filter if available
if record_filter: if record_filter:
filtered_data = self._apply_record_filter(filtered_data, record_filter) filtered_data = self._apply_record_filter(filtered_data, record_filter)
# Wenn field_filter vorhanden ist, reduziere die Felder # If field_filter is available, reduce the fields
if field_filter and isinstance(field_filter, list): if field_filter and isinstance(field_filter, list):
result = [] result = []
for record in filtered_data: for record in filtered_data:
@ -346,187 +401,157 @@ class DatabaseConnector:
def record_create(self, table: str, record_data: Dict[str, Any]) -> Dict[str, Any]: def record_create(self, table: str, record_data: Dict[str, Any]) -> Dict[str, Any]:
""" """
Erstellt einen neuen Datensatz in der Tabelle. Creates a new record in the table.
Args: Args:
table: Name der Tabelle table: Name of the table
record_data: Daten für den neuen Datensatz record_data: Data for the new record
Returns: Returns:
Der erstellte Datensatz The created record
""" """
# Lade die Tabellendaten # Load the table data
data = self._load_table(table) data = self._load_table(table)
# Füge mandate_id und user_id hinzu, falls nicht vorhanden oder 0 # Add mandate_id and user_id if not present or 0
if "mandate_id" not in record_data or record_data["mandate_id"] == 0: if "mandate_id" not in record_data or record_data["mandate_id"] == 0:
record_data["mandate_id"] = self.mandate_id record_data["mandate_id"] = self.mandate_id
if "user_id" not in record_data or record_data["user_id"] == 0: if "user_id" not in record_data or record_data["user_id"] == 0:
record_data["user_id"] = self.user_id record_data["user_id"] = self.user_id
# Bestimme die nächste ID, falls nicht vorhanden # Determine the next ID if not present
if "id" not in record_data: if "id" not in record_data:
next_id = 1 next_id = 1
if data: if data:
next_id = max(record["id"] for record in data if "id" in record) + 1 next_id = max(record["id"] for record in data if "id" in record) + 1
record_data["id"] = next_id record_data["id"] = next_id
# Wenn die Tabelle leer ist und eine System-ID registriert werden soll # If the table is empty and a system ID should be registered
if not data: if not data:
self.register_initial_id(table, record_data["id"]) self._register_initial_id(table, record_data["id"])
logger.info(f"Initiale ID {record_data['id']} für Tabelle {table} wurde registriert") logger.info(f"Initial ID {record_data['id']} for table {table} has been registered")
# Füge den neuen Datensatz hinzu # Add the new record
data.append(record_data) data.append(record_data)
# Speichere die aktualisierte Tabelle # Save the updated table
if self._save_table(table, data): if self._save_table(table, data):
return record_data return record_data
else: else:
raise ValueError(f"Fehler beim Erstellen des Datensatzes in Tabelle {table}") raise ValueError(f"Error creating the record in table {table}")
def record_delete(self, table: str, record_id: Union[str, int]) -> bool: def record_delete(self, table: str, record_id: Union[str, int]) -> bool:
""" """
Löscht einen Datensatz aus der Tabelle. Deletes a record from the table.
Args: Args:
table: Name der Tabelle table: Name of the table
record_id: ID des zu löschenden Datensatzes record_id: ID of the record to delete
Returns: Returns:
True bei Erfolg, False bei Fehler True on success, False on error
""" """
# Lade die Tabellendaten # Load table data
data = self._load_table(table) data = self._load_table(table)
# Prüfe, ob es sich um die initiale ID handelt # Search for the record
initial_id = self.get_initial_id(table)
if initial_id is not None and initial_id == record_id:
logger.warning(f"Versuch, den initialen Datensatz mit ID {record_id} aus Tabelle {table} zu löschen, wurde verhindert")
return False
# Suche den Datensatz
for i, record in enumerate(data): for i, record in enumerate(data):
if "id" in record and record["id"] == record_id: if "id" in record and record["id"] == record_id:
# Prüfe, ob der Datensatz zum aktuellen Mandanten gehört # Check if the record belongs to the current mandate
if "mandate_id" in record and record["mandate_id"] != self.mandate_id: if "mandate_id" in record and record["mandate_id"] != self.mandate_id:
raise ValueError("Not your mandate") raise ValueError("Not your mandate")
# Lösche den Datensatz # Check if it's an initial record
initial_id = self.get_initial_id(table)
if initial_id is not None and initial_id == record_id:
# Remove this entry from the system table
self._remove_initial_id(table)
logger.info(f"Initial ID {record_id} for table {table} has been removed from the system table")
# Delete the record
del data[i] del data[i]
# Speichere die aktualisierte Tabelle # Save the updated table
return self._save_table(table, data) return self._save_table(table, data)
# Datensatz nicht gefunden # Record not found
return False return False
def record_modify(self, table: str, record_id: Union[str, int], record_data: Dict[str, Any]) -> Dict[str, Any]: def record_modify(self, table: str, record_id: Union[str, int], record_data: Dict[str, Any]) -> Dict[str, Any]:
""" """
Ändert einen Datensatz in der Tabelle. Modifies a record in the table.
Args: Args:
table: Name der Tabelle table: Name of the table
record_id: ID des zu ändernden Datensatzes record_id: ID of the record to modify
record_data: Neue Daten für den Datensatz record_data: New data for the record
Returns: Returns:
Der aktualisierte Datensatz The updated record
""" """
# Lade die Tabellendaten # Load table data
data = self._load_table(table) data = self._load_table(table)
# Suche den Datensatz # Search for the record
for i, record in enumerate(data): for i, record in enumerate(data):
if "id" in record and record["id"] == record_id: if "id" in record and record["id"] == record_id:
# Prüfe, ob der Datensatz zum aktuellen Mandanten gehört # Check if the record belongs to the current mandate
if "mandate_id" in record and record["mandate_id"] != self.mandate_id: if "mandate_id" in record and record["mandate_id"] != self.mandate_id:
raise ValueError("Not your mandate") raise ValueError("Not your mandate")
# Verhindere Änderung der ID bei initialem Datensatz # Prevent changing the ID
initial_id = self.get_initial_id(table) if "id" in record_data and record_data["id"] != record_id:
if initial_id is not None and initial_id == record_id and "id" in record_data and record_data["id"] != record_id: raise ValueError(f"The ID of a record in table {table} cannot be changed")
raise ValueError(f"Die ID des initialen Datensatzes in Tabelle {table} kann nicht geändert werden")
# Aktualisiere den Datensatz # Update the record
for key, value in record_data.items(): for key, value in record_data.items():
data[i][key] = value data[i][key] = value
# Speichere die aktualisierte Tabelle # Save the updated table
if self._save_table(table, data): if self._save_table(table, data):
return data[i] return data[i]
else: else:
raise ValueError(f"Fehler beim Aktualisieren des Datensatzes in Tabelle {table}") raise ValueError(f"Error updating record in table {table}")
# Datensatz nicht gefunden # Record not found
raise ValueError(f"Datensatz mit ID {record_id} nicht gefunden in Tabelle {table}") raise ValueError(f"Record with ID {record_id} not found in table {table}")
# System-Tabellen-Funktionen
def register_initial_id(self, table: str, initial_id: int) -> bool:
"""
Registriert die initiale ID für eine Tabelle.
Args:
table: Name der Tabelle
initial_id: Die initiale ID
Returns:
True bei Erfolg, False bei Fehler
"""
try:
# Lade die aktuelle System-Tabelle
system_data = self._load_system_table()
# Nur registrieren, wenn noch nicht vorhanden
if table not in system_data:
system_data[table] = initial_id
success = self._save_system_table(system_data)
if success:
logger.info(f"Initiale ID {initial_id} für Tabelle {table} registriert")
return success
return True # Wenn bereits vorhanden, ist das kein Fehler
except Exception as e:
logger.error(f"Fehler beim Registrieren der initialen ID für Tabelle {table}: {e}")
return False
def get_initial_id(self, table: str) -> Optional[int]:
"""
Gibt die initiale ID für eine Tabelle zurück.
Args:
table: Name der Tabelle
Returns:
Die initiale ID oder None, wenn nicht vorhanden
"""
system_data = self._load_system_table()
initial_id = system_data.get(table)
if initial_id is None:
logger.debug(f"Keine initiale ID für Tabelle {table} gefunden")
return initial_id
def has_initial_id(self, table: str) -> bool: def has_initial_id(self, table: str) -> bool:
""" """
Prüft, ob eine initiale ID für eine Tabelle registriert ist. Checks if an initial ID is registered for a table.
Args: Args:
table: Name der Tabelle table: Name of the table
Returns: Returns:
True, wenn eine initiale ID registriert ist, sonst False True if an initial ID is registered, otherwise False
""" """
system_data = self._load_system_table() system_data = self._load_system_table()
return table in system_data return table in system_data
def get_all_initial_ids(self) -> Dict[str, int]: def get_initial_id(self, table: str) -> Optional[int]:
""" """
Gibt alle registrierten initialen IDs zurück. Returns the initial ID for a table.
Args:
table: Name of the table
Returns: Returns:
Dictionary mit Tabellennamen als Schlüssel und initialen IDs als Werte The initial ID or None if not present
""" """
system_data = self._load_system_table() system_data = self._load_system_table()
return system_data.copy() # Kopie zurückgeben, um das Original zu schützen initial_id = system_data.get(table)
if initial_id is None:
logger.debug(f"No initial ID found for table {table}")
return initial_id
def get_all_initial_ids(self) -> Dict[str, int]:
"""
Returns all registered initial IDs.
Returns:
Dictionary with table names as keys and initial IDs as values
"""
system_data = self._load_system_table()
return system_data.copy() # Return a copy to protect the original

View file

@@ -0,0 +1,814 @@
"""
Coder agent for development and execution of Python code.
Optimized for the new task-based processing.
"""
import logging
import json
import re
import uuid
import os
import subprocess
import tempfile
import shutil
import sys
from typing import Dict, Any, List, Optional, Tuple
from modules.chat_registry import AgentBase
logger = logging.getLogger(__name__)
class AgentCoder(AgentBase):
    """Agent for development and execution of Python code.

    The agent asks the configured AI service to generate Python code for a
    task, runs that code in an isolated virtual environment through
    SimpleCodeExecutor, and retries with AI-generated corrections when
    execution fails.
    """

    def __init__(self):
        """Initialize the coder agent with its default settings."""
        super().__init__()
        self.name = "coder"
        self.description = "Develops and executes Python code for data processing and automation"
        self.capabilities = [
            "code_development",
            "data_processing",
            "file_processing",
            "automation",
            "code_execution"
        ]
        # Executor settings
        # NOTE(review): these look like they should come from the
        # Agent_Coder_EXECUTION_TIMEOUT config entry — confirm wiring.
        self.executor_timeout = 60  # seconds
        self.executor_memory_limit = 512  # MB (informational; not enforced by the executor)
        # AI service settings
        self.ai_temperature = 0.1  # Lower temperature for deterministic code generation
        # Auto-correction settings
        self.max_correction_attempts = 3  # Maximum number of execute/correct cycles

    def set_dependencies(self, ai_service=None):
        """Set external dependencies for the agent."""
        self.ai_service = ai_service

    async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a standardized task structure and perform code development/execution.
        Args:
            task: A dictionary containing:
                - task_id: Unique ID for this task
                - prompt: The main instruction for the agent
                - input_documents: List of documents to process
                - output_specifications: List of required output documents
                - context: Additional contextual information
        Returns:
            A dictionary containing:
                - feedback: Text response explaining the code execution
                - documents: List of created document objects
        """
        try:
            # Extract relevant task information
            prompt = task.get("prompt", "")
            input_documents = task.get("input_documents", [])
            output_specs = task.get("output_specifications", [])

            # Check if AI service is available
            if not self.ai_service:
                logger.error("No AI service configured for the Coder agent")
                return {
                    "feedback": "The Coder agent is not properly configured.",
                    "documents": []
                }

            # Extract context from input documents
            document_context = self._extract_document_context(input_documents)

            # Generate code based on the prompt and document context
            logger.info("Generating code based on the task")
            code_to_execute, requirements = await self._generate_code_from_prompt(prompt, document_context)
            if not code_to_execute:
                logger.warning("AI couldn't generate any code")
                return {
                    "feedback": "I couldn't generate executable code based on the task. Please provide more detailed instructions.",
                    "documents": []
                }
            logger.info(f"Code generated with AI ({len(code_to_execute)} characters)")

            # Collect created documents; the generated code is always the first one
            generated_documents = [{
                "label": "generated_code.py",
                "content": code_to_execute
            }]

            # Execute code with auto-correction loop
            execution_context = {
                "input_documents": input_documents,
                "task": task
            }
            result, attempts_info = await self._execute_with_auto_correction(
                code_to_execute,
                requirements,
                execution_context,
                prompt  # Original prompt, reused when building correction prompts
            )

            # Create output documents based on execution result and output specifications
            if result.get("success", False):
                # Code execution successful
                output = result.get("output", "")
                execution_result = result.get("result")
                logger.info("Code executed successfully")

                # Build documents matching the requested output specifications
                result_docs = self._generate_result_documents(
                    attempts_info[-1]["code"],  # Last (successful) code version
                    output,
                    execution_result,
                    output_specs
                )
                generated_documents.extend(result_docs)

                # Create feedback for successful execution
                feedback = f"I successfully executed the code and generated {len(result_docs)} output files."
                if attempts_info and len(attempts_info) > 1:
                    feedback += f" (This required {len(attempts_info)-1} correction attempts)"

                # If no specific outputs were requested, create standard outputs
                if not output_specs:
                    generated_documents.append({
                        "label": "execution_output.txt",
                        "content": output
                    })
                    # If a result is available, also add it as a JSON document
                    if execution_result:
                        result_json = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
                        generated_documents.append({
                            "label": "execution_result.json",
                            "content": result_json
                        })
            else:
                # Code execution failed after all attempts
                error = result.get("error", "Unknown error")
                logger.error(f"Error in code execution after all correction attempts: {error}")

                # Attach the error log as an additional document
                generated_documents.append({
                    "label": "execution_error.txt",
                    "content": f"Execution error:\n\n{error}"
                })

                # Create feedback for failed execution
                feedback = f"An error occurred during code execution after {len(attempts_info)} correction attempts."

            return {
                "feedback": feedback,
                "documents": generated_documents
            }
        except Exception as e:
            # logger.exception preserves the traceback for debugging
            logger.exception(f"Error during processing by the Coder agent: {str(e)}")
            return {
                "feedback": f"An error occurred during code processing: {str(e)}",
                "documents": []
            }

    def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
        """
        Extract context from input documents for code generation.
        Args:
            documents: List of document objects
        Returns:
            Extracted context as text (only contents flagged as text are used)
        """
        context_parts = []
        for doc in documents:
            doc_name = doc.get("name", "Unnamed document")
            context_parts.append(f"--- {doc_name} ---")
            for content in doc.get("contents", []):
                # Only include contents explicitly marked as text
                if content.get("metadata", {}).get("is_text", False):
                    context_parts.append(content.get("data", ""))
        return "\n\n".join(context_parts)

    def _generate_result_documents(self, code: str, output: str, execution_result: Any,
                                   output_specs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
        """
        Generate output documents based on execution results and specifications.
        Args:
            code: Executed code
            output: Text output of the execution
            execution_result: Result object from execution
            output_specs: Output specifications
        Returns:
            List of generated document objects
        """
        documents = []
        # If no specific outputs requested, nothing to build here
        if not output_specs:
            return documents
        # Generate appropriate document for each requested output
        for spec in output_specs:
            output_label = spec.get("label", "")
            output_description = spec.get("description", "")
            # Determine output type based on file extension
            format_type = self._determine_format_type(output_label)
            # Generate document content based on format and output
            if "code" in output_label.lower() or format_type in ["py", "js", "html", "css"]:
                # Code document
                documents.append({
                    "label": output_label,
                    "content": code
                })
            elif "output" in output_label.lower() or format_type == "txt":
                # Output document
                documents.append({
                    "label": output_label,
                    "content": output
                })
            elif format_type in ["json", "yml", "yaml"] and execution_result:
                # JSON result document
                if isinstance(execution_result, (dict, list)):
                    content = json.dumps(execution_result, indent=2)
                else:
                    content = str(execution_result)
                documents.append({
                    "label": output_label,
                    "content": content
                })
            else:
                # Generic result document (fallback)
                result_str = ""
                if execution_result:
                    if isinstance(execution_result, (dict, list)):
                        result_str = json.dumps(execution_result, indent=2)
                    else:
                        result_str = str(execution_result)
                documents.append({
                    "label": output_label,
                    "content": f"Code output:\n\n{output}\n\nResult:\n\n{result_str}"
                })
        return documents

    def _determine_format_type(self, output_label: str) -> str:
        """
        Determine the format type based on the filename.
        Args:
            output_label: Output filename
        Returns:
            Format type (py, js, json, txt, etc.)
        """
        if '.' not in output_label:
            return "txt"  # Default format
        extension = output_label.split('.')[-1].lower()
        return extension

    def _extract_requirements(self, code: str) -> List[str]:
        """
        Parse the '# REQUIREMENTS: a,b,c' marker comment from generated code.
        Args:
            code: Generated Python code
        Returns:
            List of package names; empty if no marker comment is present
        """
        for line in code.split('\n'):
            if line.strip().startswith("# REQUIREMENTS:"):
                req_str = line.replace("# REQUIREMENTS:", "").strip()
                return [r.strip() for r in req_str.split(',') if r.strip()]
        return []

    async def _execute_with_auto_correction(
        self,
        initial_code: str,
        requirements: List[str],
        context: Dict[str, Any],
        original_prompt: str
    ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
        """
        Execute code with automatic error correction and retry attempts.
        Args:
            initial_code: The initial Python code
            requirements: List of required packages
            context: Additional context for execution
            original_prompt: The original user request/prompt
        Returns:
            Tuple of (final execution result, list of attempt info dictionaries)
        """
        # Initialize tracking data
        current_code = initial_code
        current_requirements = requirements.copy() if requirements else []
        attempts_info = []
        # Safe default so the return below cannot raise NameError if
        # max_correction_attempts is ever misconfigured to a value < 1
        result: Dict[str, Any] = {
            "success": False,
            "output": "",
            "error": "No execution attempt was made",
            "result": None
        }
        # Execute with correction loop
        for attempt in range(1, self.max_correction_attempts + 1):
            if attempt == 1:
                logger.info(f"Executing code (attempt {attempt}/{self.max_correction_attempts})")
            else:
                logger.info(f"Executing corrected code (attempt {attempt}/{self.max_correction_attempts})")
            # Execute current code version
            result = await self._execute_code(current_code, current_requirements, context)
            # Record attempt information
            attempts_info.append({
                "attempt": attempt,
                "code": current_code,
                "error": result.get("error", ""),
                "success": result.get("success", False)
            })
            # Success: return result and attempt info immediately
            if result.get("success", False):
                return result, attempts_info
            # Failed execution - check if max attempt limit reached
            if attempt >= self.max_correction_attempts:
                logger.warning(f"Maximum correction attempts ({self.max_correction_attempts}) reached")
                break
            # Correct code based on the error
            error_message = result.get("error", "Unknown error")
            logger.info(f"Attempting to fix code error: {error_message[:200]}...")
            # Generate corrected code
            corrected_code, new_requirements = await self._generate_code_correction(
                current_code,
                error_message,
                original_prompt,
                current_requirements
            )
            # Update for next attempt
            if corrected_code:
                current_code = corrected_code
                # Merge in any newly required packages (deduplicated)
                if new_requirements:
                    for req in new_requirements:
                        if req not in current_requirements:
                            current_requirements.append(req)
                            logger.info(f"Added new requirement: {req}")
            else:
                # Correction couldn't be generated, end loop
                logger.warning("Couldn't generate code correction")
                break
        # All attempts failed - return last result and attempt info
        return result, attempts_info

    async def _generate_code_correction(
        self,
        code: str,
        error_message: str,
        original_prompt: str,
        current_requirements: Optional[List[str]] = None
    ) -> Tuple[Optional[str], List[str]]:
        """
        Generate a corrected version of code based on error messages.
        Args:
            code: The code that generated errors
            error_message: The error message to fix
            original_prompt: The original task/requirements
            current_requirements: List of currently required packages
        Returns:
            Tuple of (corrected code, new requirements list); the code is
            None when no correction could be generated
        """
        try:
            # Create detailed prompt for code correction
            correction_prompt = f"""You need to fix an error in Python code. The code was written for this task:
ORIGINAL TASK:
{original_prompt}
CURRENT CODE:
```python
{code}
```
ERROR MESSAGE:
```
{error_message}
```
CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"}
Your task is to analyze the error and provide a corrected version of the code.
Focus specifically on fixing the error while maintaining the original functionality.
Common fixes include:
- Fixing syntax errors (missing parentheses, indentation, etc.)
- Solving import errors by adding appropriate requirements
- Correcting file paths or handling "file not found" errors
- Adding error handling for specific edge cases
- Fixing logical errors in the code
FORMATTING GUIDELINES:
1. Provide ONLY the complete corrected Python code WITHOUT explanations
2. Do NOT use code block markers like ```python or ```
3. Do NOT explain what the code does before or after
4. Do NOT add any text that isn't valid Python code
5. Start your answer directly with valid Python code
6. End your answer with valid Python code
If you need to add new required packages, place them in a specially formatted comment at the beginning of your code as follows:
# REQUIREMENTS: package1,package2,package3
Your entire answer must be valid Python that can be executed without modifications.
"""
            # Create messages for API
            messages = [
                {"role": "system", "content": "You are a Python debugging expert. You provide ONLY clean, error-free Python code, without explanations, markdown formatting, or text that isn't code."},
                {"role": "user", "content": correction_prompt}
            ]
            # Call API with very low temperature for deterministic corrections
            generated_content = await self.ai_service.call_api(
                messages,
                temperature=0.1
            )
            # Clean up the generated content to ensure it's only valid Python code
            fixed_code = self._clean_code(generated_content)
            # Extract requirements from special comment at beginning of code
            new_requirements = self._extract_requirements(fixed_code)
            return fixed_code, new_requirements
        except Exception as e:
            # Use the module logger (not the root logger) for consistency
            logger.error(f"Error generating code correction: {str(e)}")
            # Return None to indicate failure
            return None, []

    def _clean_code(self, code: str) -> str:
        """
        Clean code by removing markdown code block markers and other formatting artifacts.
        Args:
            code: The code string to clean
        Returns:
            Cleaned code string
        """
        # Remove code block markers at the very beginning/end
        code = re.sub(r'^```(?:python)?\s*', '', code)
        code = re.sub(r'```\s*$', '', code)
        # Walk lines from the end and drop backtick-only fence lines that
        # trail the actual code. Bug fix: the previous version never reset
        # the flag once a fence line was seen, so everything before a
        # trailing fence was silently discarded.
        clean_lines = []
        in_trailing_region = True  # still within the trailing-junk region
        for line in reversed(code.split('\n')):
            stripped = line.strip()
            if in_trailing_region and re.match(r'^`{1,3}$', stripped):
                continue  # drop trailing fence line
            if stripped:
                # Real content reached: keep every remaining line
                in_trailing_region = False
            clean_lines.append(line)
        clean_code = '\n'.join(reversed(clean_lines))
        # Final cleanup for any remaining backticks
        # NOTE(review): this also strips backticks inside string literals of
        # otherwise valid code — confirm this aggressive cleanup is intended.
        clean_code = re.sub(r'`{1,3}\s*', '', clean_code)
        return clean_code.strip()

    async def _generate_code_from_prompt(self, prompt: str, document_context: str) -> Tuple[str, List[str]]:
        """
        Generate Python code from a prompt using the AI service.
        Args:
            prompt: The prompt to generate code from
            document_context: Context extracted from documents
        Returns:
            Tuple of (generated Python code, required packages)
        """
        try:
            # Prepare prompt for code generation
            ai_prompt = f"""Generate Python code to solve the following task:
TASK:
{prompt}
PROVIDED CONTEXT:
{document_context if document_context else "No additional context available."}
IMPORTANT REQUIREMENTS:
1. Your code MUST define a 'result' variable to store the final result.
2. At the end of your script, the result variable should be output.
3. Make your 'result' variable a dictionary or other JSON-serializable data structure containing all relevant outputs.
4. Comment your code well to explain important operations.
5. Make your code complete and self-contained.
6. Add appropriate error handling.
FORMATTING INSTRUCTIONS:
- Return ONLY the Python code, WITHOUT introduction, explanation, or conclusion text
- Do NOT use code block markers like ```python or ```
- Do NOT explain what the code does before or after
- Do NOT add any text that isn't valid Python code
- Start your answer directly with valid Python code
- End your answer with valid Python code
For required packages, place them in a specially formatted comment at the beginning of your code in one line as follows:
# REQUIREMENTS: pandas,numpy,matplotlib,requests
Your entire answer must be valid Python that can be executed without modifications.
"""
            # Create messages for API
            messages = [
                {"role": "system", "content": "You are a Python code generator who provides ONLY clean, executable Python code with no explanations, markdown formatting, or non-code text."},
                {"role": "user", "content": ai_prompt}
            ]
            # Call API (module logger, not the root logger)
            logger.info("Calling AI API to generate code")
            generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature)
            # Clean up the generated content to ensure it's only valid Python code
            code = self._clean_code(generated_content)
            # Extract requirements from special comment at beginning of code
            requirements = self._extract_requirements(code)
            return code, requirements
        except Exception as e:
            logger.error(f"Error generating code with AI: {str(e)}")
            # Fallback: return a trivial script reporting the failure.
            # json.dumps yields a safely quoted literal even when the error
            # message contains quotes, backslashes or newlines (the previous
            # version only escaped double quotes and could emit invalid code).
            safe_msg = json.dumps(f"An error occurred during code generation: {str(e)}")
            fallback_code = (
                "# Error in code generation\n"
                f"print({safe_msg})\n"
                "# Return error result\n"
                f'result = {{"error": "Code generation failed", "message": {safe_msg}}}\n'
            )
            return fallback_code, []

    async def _execute_code(self, code: str, requirements: Optional[List[str]] = None, context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Execute Python code in an isolated environment.
        Args:
            code: The Python code to execute
            requirements: List of required packages
            context: Additional context for execution
        Returns:
            Result of code execution
        """
        executor = None
        try:
            executor = SimpleCodeExecutor(
                timeout=self.executor_timeout,
                max_memory_mb=self.executor_memory_limit,
                requirements=requirements,
                ai_service=self.ai_service
            )
            # Prepare input data for the code
            input_data = {"context": context} if context else {}
            # Execute code
            return executor.execute_code(code, input_data)
        except Exception as e:
            error_message = f"Error during code execution: {str(e)}"
            logger.error(error_message)
            return {
                "success": False,
                "output": "",
                "error": error_message,
                "result": None
            }
        finally:
            # Always clean up the executor's temp directory, even when
            # execute_code raised (previously leaked on the error path)
            if executor is not None:
                executor.cleanup()
class SimpleCodeExecutor:
    """
    A simplified executor that runs Python code in isolated virtual environments.

    Each execute_code() call creates a fresh venv inside a temporary directory,
    installs the requested (non-blocked) packages into it, runs the code as a
    standalone script and tries to parse a JSON result from its stdout.
    """

    # Maximum time in seconds for installing requirements into the venv
    # (matches the Agent_Coder_INSTALL_TIMEOUT config value)
    INSTALL_TIMEOUT = 180

    def __init__(self,
                 timeout: int = 30,
                 max_memory_mb: int = 512,
                 requirements: Optional[List[str]] = None,
                 ai_service = None):
        """
        Initialize the SimpleCodeExecutor.
        Args:
            timeout: Maximum execution time in seconds
            max_memory_mb: Maximum memory in MB (currently informational only;
                no memory limit is enforced on the subprocess)
            requirements: List of packages to install into the environment
            ai_service: Optional - AI service for further processing
        """
        self.timeout = timeout
        self.max_memory_mb = max_memory_mb
        self.temp_dir = None
        self.requirements = requirements or []
        # Packages that are never installed, even when requested
        self.blocked_packages = [
            "cryptography", "flask", "django", "tornado",  # Security risks
            "tensorflow", "pytorch", "scikit-learn"  # Resource-intensive packages
        ]
        self.ai_service = ai_service

    def _create_venv(self) -> str:
        """Create a virtual environment and return its path."""
        # Create new environment in a fresh temp directory
        venv_parent_dir = tempfile.mkdtemp(prefix="code_exec_")
        self.temp_dir = venv_parent_dir
        venv_path = os.path.join(venv_parent_dir, "venv")
        try:
            # Create virtual environment with the current interpreter
            subprocess.run([sys.executable, "-m", "venv", venv_path],
                           check=True,
                           capture_output=True)
            return venv_path
        except subprocess.CalledProcessError as e:
            logger.error(f"Error creating virtual environment: {e}")
            raise RuntimeError(f"Virtual environment could not be created: {e}")

    def _get_python_executable(self, venv_path: str) -> str:
        """Return the path to the Python executable in the virtual environment."""
        if os.name == 'nt':  # Windows
            return os.path.join(venv_path, "Scripts", "python.exe")
        else:  # Unix/Linux
            return os.path.join(venv_path, "bin", "python")

    def _install_requirements(self, venv_path: str) -> None:
        """
        Install the requested packages into the virtual environment.

        Blocked packages are filtered out. Installation failures are logged
        but do not abort execution: a missing package then surfaces as an
        ImportError in the script's stderr, which feeds the caller's
        auto-correction loop.
        Args:
            venv_path: Path of the virtual environment to install into
        """
        skipped = [r for r in self.requirements if r in self.blocked_packages]
        if skipped:
            logger.warning(f"Skipping blocked packages: {', '.join(skipped)}")
        to_install = [r for r in self.requirements if r and r not in self.blocked_packages]
        if not to_install:
            return
        python_executable = self._get_python_executable(venv_path)
        try:
            subprocess.run(
                [python_executable, "-m", "pip", "install", "--quiet", *to_install],
                check=True,
                capture_output=True,
                text=True,
                timeout=self.INSTALL_TIMEOUT
            )
            logger.info(f"Installed requirements: {', '.join(to_install)}")
        except subprocess.TimeoutExpired:
            logger.warning(f"Requirement installation timed out after {self.INSTALL_TIMEOUT} seconds")
        except subprocess.CalledProcessError as e:
            logger.warning(f"Requirement installation failed: {e.stderr}")

    def execute_code(self, code: str, input_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Execute Python code in an isolated environment.
        Args:
            code: Python code to execute
            input_data: Optional input data for the code
                NOTE(review): currently unused — the data is never passed to
                the subprocess; confirm whether it should be serialized for
                the script to read.
        Returns:
            Dictionary with execution results (success, output, error,
            result, exit_code)
        """
        logger.info("Executing code in isolated environment")
        # Create virtual environment
        venv_path = self._create_venv()
        # Bug fix: requirements were collected but never installed, so any
        # generated code that needed a package always failed with ImportError
        self._install_requirements(venv_path)
        # Create file for the code
        code_id = uuid.uuid4().hex[:8]
        code_file = os.path.join(self.temp_dir, f"code_{code_id}.py")
        # Write code
        with open(code_file, "w", encoding="utf-8") as f:
            f.write(code)
        # Get Python executable
        python_executable = self._get_python_executable(venv_path)
        logger.info(f"Using Python executable: {python_executable}")
        # Execute code
        try:
            # Run the script with its own directory as working directory
            working_dir = os.path.dirname(code_file)
            process = subprocess.run(
                [python_executable, code_file],
                timeout=self.timeout,
                capture_output=True,
                text=True,
                cwd=working_dir
            )
            # Process output
            stdout = process.stdout
            stderr = process.stderr
            # Get result from stdout if available
            result_data = None
            if process.returncode == 0 and stdout:
                try:
                    # Look for the last line that could be JSON
                    for line in reversed(stdout.strip().split('\n')):
                        line = line.strip()
                        if line and line[0] in '{[' and line[-1] in '}]':
                            try:
                                result_data = json.loads(line)
                                # Use successfully parsed JSON result
                                break
                            except json.JSONDecodeError:
                                # Not valid JSON, continue with next line
                                continue
                except Exception as e:
                    logger.warning(f"Error parsing result from stdout: {str(e)}")
            # Create result dictionary
            execution_result = {
                "success": process.returncode == 0,
                "output": stdout,
                "error": stderr if process.returncode != 0 else "",
                "result": result_data,
                "exit_code": process.returncode
            }
        except subprocess.TimeoutExpired:
            logger.error(f"Execution timed out after {self.timeout} seconds")
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Execution timed out (timeout after {self.timeout} seconds)",
                "result": None,
                "exit_code": -1
            }
        except Exception as e:
            logger.error(f"Execution error: {str(e)}")
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Execution error: {str(e)}",
                "result": None,
                "exit_code": -1
            }
        # Clean up temporary code file
        try:
            if os.path.exists(code_file):
                os.remove(code_file)
        except Exception as e:
            logger.warning(f"Error cleaning up temporary code file: {e}")
        return execution_result

    def cleanup(self):
        """Clean up temporary resources."""
        # Clean up temporary directory
        if self.temp_dir and os.path.exists(self.temp_dir):
            try:
                shutil.rmtree(self.temp_dir)
                logger.info(f"Temporary directory deleted: {self.temp_dir}")
            except Exception as e:
                logger.warning(f"Temporary directory {self.temp_dir} could not be deleted: {e}")

    def __del__(self):
        """Cleanup during garbage collection."""
        self.cleanup()
# Factory function for the Coder agent
def get_coder_agent():
    """Create and return a fresh Coder agent.

    Returns:
        A newly constructed AgentCoder instance.
    """
    return AgentCoder()

File diff suppressed because it is too large Load diff

View file

@ -41,7 +41,7 @@ class ChatManager:
self.ai_service = ChatService() self.ai_service = ChatService()
self.lucy_interface = get_lucydom_interface(mandate_id, user_id) self.lucy_interface = get_lucydom_interface(mandate_id, user_id)
self.agent_registry = get_agent_registry() self.agent_registry = get_agent_registry()
self.agent_registry.set_ai_service(self.ai_service)
### Chat Management ### Chat Management
@ -60,7 +60,7 @@ class ChatManager:
workflow = self.workflow_init(workflow_id) workflow = self.workflow_init(workflow_id)
# 2. User-Input in Message-Objekt transformieren und im Workflow speichern # 2. User-Input in Message-Objekt transformieren und im Workflow speichern
message_user = self.chat_message_to_workflow("user", "", user_input, workflow) message_user = await self.chat_message_to_workflow("user", "", user_input, workflow)
# 3. Projektleiter-Prompt erstellen und Antwort analysieren # 3. Projektleiter-Prompt erstellen und Antwort analysieren
project_manager_response = await self.chat_prompt(message_user, workflow) project_manager_response = await self.chat_prompt(message_user, workflow)
@ -88,7 +88,7 @@ class ChatManager:
obj_results.extend(task_results) obj_results.extend(task_results)
# 6. Erstelle die finale Antwort mit den relevanten Dokumenten aus obj_final_documents # 6. Erstelle die finale Antwort mit den relevanten Dokumenten aus obj_final_documents
final_message = self.chat_final_message(obj_user_response, obj_results, obj_final_documents) final_message = await self.chat_final_message(obj_user_response, obj_final_documents, obj_results)
self.message_add(workflow, final_message) self.message_add(workflow, final_message)
# 7. Finalisiere den Workflow # 7. Finalisiere den Workflow
@ -215,7 +215,7 @@ JSON_OUTPUT = {{
# Parsen der JSON-Antwort # Parsen der JSON-Antwort
return self.parse_json_response(project_manager_output) return self.parse_json_response(project_manager_output)
def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]: async def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
""" """
Integriert Benutzereingaben in ein Message-Objekt inklusive Dateien mit vollständigen Inhalten. Integriert Benutzereingaben in ein Message-Objekt inklusive Dateien mit vollständigen Inhalten.
@ -240,7 +240,7 @@ JSON_OUTPUT = {{
# Zusätzliche Dateien verarbeiten mit vollständigen Inhalten # Zusätzliche Dateien verarbeiten mit vollständigen Inhalten
additional_fileids = chat_message.get("list_file_id", []) additional_fileids = chat_message.get("list_file_id", [])
additional_files = self.process_file_ids(additional_fileids) additional_files = await self.process_file_ids(additional_fileids)
# Nachrichtenobjekt erstellen # Nachrichtenobjekt erstellen
message_object = { message_object = {
@ -254,30 +254,20 @@ JSON_OUTPUT = {{
logger.debug(f"message_user = {self.parse_json2text(message_object)}.") logger.debug(f"message_user = {self.parse_json2text(message_object)}.")
return message_object return message_object
def chat_final_message(self, obj_user_response: str, obj_results: List[Dict[str, Any]], async def chat_final_message(self, obj_user_response: str, obj_final_documents: List[Dict[str, Any]], obj_results: List[Dict[str, Any]], ) -> Dict[str, Any]:
obj_final_documents: List[Dict[str, Any]]) -> Dict[str, Any]:
""" """
Creates the final response message with documents corresponding to obj_final_documents. Creates the final response message with review of proposed and delivered.
Args: Args:
obj_user_response: Text response to the user obj_user_response: Initial text response to the user
obj_results: List of generated result documents
obj_final_documents: List of expected response documents obj_final_documents: List of expected response documents
obj_results: List of generated result documents
Returns: Returns:
Complete message object with content and relevant documents Complete message object with content and relevant documents
""" """
# Create basic message structure
final_message = {
"role": "assistant",
"agent_name": "project_manager",
"content": obj_user_response,
"documents": []
}
# Find documents that match the obj_final_documents requirements # Find documents that match the obj_final_documents requirements
matching_documents = [] matching_documents = []
doc_references = []
for answer_spec in obj_final_documents: for answer_spec in obj_final_documents:
answer_label = answer_spec.get("label") answer_label = answer_spec.get("label")
@ -287,19 +277,39 @@ JSON_OUTPUT = {{
doc_name=self.get_filename(doc) doc_name=self.get_filename(doc)
# Check if this document matches the answer specification # Check if this document matches the answer specification
if doc_name == answer_label: if doc_name == answer_label:
matching_documents.append(doc) content_ref = []
doc_type = answer_spec.get("doc_type", "Document") for c in doc.get("contents"):
doc_references.append(f"- {doc_name} ({doc_type})") content_ref.append(c.get("summary"))
doc_ref = {
"label": doc_name,
"content_summary": content_ref
}
matching_documents.append(doc_ref)
break break
# Add matching documents to the final message final_prompt = await self.ai_service.call_api([
final_message["documents"] = matching_documents {"role": "system", "content": "You are a project manager, who delivers results to a user."},
{"role": "user", "content": f"""
Give the final short feedback to the user with reference to the initial statement (obj_user_response). Provide a list of delivered files (files_deliveded). If in the list of delivered files (files_delivered) some files from the original list (files_promised) are not available, then just give a comment on this, otherwise task is completed.
# Add document references to the content if there are any Here the data:
if doc_references: obj_user_response = {self.parse_json2text(obj_user_response)}
doc_list = "\n".join(doc_references) files_promised = {self.parse_json2text(matching_documents)}
final_message["content"] += f"\n\nCreated documents:\n{doc_list}" files_deliveded = {self.parse_json2text(obj_user_response)}
"""
}
])
# Create basic message structure with proper fields
logger.debug(f"FINAL PROMPT = {self.parse_json2text(final_prompt)}.")
final_message = {
"role": "assistant",
"agent_name": "project_manager",
"content": final_prompt,
"documents": [] # DO NOT include the results documents, already with agents
}
logger.debug(f"FINAL MESSAGE = {self.parse_json2text(final_message)}.")
return final_message return final_message
@ -307,18 +317,18 @@ JSON_OUTPUT = {{
def workflow_init(self, workflow_id: Optional[str] = None) -> Dict[str, Any]: def workflow_init(self, workflow_id: Optional[str] = None) -> Dict[str, Any]:
""" """
Initialisiert einen Workflow oder lädt einen bestehenden mit Rundenzählung. Initializes a workflow or loads an existing one with round counting.
Args: Args:
workflow_id: Optional - ID des zu ladenden Workflows workflow_id: Optional - ID of the workflow to load
Returns: Returns:
Initialisiertes Workflow-Objekt Initialized workflow object
""" """
current_time = datetime.now().isoformat() current_time = datetime.now().isoformat()
if workflow_id is None or not self.lucy_interface.get_workflow(workflow_id): if workflow_id is None or not self.lucy_interface.get_workflow(workflow_id):
# Neuen Workflow erstellen # Create new workflow
new_workflow_id = str(uuid.uuid4()) if workflow_id is None else workflow_id new_workflow_id = str(uuid.uuid4()) if workflow_id is None else workflow_id
workflow = { workflow = {
"id": new_workflow_id, "id": new_workflow_id,
@ -326,7 +336,8 @@ JSON_OUTPUT = {{
"user_id": self.user_id, "user_id": self.user_id,
"name": f"Workflow {new_workflow_id[:8]}", "name": f"Workflow {new_workflow_id[:8]}",
"started_at": current_time, "started_at": current_time,
"messages": [], "messages": [], # Empty list - will be filled with references
"message_ids": [], # Initialize empty message_ids list
"logs": [], "logs": [],
"data_stats": {}, "data_stats": {},
"current_round": 1, "current_round": 1,
@ -335,26 +346,82 @@ JSON_OUTPUT = {{
"waiting_for_user": False "waiting_for_user": False
} }
# In Datenbank speichern # Save to database - only the workflow metadata
self.lucy_interface.create_workflow(workflow) workflow_db = {
"id": workflow["id"],
"mandate_id": workflow["mandate_id"],
"user_id": workflow["user_id"],
"name": workflow["name"],
"started_at": workflow["started_at"],
"status": workflow["status"],
"data_stats": workflow["data_stats"],
"current_round": workflow["current_round"],
"last_activity": workflow["last_activity"],
"waiting_for_user": workflow["waiting_for_user"],
"message_ids": workflow["message_ids"] # Include message_ids
}
self.lucy_interface.create_workflow(workflow_db)
return workflow return workflow
else: else:
# Bestehenden Workflow laden # Load existing workflow
workflow = self.lucy_interface.load_workflow_state(workflow_id) workflow = self.lucy_interface.load_workflow_state(workflow_id)
# Status aktualisieren und Rundenzähler inkrementieren # Ensure message_ids exists
if "message_ids" not in workflow:
# Initialize from existing messages
workflow["message_ids"] = [msg["id"] for msg in workflow.get("messages", [])]
# Update in database
self.lucy_interface.update_workflow(workflow_id, {"message_ids": workflow["message_ids"]})
# Update status and increment round counter
workflow["status"] = "running" workflow["status"] = "running"
workflow["last_activity"] = current_time workflow["last_activity"] = current_time
workflow["waiting_for_user"] = False workflow["waiting_for_user"] = False
# Inkrementiere current_round, wenn sie existiert, sonst setze sie auf 1 # Increment current_round if it exists, otherwise set it to 1
if "current_round" in workflow: if "current_round" in workflow:
workflow["current_round"] += 1 workflow["current_round"] += 1
else: else:
workflow["current_round"] = 1 workflow["current_round"] = 1
# In Datenbank aktualisieren # Update in database - only the relevant workflow fields
self.lucy_interface.save_workflow_state(workflow) workflow_update = {
"status": workflow["status"],
"last_activity": workflow["last_activity"],
"waiting_for_user": workflow["waiting_for_user"],
"current_round": workflow["current_round"]
}
self.lucy_interface.update_workflow(workflow_id, workflow_update)
return workflow
def workflow_finish(self, workflow: Dict[str, Any]) -> Dict[str, Any]:
"""
Finalizes a workflow and sets the status to 'completed'.
Args:
workflow: Workflow object
Returns:
Updated workflow object
"""
# Prepare workflow update data
workflow_update = {
"status": "completed",
"last_activity": datetime.now().isoformat(),
"waiting_for_user": True
}
# Update the workflow object in memory
workflow["status"] = workflow_update["status"]
workflow["last_activity"] = workflow_update["last_activity"]
workflow["waiting_for_user"] = workflow_update["waiting_for_user"]
# Save workflow state to database - only relevant fields, not the messages list
self.lucy_interface.update_workflow(workflow["id"], workflow_update)
return workflow return workflow
async def workflow_summarize(self, workflow: Dict[str, Any], message_user: Dict[str, Any]) -> str: async def workflow_summarize(self, workflow: Dict[str, Any], message_user: Dict[str, Any]) -> str:
@ -382,23 +449,6 @@ JSON_OUTPUT = {{
return "\n\n".join(summary_parts) return "\n\n".join(summary_parts)
def workflow_finish(self, workflow: Dict[str, Any]) -> Dict[str, Any]:
"""
Finalisiert einen Workflow und setzt den Status auf 'stopped'.
Args:
workflow: Workflow-Objekt
Returns:
Aktualisiertes Workflow-Objekt
"""
workflow["status"] = "completed"
workflow["last_activity"] = datetime.now().isoformat()
workflow["waiting_for_user"] = True
# In Datenbank speichern
self.lucy_interface.save_workflow_state(workflow)
return workflow
### Agents ### Agents
@ -412,7 +462,7 @@ JSON_OUTPUT = {{
""" """
return self.agent_registry.get_agent_infos() return self.agent_registry.get_agent_infos()
def agent_input_documents(self, doc_input_list: List[Dict[str, Any]], workflow: Dict[str, Any]) -> List[Dict[str, Any]]: async def agent_input_documents(self, doc_input_list: List[Dict[str, Any]], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
""" """
Prepares input documents for an agent, sorted with newest first. Prepares input documents for an agent, sorted with newest first.
@ -440,14 +490,15 @@ JSON_OUTPUT = {{
# Search for the document in sorted workflow messages (newest first) # Search for the document in sorted workflow messages (newest first)
for message in sorted_messages: for message in sorted_messages:
for doc in message.get("documents", []): for doc in message.get("documents", []):
if (doc_file_id!="" and doc_file_id==doc.file_id) or (doc_filename!="" and self.get_filename(doc) == doc_filename): if (doc_file_id!="" and doc_file_id==doc.get("file_id")) or (doc_filename!="" and self.get_filename(doc) == doc_filename):
found_doc = doc found_doc = doc
break break
if found_doc: if found_doc:
break break
if found_doc: if found_doc:
# Process document for agent based on the specification # Process document for agent based on the specification
processed_doc = self.process_document_for_agent(found_doc, doc_spec) processed_doc = await self.process_document_for_agent(found_doc, doc_spec)
prepared_inputs.append(processed_doc) prepared_inputs.append(processed_doc)
else: else:
logger.warning(f"Document with label '{doc_filename}', file_id '{doc_file_id}' not found in workflow") logger.warning(f"Document with label '{doc_filename}', file_id '{doc_file_id}' not found in workflow")
@ -483,6 +534,9 @@ JSON_OUTPUT = {{
data = content.get("data", "") data = content.get("data", "")
processed_content = content.copy() processed_content = content.copy()
# Check if content data is base64 encoded
is_base64 = content.get("metadata", {}).get("base64_encoded", False)
try: try:
# Use the AI service to process the document content according to the prompt from the project manager for the document specification # Use the AI service to process the document content according to the prompt from the project manager for the document specification
summary = doc_spec.get("prompt", "Extract the relevant information from this document") summary = doc_spec.get("prompt", "Extract the relevant information from this document")
@ -506,13 +560,15 @@ JSON_OUTPUT = {{
{"role": "user", "content": ai_prompt} {"role": "user", "content": ai_prompt}
]) ])
# Update the processed content with the AI-processed data # DO NOT change the original data field
processed_content["data"] = processed_data # processed_content["data"] unchanged
processed_content["data_extracted"] = processed_data
processed_content["metadata"]["ai_processed"] = True processed_content["metadata"]["ai_processed"] = True
except Exception as e: except Exception as e:
logger.error(f"Error processing document content with AI: {str(e)}") logger.error(f"Error processing document content with AI: {str(e)}")
# Fall back to original content if AI processing fails # Fall back to original content if AI processing fails
processed_content["data_extracted"] = "(no information)"
processed_contents.append(processed_content) processed_contents.append(processed_content)
@ -552,9 +608,6 @@ JSON_OUTPUT = {{
logger.error(f"Agent '{agent_name}' not found") logger.error(f"Agent '{agent_name}' not found")
return [] return []
# Prepare input documents for the agent
input_documents = self.agent_input_documents(task.get('input_documents', []), workflow)
# Prepare output document specifications # Prepare output document specifications
output_specs = [] output_specs = []
for doc in task.get("output_documents", []): for doc in task.get("output_documents", []):
@ -564,6 +617,9 @@ JSON_OUTPUT = {{
} }
output_specs.append(output_spec) output_specs.append(output_spec)
# Prepare input documents for the agent
input_documents = await self.agent_input_documents(task.get('input_documents', []), workflow)
# Create a standardized task object for the agent # Create a standardized task object for the agent
agent_task = { agent_task = {
"task_id": str(uuid.uuid4()), "task_id": str(uuid.uuid4()),
@ -581,8 +637,13 @@ JSON_OUTPUT = {{
# Execute the agent with the standardized task # Execute the agent with the standardized task
try: try:
# Process the task using the agent's standardized interface # Process the task using the agent's standardized interface
logger.debug("TASK: "+self.parse_json2text(agent_task))
logger.debug(f"Agent '{agent_name}' AI service available: {agent.ai_service is not None}")
agent_results = await agent.process_task(agent_task) agent_results = await agent.process_task(agent_task)
logger.debug(f"Agent '{agent_name}' completed task. RESULT: {self.parse_json2text(agent_results)}")
# Log the agent response # Log the agent response
self.log_add( self.log_add(
workflow, workflow,
@ -596,18 +657,17 @@ JSON_OUTPUT = {{
} }
# Create a message in the workflow with the agent's response # Create a message in the workflow with the agent's response
agent_message = self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow) agent_message = await self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow)
logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.") logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.")
return agent_message.get("documents", []) return agent_message.get("documents", [])
except Exception as e: except Exception as e:
error_msg = f"Error executing agent '{agent_name}': {str(e)}" error_msg = f"Error executing agent '{agent_name}': {str(e)}"
logger.error(error_msg) logger.error(error_msg, exc_info=True) # Add exc_info=True to get full traceback
self.log_add(workflow, error_msg, level="error") self.log_add(workflow, error_msg, level="error")
return [] return []
def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]: def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]:
""" """
Saves all documents from agent results as files and returns a list of file IDs. Saves all documents from agent results as files and returns a list of file IDs.
@ -641,23 +701,7 @@ JSON_OUTPUT = {{
# Determine if content is base64 encoded # Determine if content is base64 encoded
is_base64 = False is_base64 = False
if not isinstance(content, bytes): if isinstance(content, dict) and content.get("metadata", {}).get("base64_encoded", False):
# Check if content might be base64 encoded
try:
if content and isinstance(content, str):
# Check for base64 pattern (simplified)
if (len(content) % 4 == 0 and
re.match(r'^[A-Za-z0-9+/]+={0,2}$', content)):
# Try to decode a small sample
sample = content[:100] if len(content) > 100 else content
base64.b64decode(sample)
is_base64 = True
except Exception:
# Not base64, treat as regular text
is_base64 = False
# If content has metadata flag indicating it's base64
if isinstance(content, dict) and content.get("_is_base64", False):
is_base64 = True is_base64 = True
content = content.get("data", "") content = content.get("data", "")
@ -699,46 +743,68 @@ JSON_OUTPUT = {{
def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]: def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]:
""" """
Fügt eine Nachricht zum Workflow hinzu und aktualisiert last_activity. Adds a message to the workflow and updates last_activity.
Saves the message in the database and updates the workflow with references.
Args: Args:
workflow: Workflow-Objekt workflow: Workflow object
message: Zu speichernde Nachricht message: Message to be saved
Returns: Returns:
ID der hinzugefügten Nachricht Added message
""" """
current_time = datetime.now().isoformat() current_time = datetime.now().isoformat()
# Sicherstellen, dass Messages-Liste existiert # Ensure messages list exists
if "messages" not in workflow: if "messages" not in workflow:
workflow["messages"] = [] workflow["messages"] = []
# Neue Nachrichten-ID generieren, falls nicht vorhanden # Generate new message ID if not present
if "id" not in message: if "id" not in message:
message["id"] = f"msg_{str(uuid.uuid4())}" message["id"] = f"msg_{str(uuid.uuid4())}"
# Workflow-ID und Zeitstempel hinzufügen # Add workflow ID and timestamps
message["workflow_id"] = workflow["id"] message["workflow_id"] = workflow["id"]
message["started_at"] = current_time message["started_at"] = current_time
message["finished_at"] = current_time message["finished_at"] = current_time
# Sequenznummer setzen # Set sequence number
message["sequence_no"] = len(workflow["messages"]) + 1 message["sequence_no"] = len(workflow["messages"]) + 1
# Status setzen # Ensure required fields are present
if "role" not in message:
# Set a default role based on agent_name
message["role"] = "assistant" if message.get("agent_name") else "user"
if "agent_name" not in message:
message["agent_name"] = ""
# Set status
message["status"] = "completed" message["status"] = "completed"
# Message zum Workflow hinzufügen # Add message to workflow
workflow["messages"].append(message) workflow["messages"].append(message)
# Workflow-Status aktualisieren # Ensure message_ids list exists
workflow["last_activity"] = current_time if "message_ids" not in workflow:
workflow["last_message_id"] = message["id"] workflow["message_ids"] = []
# In Datenbank speichern # Add message ID to the message_ids list
workflow["message_ids"].append(message["id"])
# Update workflow status
workflow["last_activity"] = current_time
# Save to database - first the message itself
self.lucy_interface.create_workflow_message(message) self.lucy_interface.create_workflow_message(message)
# Then save the workflow with updated references
workflow_update = {
"last_activity": current_time,
"message_ids": workflow["message_ids"] # Update the message_ids field
}
self.lucy_interface.update_workflow(workflow["id"], workflow_update)
return message return message
async def message_summarize(self, message: Dict[str, Any]) -> str: async def message_summarize(self, message: Dict[str, Any]) -> str:
@ -773,7 +839,7 @@ JSON_OUTPUT = {{
doc_name = self.get_filename(doc) doc_name = self.get_filename(doc)
docs_list.append(doc_name) docs_list.append(doc_name)
if docs_list: if docs_list:
docs_summary = f"\nDocuments:\n{'- '.join(docs_list)}" docs_summary = "\nDocuments:" + "\n- ".join(docs_list)
return f"[{role} {agent_name}]: {content_summary}{docs_summary}" return f"[{role} {agent_name}]: {content_summary}{docs_summary}"
@ -833,6 +899,12 @@ JSON_OUTPUT = {{
logger.warning(f"File {file_id} does not belong to mandate {self.mandate_id}") logger.warning(f"File {file_id} does not belong to mandate {self.mandate_id}")
continue continue
# Load file content
file_content = self.lucy_interface.get_file_data(file_id)
if file_content is None:
logger.warning(f"No content found for file with ID {file_id}")
continue
# Create document # Create document
file_name_ext = file.get("name") file_name_ext = file.get("name")
document = { document = {
@ -840,13 +912,11 @@ JSON_OUTPUT = {{
"file_id": file_id, "file_id": file_id,
"name": os.path.splitext(file_name_ext)[0] if os.path.splitext(file_name_ext)[0] else "noname", "name": os.path.splitext(file_name_ext)[0] if os.path.splitext(file_name_ext)[0] else "noname",
"ext": os.path.splitext(file_name_ext)[1][1:] if os.path.splitext(file_name_ext)[1] else "bin", "ext": os.path.splitext(file_name_ext)[1][1:] if os.path.splitext(file_name_ext)[1] else "bin",
"data": base64.b64encode(file_content).decode('utf-8'), # Add file data as base64
"contents": [] "contents": []
} }
# Load contents immediately # Extract contents
file_content = self.lucy_interface.get_file_data(file_id)
if file_content is not None:
# Extract contents with the external function
contents = get_document_contents(file, file_content) contents = get_document_contents(file, file_content)
# Add summaries to each content item # Add summaries to each content item
@ -856,10 +926,8 @@ JSON_OUTPUT = {{
document["contents"] = contents document["contents"] = contents
logger.info(f"File {file.get('name', 'unnamed')} (ID: {file_id}) loaded with {len(contents)} contents and summaries") logger.info(f"File {file.get('name', 'unnamed')} (ID: {file_id}) loaded with {len(contents)} contents and summaries")
else:
logger.warning(f"No content found for file with ID {file_id}")
documents.append(document) documents.append(document)
except Exception as e: except Exception as e:
logger.error(f"Error processing file {file_id}: {str(e)}") logger.error(f"Error processing file {file_id}: {str(e)}")
# Continue with remaining files instead of failing # Continue with remaining files instead of failing

View file

@ -41,6 +41,10 @@ class AgentAnalyst(AgentBase):
self.chart_dpi = 100 self.chart_dpi = 100
plt.style.use(self.plt_style) plt.style.use(self.plt_style)
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
""" """
Process a standardized task structure and perform data analysis. Process a standardized task structure and perform data analysis.

File diff suppressed because it is too large Load diff

View file

@ -27,6 +27,10 @@ class AgentCreative(AgentBase):
"question_answering" "question_answering"
] ]
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
""" """
Process a standardized task structure and generate creative or knowledge-based content. Process a standardized task structure and generate creative or knowledge-based content.

View file

@ -27,6 +27,10 @@ class AgentDocumentation(AgentBase):
"knowledge_organization" "knowledge_organization"
] ]
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
""" """
Process a standardized task structure and create documentation. Process a standardized task structure and create documentation.

View file

@ -36,10 +36,14 @@ class AgentWebcrawler(AgentBase):
] ]
# Web crawling configuration # Web crawling configuration
self.max_url = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_URLS", "5")) self.max_url = int(APP_CONFIG.get("Agent_Webcrawler_MAX_URLS", "5"))
self.max_key = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_KEYWORDS", "3")) self.max_key = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_KEYWORDS", "3"))
self.max_result = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_RESULTS", "5")) self.max_result = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_RESULTS", "5"))
self.timeout = int(APP_CONFIG.get("Connector_AiWebscraping_TIMEOUT", "30")) self.timeout = int(APP_CONFIG.get("Agent_Webcrawler_TIMEOUT", "30"))
def set_dependencies(self, ai_service=None):
"""Set external dependencies for the agent."""
self.ai_service = ai_service
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
@ -542,7 +546,7 @@ class AgentWebcrawler(AgentBase):
List of search results List of search results
""" """
formatted_query = quote_plus(query) formatted_query = quote_plus(query)
url = f"{APP_CONFIG.get('Connector_AiWebscraping_SEARCH_ENGINE', 'https://html.duckduckgo.com/html/?q=')}{formatted_query}" url = f"{APP_CONFIG.get('Agent_Webcrawler_SEARCH_ENGINE', 'https://html.duckduckgo.com/html/?q=')}{formatted_query}"
search_results_soup = self._read_url(url) search_results_soup = self._read_url(url)
if not isinstance(search_results_soup, BeautifulSoup) or not search_results_soup.select('.result'): if not isinstance(search_results_soup, BeautifulSoup) or not search_results_soup.select('.result'):
@ -614,7 +618,7 @@ class AgentWebcrawler(AgentBase):
BeautifulSoup object with the content or empty on errors BeautifulSoup object with the content or empty on errors
""" """
headers = { headers = {
'User-Agent': APP_CONFIG.get("Connector_AiWebscraping_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"), 'User-Agent': APP_CONFIG.get("Agent_Webcrawler_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"),
'Accept': 'text/html,application/xhtml+xml,application/xml', 'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9', 'Accept-Language': 'en-US,en;q=0.9',
} }

View file

@ -98,10 +98,12 @@ def get_document_contents(file_metadata: Dict[str, Any], file_content: bytes) ->
} }
}) })
# Add generic attributes for all documents
for content in contents: for content in contents:
if isinstance(content.get("data"), bytes): if isinstance(content.get("data"), bytes):
content["data"] = base64.b64encode(content["data"]).decode('utf-8') content["data"] = base64.b64encode(content["data"]).decode('utf-8')
# Markiere in Metadaten, dass dies base64-kodiert ist # Add base64 flag
if "metadata" not in content: if "metadata" not in content:
content["metadata"] = {} content["metadata"] = {}
content["metadata"]["base64_encoded"] = True content["metadata"]["base64_encoded"] = True

View file

@ -180,7 +180,11 @@ class AgentRegistry:
Agent instance or None if not found Agent instance or None if not found
""" """
if agent_identifier in self.agents: if agent_identifier in self.agents:
return self.agents[agent_identifier] agent = self.agents[agent_identifier]
# Ensure the agent has the AI service
if hasattr(agent, 'set_dependencies') and self.ai_service:
agent.set_dependencies(ai_service=self.ai_service)
return agent
logger.error(f"Agent with identifier '{agent_identifier}' not found") logger.error(f"Agent with identifier '{agent_identifier}' not found")
return None return None

View file

@ -1,3 +1,4 @@
import os
import logging import logging
import uuid import uuid
from datetime import datetime from datetime import datetime
@ -579,11 +580,14 @@ class LucyDOMInterface:
logger.info(f"Saving file content to database for file: {file_name}") logger.info(f"Saving file content to database for file: {file_name}")
self.create_file_data(db_file["id"], file_content) self.create_file_data(db_file["id"], file_content)
# Debug: Export file to static folder
if logger.isEnabledFor(logging.DEBUG): self._export_file_to_static(file_content, db_file["id"], file_name)
# Debug: Verify database record was created # Debug: Verify database record was created
if not db_file: if not db_file:
logger.warning(f"Database record for file {file_name} was not created properly") logger.warning(f"Database record for file {file_name} was not created properly")
else: else:
logger.info(f"Database record created for file {file_name}") logger.debug(f"Database record created for file {file_name}")
logger.info(f"File upload process completed for: {file_name}") logger.info(f"File upload process completed for: {file_name}")
return db_file return db_file
@ -629,6 +633,10 @@ class LucyDOMInterface:
logger.error(f"Fehler beim Herunterladen der Datei {file_id}: {str(e)}") logger.error(f"Fehler beim Herunterladen der Datei {file_id}: {str(e)}")
raise FileError(f"Fehler beim Herunterladen der Datei: {str(e)}") raise FileError(f"Fehler beim Herunterladen der Datei: {str(e)}")
def _export_file_to_static(self, file_content: bytes, file_id: int, file_name: str):
debug_filename = f"{file_id}_{file_name}"
with open(f"./static/{debug_filename}", 'wb') as f:
f.write(file_content)
# Workflow Methoden # Workflow Methoden
@ -664,10 +672,6 @@ class LucyDOMInterface:
if "last_activity" not in workflow_data: if "last_activity" not in workflow_data:
workflow_data["last_activity"] = current_time workflow_data["last_activity"] = current_time
# Stelle sicher, dass last_message_id gesetzt ist, falls nicht vorhanden
if "last_message_id" not in workflow_data:
workflow_data["last_message_id"] = ""
return self.db.record_create("workflows", workflow_data) return self.db.record_create("workflows", workflow_data)
def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]: def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]:
@ -723,70 +727,93 @@ class LucyDOMInterface:
return self.db.get_recordset("workflow_messages", record_filter={"workflow_id": workflow_id}) return self.db.get_recordset("workflow_messages", record_filter={"workflow_id": workflow_id})
def create_workflow_message(self, message_data: Dict[str, Any]) -> Dict[str, Any]: def create_workflow_message(self, message_data: Dict[str, Any]) -> Dict[str, Any]:
"""Erstellt eine neue Nachricht für einen Workflow """
Creates a new message for a workflow.
Args: Args:
message_data: Die Nachrichtendaten message_data: The message data
Returns: Returns:
Die erstellte Nachricht oder None bei Fehler The created message or None on error
""" """
try: try:
# Check if required fields are present # Check if required fields are present
required_fields = ["id", "workflow_id"] required_fields = ["id", "workflow_id"]
for field in required_fields: for field in required_fields:
if field not in message_data: if field not in message_data:
logger.error(f"Pflichtfeld '{field}' fehlt in message_data") logger.error(f"Required field '{field}' missing in message_data")
raise ValueError(f"Pflichtfeld '{field}' fehlt in den Nachrichtendaten") raise ValueError(f"Required field '{field}' missing in message data")
# Validate that ID is not None # Validate that ID is not None
if message_data["id"] is None: if message_data["id"] is None:
message_data["id"] = f"msg_{uuid.uuid4()}" message_data["id"] = f"msg_{uuid.uuid4()}"
logger.warning(f"Automatisch generierte ID für Workflow-Nachricht: {message_data['id']}") logger.warning(f"Automatically generated ID for workflow message: {message_data['id']}")
# Stellen Sie sicher, dass die benötigten Felder vorhanden sind # Ensure required fields are present
if "started_at" not in message_data and "created_at" not in message_data: if "started_at" not in message_data and "created_at" not in message_data:
message_data["started_at"] = self._get_current_timestamp() message_data["started_at"] = self._get_current_timestamp()
# Wenn "created_at" vorhanden ist, übertrage es nach "started_at"
if "created_at" in message_data and "started_at" not in message_data: if "created_at" in message_data and "started_at" not in message_data:
message_data["started_at"] = message_data["created_at"] message_data["started_at"] = message_data["created_at"]
del message_data["created_at"] del message_data["created_at"]
# Status setzen, falls nicht vorhanden # Set status if not present
if "status" not in message_data: if "status" not in message_data:
message_data["status"] = "completed" message_data["status"] = "completed"
# Sequenznummer setzen, falls nicht vorhanden # Set sequence number if not present
if "sequence_no" not in message_data: if "sequence_no" not in message_data:
# Hole aktuelle Nachrichten, um die nächste Sequenznummer zu bestimmen # Get current messages to determine next sequence number
existing_messages = self.get_workflow_messages(message_data["workflow_id"]) existing_messages = self.get_workflow_messages(message_data["workflow_id"])
message_data["sequence_no"] = len(existing_messages) + 1 message_data["sequence_no"] = len(existing_messages) + 1
# Debug-Log für die zu erstellenden Daten # Ensure role and agent_name are present
logger.debug(f"Erstelle Workflow-Nachricht mit Daten: {message_data}") if "role" not in message_data:
message_data["role"] = "assistant" if message_data.get("agent_name") else "user"
return self.db.record_create("workflow_messages", message_data) if "agent_name" not in message_data:
message_data["agent_name"] = ""
# Debug log for data to create
logger.debug(f"Creating workflow message with data: {message_data}")
# Create message in database
created_message = self.db.record_create("workflow_messages", message_data)
# Update workflow's message_ids if this is a new message
if created_message:
workflow_id = message_data["workflow_id"]
workflow = self.get_workflow(workflow_id)
if workflow:
# Get current message_ids or initialize empty list
message_ids = workflow.get("message_ids", [])
# Add the new message ID if not already in the list
if created_message["id"] not in message_ids:
message_ids.append(created_message["id"])
self.update_workflow(workflow_id, {"message_ids": message_ids})
return created_message
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Erstellen der Workflow-Nachricht: {str(e)}") logger.error(f"Error creating workflow message: {str(e)}")
# Return None instead of raising to avoid cascading failures # Return None instead of raising to avoid cascading failures
return None return None
def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Dict[str, Any]: def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Dict[str, Any]:
""" """
Aktualisiert eine bestehende Workflow-Nachricht in der Datenbank Updates an existing workflow message in the database.
with improved document handling.
Args: Args:
message_id: ID der Nachricht message_id: ID of the message
message_data: Zu aktualisierende Daten message_data: Data to update
Returns: Returns:
Das aktualisierte Nachrichtenobjekt oder None bei Fehler The updated message object or None on error
""" """
try: try:
# Print debug info # Debug info
print(f"Updating message {message_id} in database") logger.debug(f"Updating message {message_id} in database")
# Ensure message_id is provided # Ensure message_id is provided
if not message_id: if not message_id:
@ -806,39 +833,19 @@ class LucyDOMInterface:
logger.error(f"Workflow ID missing for new message {message_id}") logger.error(f"Workflow ID missing for new message {message_id}")
return None return None
# Ensure documents array is handled properly # Update existing message
if "documents" in message_data: existing_message = messages[0]
logger.info(f"Message {message_id} has {len(message_data['documents'])} documents")
# Make sure we're not storing huge content in the database # Ensure required fields present
# For each document, ensure content size is reasonable for key in ["role", "agent_name"]:
documents_to_store = [] if key not in message_data and key not in existing_message:
for doc in message_data["documents"]: message_data[key] = "assistant" if key == "role" else ""
doc_copy = doc.copy()
# Process contents array if it exists
if "contents" in doc_copy:
# Ensure contents is not too large - limit text size
for content in doc_copy["contents"]:
if content.get("type") == "text" and "text" in content:
text = content["text"]
if len(text) > 1000: # Limit text preview to 1000 chars
content["text"] = text[:1000] + "... [truncated]"
documents_to_store.append(doc_copy)
# Replace with the processed documents
message_data["documents"] = documents_to_store
# Log the update data size for debugging
update_data_size = len(str(message_data))
logger.debug(f"Update data size: {update_data_size} bytes")
# Ensure ID is in the dataset # Ensure ID is in the dataset
if 'id' not in message_data: if 'id' not in message_data:
message_data['id'] = message_id message_data['id'] = message_id
# Konvertiere created_at zu started_at falls nötig # Convert created_at to started_at if needed
if "created_at" in message_data and "started_at" not in message_data: if "created_at" in message_data and "started_at" not in message_data:
message_data["started_at"] = message_data["created_at"] message_data["started_at"] = message_data["created_at"]
del message_data["created_at"] del message_data["created_at"]
@ -1005,23 +1012,24 @@ class LucyDOMInterface:
def save_workflow_state(self, workflow: Dict[str, Any], save_messages: bool = True, save_logs: bool = True) -> bool: def save_workflow_state(self, workflow: Dict[str, Any], save_messages: bool = True, save_logs: bool = True) -> bool:
""" """
Speichert den kompletten Zustand eines Workflows in der Datenbank. Saves the state of a workflow to the database.
Dies umfasst den Workflow selbst, Nachrichten und Logs. Workflow data is updated, but messages are stored separately.
Args: Args:
workflow: Das vollständige Workflow-Objekt workflow: The workflow object
save_messages: Flag, ob Nachrichten gespeichert werden sollen save_messages: Flag to determine if messages should be saved
save_logs: Flag, ob Logs gespeichert werden sollen save_logs: Flag to determine if logs should be saved
Returns: Returns:
True bei Erfolg, False bei Fehler True on success, False on failure
""" """
try: try:
workflow_id = workflow.get("id") workflow_id = workflow.get("id")
if not workflow_id: if not workflow_id:
return False return False
# Extrahiere nur die für die Datenbank relevanten Workflow-Felder # Extract only the database-relevant workflow fields
# IMPORTANT: Don't store messages in the workflow table!
workflow_db_data = { workflow_db_data = {
"id": workflow_id, "id": workflow_id,
"mandate_id": workflow.get("mandate_id", self.mandate_id), "mandate_id": workflow.get("mandate_id", self.mandate_id),
@ -1030,58 +1038,56 @@ class LucyDOMInterface:
"status": workflow.get("status", "unknown"), "status": workflow.get("status", "unknown"),
"started_at": workflow.get("started_at", self._get_current_timestamp()), "started_at": workflow.get("started_at", self._get_current_timestamp()),
"last_activity": workflow.get("last_activity", self._get_current_timestamp()), "last_activity": workflow.get("last_activity", self._get_current_timestamp()),
"last_message_id": workflow.get("last_message_id", ""),
"data_stats": workflow.get("data_stats", {}) "data_stats": workflow.get("data_stats", {})
} }
# Prüfen, ob der Workflow bereits existiert # Check if workflow already exists
existing_workflow = self.get_workflow(workflow_id) existing_workflow = self.get_workflow(workflow_id)
if existing_workflow: if existing_workflow:
self.update_workflow(workflow_id, workflow_db_data) self.update_workflow(workflow_id, workflow_db_data)
else: else:
self.create_workflow(workflow_db_data) self.create_workflow(workflow_db_data)
# Save messages
# Nachrichten speichern
if save_messages and "messages" in workflow: if save_messages and "messages" in workflow:
# Bestehende Nachrichten abrufen
existing_messages = {msg["id"]: msg for msg in self.get_workflow_messages(workflow_id)}
for message in workflow["messages"]: for message in workflow["messages"]:
message_id = message.get("id") message_id = message.get("id")
if not message_id: if not message_id:
continue continue
# Nur relevante Daten für die Datenbank extrahieren # Since each message is already saved with create_workflow_message,
# we only need to check if updates are necessary
# First, get existing message from database
existing_messages = self.get_workflow_messages(workflow_id)
existing_message = next((m for m in existing_messages if m.get("id") == message_id), None)
if existing_message:
# Check if updates are needed
has_changes = False
for key in ["role", "agent_name", "content", "status", "documents"]:
if key in message and message.get(key) != existing_message.get(key):
has_changes = True
break
if has_changes:
# Extract only relevant data for the database
message_data = { message_data = {
"id": message_id, "role": message.get("role", existing_message.get("role", "unknown")),
"workflow_id": workflow_id, "content": message.get("content", existing_message.get("content", "")),
"sequence_no": message.get("sequence_no", 0), "agent_name": message.get("agent_name", existing_message.get("agent_name", "")),
"role": message.get("role", "unknown"), "status": message.get("status", existing_message.get("status", "completed")),
"content": message.get("content"), "documents": message.get("documents", existing_message.get("documents", []))
"agent_name": message.get("agent_name"),
"status": message.get("status", "completed"),
"started_at": message.get("started_at", self._get_current_timestamp()),
"finished_at": message.get("finished_at"),
"parent_message_id": message.get("parent_message_id"),
# IMPORTANT: Include documents field to persist file attachments
"documents": message.get("documents", [])
} }
self.update_workflow_message(message_id, message_data)
# Debug logging for documents
doc_count = len(message.get("documents", []))
if doc_count > 0:
logger.info(f"Message {message_id} has {doc_count} documents to save")
# Nachricht erstellen oder aktualisieren
if message_id in existing_messages:
self.db.record_modify("workflow_messages", message_id, message_data)
else: else:
self.db.record_create("workflow_messages", message_data) # Message doesn't exist in database yet
# It should have been saved via create_workflow_message
# If not, log a warning
logger.warning(f"Message {message_id} in workflow {workflow_id} not found in database")
# Logs speichern # Save logs
if save_logs and "logs" in workflow: if save_logs and "logs" in workflow:
# Bestehende Logs abrufen # Get existing logs
existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)} existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)}
for log in workflow["logs"]: for log in workflow["logs"]:
@ -1089,7 +1095,7 @@ class LucyDOMInterface:
if not log_id: if not log_id:
continue continue
# Nur relevante Daten für die Datenbank extrahieren # Extract only relevant data for the database
log_data = { log_data = {
"id": log_id, "id": log_id,
"workflow_id": workflow_id, "workflow_id": workflow_id,
@ -1100,7 +1106,7 @@ class LucyDOMInterface:
"agent_name": log.get("agent_name") "agent_name": log.get("agent_name")
} }
# Log erstellen oder aktualisieren # Create or update log
if log_id in existing_logs: if log_id in existing_logs:
self.db.record_modify("workflow_logs", log_id, log_data) self.db.record_modify("workflow_logs", log_id, log_data)
else: else:
@ -1108,22 +1114,22 @@ class LucyDOMInterface:
return True return True
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Speichern des Workflow-Zustands: {str(e)}") logger.error(f"Error saving workflow state: {str(e)}")
return False return False
def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]: def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]:
""" """
Lädt den kompletten Zustand eines Workflows aus der Datenbank. Loads the complete state of a workflow from the database.
Dies umfasst den Workflow selbst, Nachrichten und Logs. This includes the workflow itself, messages, and logs.
Args: Args:
workflow_id: ID des zu ladenden Workflows workflow_id: ID of the workflow to load
Returns: Returns:
Das vollständige Workflow-Objekt oder None bei Fehler The complete workflow object or None on error
""" """
try: try:
# Basis-Workflow laden # Load base workflow
workflow = self.get_workflow(workflow_id) workflow = self.get_workflow(workflow_id)
if not workflow: if not workflow:
return None return None
@ -1131,41 +1137,46 @@ class LucyDOMInterface:
# Log the workflow base retrieval # Log the workflow base retrieval
logger.debug(f"Loaded base workflow {workflow_id} from database") logger.debug(f"Loaded base workflow {workflow_id} from database")
# Nachrichten laden # Load messages
messages = self.get_workflow_messages(workflow_id) messages = self.get_workflow_messages(workflow_id)
# Nach Sequenznummer sortieren # Sort by sequence number
messages.sort(key=lambda x: x.get("sequence_no", 0)) messages.sort(key=lambda x: x.get("sequence_no", 0))
# Debug log for messages and document counts # Debug log for messages and document counts
message_count = len(messages) message_count = len(messages)
logger.debug(f"Loaded {message_count} messages for workflow {workflow_id}") logger.debug(f"Loaded {message_count} messages for workflow {workflow_id}")
# Check if message_ids exists and is valid
message_ids = workflow.get("message_ids", [])
if not message_ids or len(message_ids) != len(messages):
# Rebuild message_ids from messages
message_ids = [msg.get("id") for msg in messages]
# Update in database
self.update_workflow(workflow_id, {"message_ids": message_ids})
logger.info(f"Rebuilt message_ids for workflow {workflow_id}")
# Log document counts for each message # Log document counts for each message
for msg in messages: for msg in messages:
doc_count = len(msg.get("documents", [])) doc_count = len(msg.get("documents", []))
if doc_count > 0: if doc_count > 0:
logger.info(f"Message {msg.get('id')} has {doc_count} documents loaded from database") logger.info(f"Message {msg.get('id')} has {doc_count} documents loaded from database")
# Log document details for debugging
for i, doc in enumerate(msg.get("documents", [])):
file_id = doc.get("file_id", "unknown")
logger.debug(f"Document {i+1}: file_id={file_id}")
# Logs laden # Load logs
logs = self.get_workflow_logs(workflow_id) logs = self.get_workflow_logs(workflow_id)
# Nach Zeitstempel sortieren # Sort by timestamp
logs.sort(key=lambda x: x.get("timestamp", "")) logs.sort(key=lambda x: x.get("timestamp", ""))
# Vollständiges Workflow-Objekt zusammenbauen # Assemble complete workflow object
complete_workflow = workflow.copy() complete_workflow = workflow.copy()
complete_workflow["messages"] = messages complete_workflow["messages"] = messages
complete_workflow["message_ids"] = message_ids # Ensure message_ids is included
complete_workflow["logs"] = logs complete_workflow["logs"] = logs
return complete_workflow return complete_workflow
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Laden des Workflow-Zustands: {str(e)}") logger.error(f"Error loading workflow state: {str(e)}")
return None return None
# Singleton-Factory für LucyDOMInterface-Instanzen pro Kontext # Singleton-Factory für LucyDOMInterface-Instanzen pro Kontext
_lucydom_interfaces = {} _lucydom_interfaces = {}

View file

@ -3,143 +3,137 @@ from typing import List, Dict, Any, Optional
class Label(BaseModel): class Label(BaseModel):
"""Label für ein Attribut oder eine Klasse mit Unterstützung für mehrere Sprachen""" """Label for an attribute or a class with support for multiple languages"""
default: str default: str
translations: Dict[str, str] = {} translations: Dict[str, str] = {}
def get_label(self, language: str = None): def get_label(self, language: str = None):
"""Gibt das Label in der angegebenen Sprache zurück, oder den Standardwert wenn nicht verfügbar""" """Returns the label in the specified language, or the default value if not available"""
if language and language in self.translations: if language and language in self.translations:
return self.translations[language] return self.translations[language]
return self.default return self.default
class Prompt(BaseModel): class Prompt(BaseModel):
"""Datenmodell für einen Prompt""" """Data model for a prompt"""
id: int = Field(description="Eindeutige ID des Prompts") id: int = Field(description="Unique ID of the prompt")
mandate_id: int = Field(description="ID des zugehörigen Mandanten") mandate_id: int = Field(description="ID of the associated mandate")
user_id: int = Field(description="ID des Erstellers") user_id: int = Field(description="ID of the creator")
content: str = Field(description="Inhalt des Prompts") content: str = Field(description="Content of the prompt")
name: str = Field(description="Anzeigename des Prompts") name: str = Field(description="Display name of the prompt")
label: Label = Field( label: Label = Field(
default=Label(default="Prompt", translations={"en": "Prompt", "fr": "Invite"}), default=Label(default="Prompt", translations={"en": "Prompt", "fr": "Invite"}),
description="Label für die Klasse" description="Label for the class"
) )
# Labels für Attribute # Labels for attributes
field_labels: Dict[str, Label] = { field_labels: Dict[str, Label] = {
"id": Label(default="ID", translations={}), "id": Label(default="ID", translations={}),
"mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), "mandate_id": Label(default="Mandate ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
"user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}), "user_id": Label(default="User ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
"content": Label(default="Inhalt", translations={"en": "Content", "fr": "Contenu"}), "content": Label(default="Content", translations={"en": "Content", "fr": "Contenu"}),
"name": Label(default="Name", translations={"en": "Label", "fr": "Nom"}), "name": Label(default="Name", translations={"en": "Label", "fr": "Nom"}),
} }
class FileItem(BaseModel): class FileItem(BaseModel):
"""Datenmodell für ein File""" """Data model for a file"""
id: int = Field(description="Eindeutige ID des Datenobjekts") id: int = Field(description="Unique ID of the data object")
mandate_id: int = Field(description="ID des zugehörigen Mandanten") mandate_id: int = Field(description="ID of the associated mandate")
user_id: int = Field(description="ID des Erstellers") user_id: int = Field(description="ID of the creator")
name: str = Field(description="Name des Datenobjekts") name: str = Field(description="Name of the data object")
mime_type: str = Field(description="Typ des Datenobjekts MIME-Typ") mime_type: str = Field(description="Type of the data object MIME type")
size: Optional[int] = Field(None, description="Größe des Datenobjekts in Bytes") size: Optional[int] = Field(None, description="Size of the data object in bytes")
file_hash: str = Field(description="Hash code für Deduplizierung") file_hash: str = Field(description="Hash code for deduplication")
creation_date: Optional[str] = Field(None, description="Datum des Hochladens") creation_date: Optional[str] = Field(None, description="Upload date")
workflow_id: Optional[str] = Field(None, description="ID des zugehörigen Workflows, falls vorhanden") workflow_id: Optional[str] = Field(None, description="ID of the associated workflow, if any")
label: Label = Field( label: Label = Field(
default=Label(default="Datenobjekt", translations={"en": "Data Object", "fr": "Objet de données"}), default=Label(default="Data Object", translations={"en": "Data Object", "fr": "Objet de données"}),
description="Label für die Klasse" description="Label for the class"
) )
# Labels für Attribute # Labels for attributes
field_labels: Dict[str, Label] = { field_labels: Dict[str, Label] = {
"id": Label(default="ID", translations={}), "id": Label(default="ID", translations={}),
"mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), "mandate_id": Label(default="Mandate ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
"user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}), "user_id": Label(default="User ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
"name": Label(default="Name", translations={"en": "Name", "fr": "Nom"}), "name": Label(default="Name", translations={"en": "Name", "fr": "Nom"}),
"mime_type": Label(default="Typ", translations={"en": "Type", "fr": "Type"}), "mime_type": Label(default="Type", translations={"en": "Type", "fr": "Type"}),
"size": Label(default="Größe", translations={"en": "Size", "fr": "Taille"}), "size": Label(default="Size", translations={"en": "Size", "fr": "Taille"}),
"file_hash": Label(default="File-Hash", translations={"en": "Hash", "fr": "Hash"}), "file_hash": Label(default="File Hash", translations={"en": "Hash", "fr": "Hash"}),
"creation_date": Label(default="Upload-Datum", translations={"en": "Upload date", "fr": "Date de téléchargement"}), "creation_date": Label(default="Upload date", translations={"en": "Upload date", "fr": "Date de téléchargement"}),
"workflow_id": Label(default="Workflow-ID", translations={"en": "Workflow ID", "fr": "ID du workflow"}) "workflow_id": Label(default="Workflow ID", translations={"en": "Workflow ID", "fr": "ID du workflow"})
} }
class FileData(BaseModel): class FileData(BaseModel):
"""Datenmodell für den File-Inhalt""" """Data model for file content"""
id: int = Field(description="Eindeutige ID des Datenobjekts") id: int = Field(description="Unique ID of the data object")
data: str = Field(description="Binärer Inhalt der Datei als Base64-String") data: str = Field(description="Binary content of the file as base64 string")
# Workflow-Modellklassen # Workflow model classes
class DocumentContent(BaseModel): class DocumentContent(BaseModel):
"""Inhalt eines Dokuments im Workflow""" """Content of a document in the workflow"""
sequence_nr: int = Field(1, description="Sequenz-Nummer des Inhaltes im Quelldokument") sequence_nr: int = Field(1, description="Sequence number of the content in the source document")
name: str = Field(description="Bezeichnung") name: str = Field(description="Designation")
ext: str = Field(description="Content extension for export: txt, csv, json, jpg, png") ext: str = Field(description="Content extension for export: txt, csv, json, jpg, png")
content_type: str = Field(description="MIME-Typ") content_type: str = Field(description="MIME type")
data: str = Field(description="Binärer Inhalt der Daten als Base64-String") summary: str = Field(description="Summary of the file content")
summary: str = Field(description="Zusammenfassung des Datei-Inhaltes") metadata: Dict[str, Any] = Field(default_factory=dict, description="Metadata about the content, such as is_text flag, format information, encoding, etc.")
metadata: Dict[str, Any] = Field(default_factory=dict, description="Metadaten zum Inhalt, wie z.B. is_text Flag, Format-Informationen, Encoding usw.")
class Document(BaseModel): class Document(BaseModel):
"""Dokument im Workflow - Referenziert direkt eine Datei in der Datenbank""" """Document in the workflow - References a file directly in the database"""
id: str = Field(description="Eindeutige ID des Dokuments") id: str = Field(description="Unique ID of the document")
name: str = Field(description="Name des Datenobjekts") name: str = Field(description="Name of the data object")
ext: str = Field(description="Extension des Datenobjekts") ext: str = Field(description="Extension of the data object")
file_id: int = Field(description="ID der referenzierten Datei in der Datenbank") file_id: int = Field(description="ID of the referenced file in the database")
contents: List[DocumentContent] = Field(description="Dokumentinhalte") data: str = Field(description="Content of the data as base64 string")
contents: List[DocumentContent] = Field(description="Document contents")
class DataStats(BaseModel): class DataStats(BaseModel):
"""Statistiken für Performance und Datennutzung""" """Statistics for performance and data usage"""
processing_time: Optional[float] = Field(None, description="Verarbeitungszeit in Sekunden") processing_time: Optional[float] = Field(None, description="Processing time in seconds")
token_count: Optional[int] = Field(None, description="Token-Anzahl (für KI-Modelle)") token_count: Optional[int] = Field(None, description="Token count (for AI models)")
bytes_sent: Optional[int] = Field(None, description="Gesendete Bytes") bytes_sent: Optional[int] = Field(None, description="Bytes sent")
bytes_received: Optional[int] = Field(None, description="Empfangene Bytes") bytes_received: Optional[int] = Field(None, description="Bytes received")
class Message(BaseModel): class Message(BaseModel):
"""Nachrichtenobjekt im Workflow""" """Message object in the workflow"""
id: str = Field(description="Eindeutige ID der Nachricht") id: str = Field(description="Unique ID of the message")
workflow_id: str = Field(description="Referenz zum übergeordneten Workflow") workflow_id: str = Field(description="Reference to the parent workflow")
parent_message_id: Optional[str] = Field(None, description="Referenz zur beantworteten Nachricht") parent_message_id: Optional[str] = Field(None, description="Reference to the replied message")
started_at: str = Field(description="Zeitstempel für Nachrichtenerstellung") started_at: str = Field(description="Timestamp for message creation")
finished_at: Optional[str] = Field(None, description="Zeitstempel für Nachrichtenabschluss") finished_at: Optional[str] = Field(None, description="Timestamp for message completion")
sequence_no: int = Field(description="Sequenznummer für Sortierung") sequence_no: int = Field(description="Sequence number for sorting")
status: str = Field(description="Status der Nachricht ('processing', 'completed')") status: str = Field(description="Status of the message ('processing', 'completed')")
role: str = Field(description="Rolle des Absenders ('system', 'user', 'assistant')") role: str = Field(description="Role of the sender ('system', 'user', 'assistant')")
data_stats: Optional[DataStats] = Field(None, description="Statistiken") data_stats: Optional[DataStats] = Field(None, description="Statistics")
documents: Optional[List[Document]] = Field(None, description="Dokumente in dieser Nachricht (Referenzen zu Dateien in der Datenbank)") documents: Optional[List[Document]] = Field(None, description="Documents in this message (references to files in the database)")
content: Optional[str] = Field(None, description="Textinhalt der Nachricht") content: Optional[str] = Field(None, description="Text content of the message")
agent_name: Optional[str] = Field(None, description="Name des verwendeten Agenten") agent_name: Optional[str] = Field(None, description="Name of the agent used")
class Workflow(BaseModel): class Workflow(BaseModel):
"""Workflow-Objekt für Multi-Agent-System""" """Workflow object for multi-agent system"""
id: str = Field(description="Eindeutige ID des Workflows") id: str = Field(description="Unique ID of the workflow")
name: Optional[str] = Field(None, description="Name des Workflows") name: Optional[str] = Field(None, description="Name of the workflow")
mandate_id: int = Field(description="ID des Mandanten") mandate_id: int = Field(description="ID of the mandate")
user_id: int = Field(description="ID des Benutzers") user_id: int = Field(description="ID of the user")
status: str = Field(description="Status des Workflows ('running', 'failed', 'stopped')") status: str = Field(description="Status of the workflow ('running', 'failed', 'stopped')")
started_at: str = Field(description="Startzeitpunkt") started_at: str = Field(description="Start timestamp")
last_activity: str = Field(description="Zeitpunkt der letzten Aktivität") last_activity: str = Field(description="Timestamp of the last activity")
last_message_id: str = Field(description="The last registered message") message_ids: List[str] = Field(default=[], description="List of message IDs in this workflow")
data_stats: Optional[Dict[str, Any]] = Field(None, description="Gesamt-Statistiken") data_stats: Optional[Dict[str, Any]] = Field(None, description="Total statistics")
messages: List[Message] = Field(default=[], description="Nachrichtenverlauf") messages: List[Message] = Field(default=[], description="Message history")
logs: List[Dict[str, Any]] = Field(default=[], description="Protokolleinträge") logs: List[Dict[str, Any]] = Field(default=[], description="Log entries")
# Anfragemodelle für die API # Request models for the API
class WorkflowCreateRequest(BaseModel):
"""Anfrage zur Erstellung eines neuen Workflows"""
name: Optional[str] = Field(None, description="Name des Workflows")
prompt: str = Field(description="Zu verwendender Prompt")
files: List[int] = Field(default=[], description="Liste von FileItem ID")
class UserInputRequest(BaseModel): class UserInputRequest(BaseModel):
"""Anfrage für Benutzereingabe an einen laufenden Workflow""" """Request for user input to a running workflow"""
prompt: str = Field(description="Nachricht des Benutzers") prompt: str = Field(description="Message from the user")
listFileId: List[int] = Field(default=[], description="Liste zusätzlicher FileItem ID") list_file_id: List[int] = Field(default=[], description="List of FileItem IDs")

View file

@ -1,19 +1,32 @@
....................... TASKS ....................... TASKS
please revise all chat_agents* modules:
- all comments, logs and outputs in english language
- all ai answers in the language of the user
- no language specific features like analysis of words. a prompt in japanese would not work with this! i need it generically.
- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section.
documentation agent: can you do following adaptions
- why to try to find out document type, when in the "label" of the files to deliver the extension is ALWAYS indludes (e.g. .docx, .csv, etc.). Please revise, this can be very much shortened and simplified
everywhere:
- to remove base64 checks ot tests. only to use base64_encoded attribute
- to use the enhanced attributes for document ("data" containing filedata in base64 format) and content ("data", "base64_encoded", "data_extracted")
please tell me, where to adapt what in the code. I do not neew fully new code.
german comments in logs and prompts to translate to english. where to adapt what?
can you enhance all ai prompts to include, that the output is delivered in the language of the user?
An option to have a global variable for this, which is also trasferred with the task to the agents?
streamline self.log_add --> to use in a standardized format and to reduce messages to relevant steps
add connector to myoutlook
webcrawler_agent:
- there is a try - except mapping problem in the code. please also fix this
-
also attached chat.py and chat_content_extraction (centralized), that you can see the scrutcure of passed parameters.
----------------------- OPEN ----------------------- OPEN
@ -41,6 +54,47 @@ frontend: no labels definition
----------------------- DONE ----------------------- DONE
can you do following adaptions
for document class:
- class Document to have a "data" attribute, where the file-data is stored in base64 format
based on this:
- task object for agents to enhance with this attribute
for content in contents in documents, when adding a file to a document object:
- to set "base64_encoded" if encoded. this should already be, to check
when building task for the agents:
- ensure attribute "data" is integrated, containing filedata base64 encoded
- in each content to deliver "data" as it is, optional "base64_encoded" attribute depending on data format, to add attribute "data_extracted" and to store here the extracted data from ai call
everywhere:
- to remove base64 checks ot tests. only to use base64_encoded attribute
- to use the enhanced attributes for document ("data" containing filedata in base64 format) and content ("data", "base64_encoded", "data_extracted")
please tell me, where to adapt what in the code. I do not neew fully new code.
please revise all chat_agents* modules:
- all comments, logs and outputs in english language
- all ai answers in the language of the user
- no language specific features like analysis of words. a prompt in japanese would not work with this! i need it generically.
- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section.
documentation agent:
- why to try to find out document type, when in the "label" of the files to deliver the extension is ALWAYS indludes (e.g. .docx, .csv, etc.). Please revise, this can be very much shortened and simplified
webcrawler_agent:
- there is a try - except mapping problem in the code. please also fix this
-
also attached chat.py and chat_content_extraction (centralized), that you can see the scrutcure of passed parameters.
alle expliziten prompt ersetzen. alle expliziten prompt ersetzen.

1
result.txt Normal file
View file

@ -0,0 +1 @@
{'total_pixels': None, 'total_characters': None}

View file

@ -70,7 +70,7 @@ async def list_workflows(current_user: Dict[str, Any] = Depends(get_current_acti
@router.post("/{workflow_id}/user-input", response_model=Dict[str, Any]) @router.post("/{workflow_id}/user-input", response_model=Dict[str, Any])
async def submit_user_input( async def submit_user_input(
workflow_id: Optional[str] = Path(None, description="ID des Workflows (optional)"), workflow_id: Optional[str] = Path(None, description="ID des Workflows (optional)"),
user_input: Dict[str, Any] = Body(...), user_input: lucydom_model.UserInputRequest = Body(...),
current_user: Dict[str, Any] = Depends(get_current_active_user) current_user: Dict[str, Any] = Depends(get_current_active_user)
): ):
""" """
@ -84,7 +84,11 @@ async def submit_user_input(
try: try:
# Workflow mit dem Chat-Manager fortsetzen oder neu starten # Workflow mit dem Chat-Manager fortsetzen oder neu starten
workflow = await context.interface_chat.chat_run(user_input, workflow_id) user_input_dict = {
"prompt": user_input.prompt,
"list_file_id": user_input.list_file_id
}
workflow = await context.interface_chat.chat_run(user_input_dict, workflow_id)
if not workflow: if not workflow:
raise HTTPException( raise HTTPException(

View file

@ -0,0 +1,10 @@
This is a test text file for the ChatManager workflow.
It contains some information for testing document processing.
The ChatManager should be able to process this file
and extract relevant information from it.
This file serves as an example for text-based documents that can be
used in a chat workflow.

BIN
static/2_test_image.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 284 B

View file

@ -0,0 +1,52 @@
# REQUIREMENTS: Pillow
from PIL import Image
def calculate_image_pixels(image_path):
try:
with Image.open(image_path) as img:
width, height = img.size
total_pixels = width * height
return total_pixels
except Exception as e:
print(f"Error calculating image pixels: {e}")
return None
def calculate_text_characters(text_path):
try:
with open(text_path, 'r', encoding='utf-8') as file:
text = file.read()
total_characters = len(text)
return total_characters
except Exception as e:
print(f"Error calculating text characters: {e}")
return None
def main():
image_path = 'test_image'
text_path = 'test_document'
# Calculate total pixels in the image
total_pixels = calculate_image_pixels(image_path)
# Calculate total characters in the text document
total_characters = calculate_text_characters(text_path)
# Prepare the result dictionary
result = {
'total_pixels': total_pixels,
'total_characters': total_characters
}
# Write the result to a text file
try:
with open('result.txt', 'w') as result_file:
result_file.write(str(result))
except Exception as e:
print(f"Error writing result to file: {e}")
# Output the result
print(result)
if __name__ == "__main__":
main()

View file

@ -0,0 +1,6 @@
Execution error:
Traceback (most recent call last):
File "C:\Users\pmots\AppData\Local\Temp\code_exec_itmq0xhw\code_9cc3911d.py", line 3, in <module>
from PIL import Image
ModuleNotFoundError: No module named 'PIL'

50
test2.py Normal file
View file

@ -0,0 +1,50 @@
from PIL import Image
def calculate_image_pixels(image_path):
try:
with Image.open(image_path) as img:
width, height = img.size
total_pixels = width * height
return total_pixels
except Exception as e:
print(f"Error calculating image pixels: {e}")
return None
def calculate_text_characters(text_path):
try:
with open(text_path, 'r', encoding='utf-8') as file:
text = file.read()
total_characters = len(text)
return total_characters
except Exception as e:
print(f"Error calculating text characters: {e}")
return None
def main():
image_path = 'test_image'
text_path = 'test_document'
# Calculate total pixels in the image
total_pixels = calculate_image_pixels(image_path)
# Calculate total characters in the text document
total_characters = calculate_text_characters(text_path)
# Prepare the result dictionary
result = {
'total_pixels': total_pixels,
'total_characters': total_characters
}
# Write the result to a text file
try:
with open('result.txt', 'w') as result_file:
result_file.write(str(result))
except Exception as e:
print(f"Error writing result to file: {e}")
# Output the result
print(result)
if __name__ == "__main__":
main()

View file

@ -1,6 +1,6 @@
""" """
Test-Skript für den ChatManager-Workflow mit simulierten Datei-Uploads. Test script for ChatManager workflow with simulated file uploads.
Demonstriert den vollständigen Workflow von Datei-Upload bis Chat-Ausführung. Demonstrates the complete workflow from file upload to chat execution.
""" """
import asyncio import asyncio
@ -11,7 +11,7 @@ import sys
from typing import Dict, Any, List, Tuple from typing import Dict, Any, List, Tuple
from datetime import datetime from datetime import datetime
# Logging konfigurieren # Configure logging
logging.basicConfig( logging.basicConfig(
level=logging.DEBUG, level=logging.DEBUG,
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
@ -19,43 +19,43 @@ logging.basicConfig(
) )
logger = logging.getLogger("test_workflow") logger = logging.getLogger("test_workflow")
# Pfad zum Projektverzeichnis hinzufügen # Add project directory to path
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Module importieren # Import modules
from modules.lucydom_interface import get_lucydom_interface from modules.lucydom_interface import get_lucydom_interface
from modules.chat import get_chat_manager from modules.chat import get_chat_manager
async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]: async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
""" """
Erstellt eine Textdatei und ein Bild für Tests und lädt sie in die Datenbank hoch. Creates a text file and an image for testing and uploads them to the database.
Args: Args:
mandate_id: ID des Mandanten mandate_id: ID of the mandate
user_id: ID des Benutzers user_id: ID of the user
Returns: Returns:
Tuple mit (text_file_id, image_file_id) Tuple with (text_file_id, image_file_id)
""" """
logger.info("Erstelle Test-Dateien...") logger.info("Creating test files...")
lucy_interface = get_lucydom_interface(mandate_id, user_id) lucy_interface = get_lucydom_interface(mandate_id, user_id)
# Textdatei erstellen # Create text file
text_content = """ text_content = """
Dies ist eine Test-Textdatei für den ChatManager-Workflow. This is a test text file for the ChatManager workflow.
Sie enthält einige Informationen zum Testen der Dokumentverarbeitung. It contains some information for testing document processing.
Der ChatManager sollte in der Lage sein, diese Datei zu verarbeiten The ChatManager should be able to process this file
und daraus relevante Informationen zu extrahieren. and extract relevant information from it.
Diese Datei dient als Beispiel für Text-basierte Dokumente, die in einem This file serves as an example for text-based documents that can be
Chat-Workflow verwendet werden können. used in a chat workflow.
""" """
text_file_bytes = text_content.encode('utf-8') text_file_bytes = text_content.encode('utf-8')
text_file = lucy_interface.save_uploaded_file(text_file_bytes, "test_document.txt") text_file = lucy_interface.save_uploaded_file(text_file_bytes, "test_document.txt")
text_file_id = text_file["id"] text_file_id = text_file["id"]
logger.info(f"Textdatei erstellt mit ID: {text_file_id}") logger.info(f"Text file created with ID: {text_file_id}")
# Create a simple test image using PIL # Create a simple test image using PIL
try: try:
@ -73,7 +73,7 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
# Upload to database # Upload to database
image_file = lucy_interface.save_uploaded_file(img_bytes, "test_image.png") image_file = lucy_interface.save_uploaded_file(img_bytes, "test_image.png")
image_file_id = image_file["id"] image_file_id = image_file["id"]
logger.info(f"Bilddatei erstellt mit ID: {image_file_id}") logger.info(f"Image file created with ID: {image_file_id}")
except ImportError: except ImportError:
# Fallback to the original method if PIL is not available # Fallback to the original method if PIL is not available
@ -87,7 +87,7 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
image_file = lucy_interface.save_uploaded_file(png_data, "test_image.png") image_file = lucy_interface.save_uploaded_file(png_data, "test_image.png")
image_file_id = image_file["id"] image_file_id = image_file["id"]
logger.info(f"Bilddatei erstellt mit ID: {image_file_id}") logger.info(f"Image file created with ID: {image_file_id}")
return text_file_id, image_file_id return text_file_id, image_file_id
@ -95,72 +95,73 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
async def run_chat_workflow(mandate_id: int, user_id: int, file_ids: List[int]) -> Dict[str, Any]: async def run_chat_workflow(mandate_id: int, user_id: int, file_ids: List[int]) -> Dict[str, Any]:
""" """
Führt einen Chat-Workflow mit gegebenen Datei-IDs aus. Executes a chat workflow with given file IDs.
Args: Args:
mandate_id: ID des Mandanten mandate_id: ID of the mandate
user_id: ID des Benutzers user_id: ID of the user
file_ids: Liste der Datei-IDs file_ids: List of file IDs
Returns: Returns:
Das Workflow-Ergebnis The workflow result
""" """
logger.info(f"Starte Chat-Workflow mit Dateien: {file_ids}") logger.info(f"Starting chat workflow with files: {file_ids}")
# ChatManager initialisieren # Initialize ChatManager
chat_manager = get_chat_manager(mandate_id, user_id) chat_manager = get_chat_manager(mandate_id, user_id)
# Benutzeranfrage erstellen # Create user request
user_input = { user_input = {
"message": "Analysiere bitte die hochgeladenen Dateien und erkläre mir deren Inhalt.", "prompt": "Bitte zähle mir zusammen wieviele Pixel das Bild hat und wieviele Zeichen der Text der Dokumente hat",
"additional_fileids": file_ids "list_file_id": file_ids
} }
# Chat-Workflow ausführen # Execute chat workflow
workflow_result = await chat_manager.chat_run(user_input) workflow_result = await chat_manager.chat_run(user_input)
logger.info(f"Workflow abgeschlossen mit ID: {workflow_result['id']}") logger.info(f"Workflow completed with ID: {workflow_result['id']}")
return workflow_result return workflow_result
def analyze_workflow_result(workflow: Dict[str, Any]) -> None: def analyze_workflow_result(workflow: Dict[str, Any]) -> None:
""" """
Analysiert und gibt Informationen über das Workflow-Ergebnis aus. Analyzes and outputs information about the workflow result.
Args: Args:
workflow: Das Workflow-Ergebnis workflow: The workflow result
""" """
logger.info("Analysiere Workflow-Ergebnis:") logger.info("Analyzing workflow result:")
logger.info(f"Workflow-ID: {workflow['id']}") logger.info(f"Workflow ID: {workflow['id']}")
logger.info(f"Status: {workflow['status']}") logger.info(f"Status: {workflow['status']}")
logger.info(f"Anzahl Nachrichten: {len(workflow.get('messages', []))}") logger.info(f"Number of messages: {len(workflow.get('messages', []))}")
for i, message in enumerate(workflow.get('messages', [])): for i, message in enumerate(workflow.get('messages', [])):
logger.info(f"Nachricht {i+1}:") logger.info(f"Message {i+1}:")
logger.info(f" Rolle: {message.get('role', 'unbekannt')}") logger.info(f" Role: {message.get('role', 'unknown')}")
# Nur die ersten 100 Zeichen des Inhalts anzeigen # Show only the first 100 characters of content
content = message.get('content', '') content = message.get('content', '')
content_preview = content[:100] + '...' if len(content) > 100 else content content_preview = content[:100] + '...' if len(content) > 100 else content
logger.info(f" Inhalt: {content_preview}") logger.info(f" Content: {content_preview}")
# Dokumente in der Nachricht anzeigen # Show documents in the message
documents = message.get('documents', []) documents = message.get('documents', [])
logger.info(f" Dokumente: {len(documents)}") logger.info(f" Documents: {len(documents)}")
for j, doc in enumerate(documents): for j, doc in enumerate(documents):
doc_id = doc.get('id', 'keine ID') doc_id = doc.get('id', 'no ID')
file_id = doc.get('file_id', 'keine file_id') file_id = doc.get('file_id', 'no file_id')
logger.info(f" Dokument {j+1}: ID={doc_id}, File-ID={file_id}") logger.info(f" Document {j+1}: ID={doc_id}, File-ID={file_id}")
# Informationen über Inhalte # Information about contents
contents = doc.get('contents', []) contents = doc.get('contents', [])
for k, content in enumerate(contents): for k, content in enumerate(contents):
content_name = content.get('name', 'kein Name') content_name = content.get('name', 'no name')
content_type = content.get('content_type', 'unbekannt') content_type = content.get('content_type', 'unknown')
logger.info(f" Inhalt {k+1}: {content_name} ({content_type})") logger.info(f" Content {k+1}: {content_name} ({content_type})")
# Log-Einträge anzeigen logs = workflow.get('logs', [])
logger.info(f"Logs: {len(workflow.get('logs', []))}") logger.info(f"Logs: {len(logs)}")
for i, log in enumerate(workflow.get('logs', []))[:10]: # Begrenzung auf 10 Logs # Get only the first 10 logs
for i, log in enumerate(logs[:10]): # Apply the slice to logs, not enumerate
log_type = log.get('type', 'info') log_type = log.get('type', 'info')
log_message = log.get('message', '') log_message = log.get('message', '')
log_message_preview = log_message[:100] + '...' if len(log_message) > 100 else log_message log_message_preview = log_message[:100] + '...' if len(log_message) > 100 else log_message
@ -168,14 +169,14 @@ def analyze_workflow_result(workflow: Dict[str, Any]) -> None:
async def cleanup_test_files(mandate_id: int, user_id: int, file_ids: List[int]) -> None: async def cleanup_test_files(mandate_id: int, user_id: int, file_ids: List[int]) -> None:
""" """
Bereinigt die erstellten Testdateien. Cleans up the created test files.
Args: Args:
mandate_id: ID des Mandanten mandate_id: ID of the mandate
user_id: ID des Benutzers user_id: ID of the user
file_ids: Liste der zu löschenden Datei-IDs file_ids: List of file IDs to delete
""" """
logger.info("Beginne Bereinigung der Testdateien...") logger.info("Starting cleanup of test files...")
lucy_interface = get_lucydom_interface(mandate_id, user_id) lucy_interface = get_lucydom_interface(mandate_id, user_id)
@ -183,47 +184,47 @@ async def cleanup_test_files(mandate_id: int, user_id: int, file_ids: List[int])
try: try:
success = lucy_interface.delete_file(file_id) success = lucy_interface.delete_file(file_id)
if success: if success:
logger.info(f"Datei mit ID {file_id} erfolgreich gelöscht") logger.info(f"File with ID {file_id} successfully deleted")
else: else:
logger.warning(f"Fehler beim Löschen der Datei mit ID {file_id}") logger.warning(f"Error deleting file with ID {file_id}")
except Exception as e: except Exception as e:
logger.error(f"Fehler beim Löschen der Datei mit ID {file_id}: {str(e)}") logger.error(f"Error deleting file with ID {file_id}: {str(e)}")
logger.info("Bereinigung abgeschlossen") logger.info("Cleanup completed")
async def main(): async def main():
""" """
Hauptfunktion, die den gesamten Testprozess steuert. Main function that controls the entire test process.
""" """
# Testparameter # Test parameters
MANDATE_ID = 1 # Test-Mandanten-ID MANDATE_ID = 1 # Test mandate ID
USER_ID = 1 # Test-Benutzer-ID USER_ID = 1 # Test user ID
CLEANUP = True # Bereinigung nach dem Test CLEANUP = True # Cleanup after test
try: try:
logger.info("=== Test-Workflow für ChatManager gestartet ===") logger.info("=== ChatManager test workflow started ===")
# Schritt 1: Testdateien erstellen # Step 1: Create test files
text_file_id, image_file_id = await create_test_files(MANDATE_ID, USER_ID) text_file_id, image_file_id = await create_test_files(MANDATE_ID, USER_ID)
file_ids = [text_file_id, image_file_id] file_ids = [text_file_id, image_file_id]
# Schritt 2: Chat-Workflow ausführen # Step 2: Execute chat workflow
workflow_result = await run_chat_workflow(MANDATE_ID, USER_ID, file_ids) workflow_result = await run_chat_workflow(MANDATE_ID, USER_ID, file_ids)
# Schritt 3: Ergebnis analysieren # Step 3: Analyze result
analyze_workflow_result(workflow_result) analyze_workflow_result(workflow_result)
# Schritt 4: Optional bereinigen # Step 4: Optional cleanup
if CLEANUP: if CLEANUP:
await cleanup_test_files(MANDATE_ID, USER_ID, file_ids) await cleanup_test_files(MANDATE_ID, USER_ID, file_ids)
logger.info("=== Test-Workflow erfolgreich abgeschlossen ===") logger.info("=== Test workflow successfully completed ===")
except Exception as e: except Exception as e:
logger.error(f"Fehler im Test-Workflow: {str(e)}", exc_info=True) logger.error(f"Error in test workflow: {str(e)}", exc_info=True)
logger.info("=== Test-Workflow mit Fehler beendet ===") logger.info("=== Test workflow ended with error ===")
if __name__ == "__main__": if __name__ == "__main__":
# Event-Loop für asyncio erstellen und Hauptfunktion ausführen # Create event loop for asyncio and execute main function
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
loop.run_until_complete(main()) loop.run_until_complete(main())

View file

@ -1,373 +0,0 @@
"""
Erweitertes Test-Skript für den ChatManager-Workflow mit simulierten Datei-Uploads.
Bietet zusätzliche Konfigurationsmöglichkeiten und detailliertere Tests.
"""
import asyncio
import logging
import os
import sys
import argparse
import json
from typing import Dict, Any, List, Tuple, Optional
from datetime import datetime
# Configure logging: INFO level, timestamped format, console output.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[logging.StreamHandler()]
)
logger = logging.getLogger("test_workflow")
# Add the project root to the import path so project modules resolve.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Import project modules (must happen after the sys.path adjustment above).
from modules.lucydom_interface import get_lucydom_interface
from modules.chat import get_chat_manager
class TestConfig:
    """Configuration container for the test parameters (IDs, cleanup behavior,
    result persistence, and the test prompts/content)."""
    def __init__(self) -> None:
        # Mandate and user under which all test operations run.
        self.mandate_id = 1
        self.user_id = 1
        # Whether to delete the uploaded test files after the run.
        self.cleanup = True
        # Whether to write analysis/workflow results to disk, and where.
        self.save_results = True
        self.results_dir = "test_results"
        # User message sent to the chat workflow (German; runtime string, kept as-is).
        self.test_message = "Analysiere bitte die hochgeladenen Dateien und erkläre mir deren Inhalt."
        # Content of the generated test text document (German sample text; runtime string, kept as-is).
        self.text_file_content = """
Dies ist eine Test-Textdatei für den ChatManager-Workflow.
Sie enthält einige Informationen zum Testen der Dokumentverarbeitung.
Der ChatManager sollte in der Lage sein, diese Datei zu verarbeiten
und daraus relevante Informationen zu extrahieren.
Diese Datei dient als Beispiel für Text-basierte Dokumente, die in einem
Chat-Workflow verwendet werden können.
"""
def parse_args() -> TestConfig:
    """Parse command-line arguments into a TestConfig instance."""
    parser = argparse.ArgumentParser(description="Test für ChatManager-Workflow")
    parser.add_argument("--mandate-id", type=int, default=1, help="ID des Mandanten")
    parser.add_argument("--user-id", type=int, default=1, help="ID des Benutzers")
    parser.add_argument("--no-cleanup", action="store_true", help="Testdateien nicht löschen")
    parser.add_argument("--no-save", action="store_true", help="Ergebnisse nicht speichern")
    parser.add_argument("--results-dir", type=str, default="test_results", help="Verzeichnis für Ergebnisse")
    parser.add_argument("--message", type=str, help="Benutzernachricht für den Test")
    cli = parser.parse_args()

    cfg = TestConfig()
    cfg.mandate_id = cli.mandate_id
    cfg.user_id = cli.user_id
    # The CLI exposes negative flags; invert them for the positive config fields.
    cfg.cleanup = not cli.no_cleanup
    cfg.save_results = not cli.no_save
    cfg.results_dir = cli.results_dir
    # Only override the default prompt when one was supplied.
    if cli.message:
        cfg.test_message = cli.message
    return cfg
async def create_test_files(config: TestConfig) -> Tuple[int, int]:
    """Create a text file and a tiny image for testing and upload both to the database.

    Args:
        config: test configuration (supplies mandate/user IDs and the text content)

    Returns:
        Tuple of (text_file_id, image_file_id).
    """
    logger.info("Erstelle Test-Dateien...")
    storage = get_lucydom_interface(config.mandate_id, config.user_id)

    # Upload the configured text content as a UTF-8 document.
    uploaded_doc = storage.save_uploaded_file(
        config.text_file_content.encode('utf-8'), "test_document.txt"
    )
    doc_id = uploaded_doc["id"]
    logger.info(f"Textdatei erstellt mit ID: {doc_id}")

    # Upload a minimal hard-coded 1x1 PNG (raw bytes, no image library needed).
    png_payload = bytes.fromhex(
        "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
        "89000000017352474200aece1ce90000000467414d410000b18f0bfc61050000"
        "000970485973000016250000162501495224f00000001974455874536f667477"
        "617265007777772e696e6b73636170652e6f72679bee3c1a0000000c49444154"
        "08d763f8ffff3f0005fe02fec1cd59830000000049454e44ae426082"
    )
    uploaded_img = storage.save_uploaded_file(png_payload, "test_image.png")
    img_id = uploaded_img["id"]
    logger.info(f"Bilddatei erstellt mit ID: {img_id}")

    return doc_id, img_id
async def verify_uploaded_files(mandate_id: int, user_id: int, file_ids: List[int]) -> bool:
    """Check that the uploaded files were stored correctly in the database.

    Args:
        mandate_id: ID of the mandate
        user_id: ID of the user
        file_ids: list of file IDs to verify

    Returns:
        True if every file exists and has binary data, False otherwise.
    """
    logger.info("Überprüfe hochgeladene Dateien...")
    storage = get_lucydom_interface(mandate_id, user_id)

    everything_ok = True
    for fid in file_ids:
        record = storage.get_file(fid)
        if not record:
            logger.error(f"Datei mit ID {fid} nicht in der Datenbank gefunden")
            everything_ok = False
            continue
        payload = storage.get_file_data(fid)
        if not payload:
            logger.error(f"Datei {fid} hat keine Binärdaten")
            everything_ok = False
            continue
        logger.info(f"Datei {fid} ({record.get('name', 'Unbekannt')}, {record.get('mime_type', 'Unbekannt')}) ist verfügbar")
        logger.info(f"  Größe: {len(payload)} Bytes")
    return everything_ok
async def run_chat_workflow(config: TestConfig, file_ids: List[int]) -> Dict[str, Any]:
    """Execute a chat workflow with the given file IDs and log its duration.

    Args:
        config: test configuration (mandate/user IDs and the test message)
        file_ids: list of uploaded file IDs to attach to the request

    Returns:
        The workflow result dictionary produced by the ChatManager.
    """
    logger.info(f"Starte Chat-Workflow mit Dateien: {file_ids}")
    manager = get_chat_manager(config.mandate_id, config.user_id)

    # Build the user request payload.
    request = {
        "message": config.test_message,
        "additional_fileids": file_ids
    }

    # Time the workflow execution for the log output.
    started = datetime.now()
    result = await manager.chat_run(request)
    elapsed = (datetime.now() - started).total_seconds()

    logger.info(f"Workflow abgeschlossen mit ID: {result['id']}")
    logger.info(f"Dauer: {elapsed:.2f} Sekunden")
    return result
def analyze_workflow_result(workflow: Dict[str, Any]) -> Dict[str, Any]:
    """
    Analyze the workflow result and return statistics.

    Args:
        workflow: the workflow result dictionary

    Returns:
        Dictionary with analysis results: message/log/document counts, role
        distribution, document content types, and assistant response sizes.
    """
    logger.info("Analysiere Workflow-Ergebnis:")
    # Basic information pulled straight from the workflow dict.
    analysis = {
        "workflow_id": workflow.get("id"),
        "status": workflow.get("status"),
        "message_count": len(workflow.get("messages", [])),
        "log_count": len(workflow.get("logs", [])),
        "document_count": 0,
        "roles": {},
        "document_types": {},
        "response_sizes": []
    }
    # Analyze each message.
    for message in workflow.get("messages", []):
        # Tally roles.
        role = message.get("role", "unknown")
        if role not in analysis["roles"]:
            analysis["roles"][role] = 0
        analysis["roles"][role] += 1
        # Record content size for assistant responses only.
        if role == "assistant":
            content = message.get("content", "")
            analysis["response_sizes"].append(len(content))
        # Count documents and tally their content types.
        documents = message.get("documents", [])
        analysis["document_count"] += len(documents)
        for doc in documents:
            contents = doc.get("contents", [])
            for content in contents:
                content_type = content.get("content_type", "unknown")
                if content_type not in analysis["document_types"]:
                    analysis["document_types"][content_type] = 0
                analysis["document_types"][content_type] += 1
    # Emit a summary to the log (messages are German runtime strings, kept as-is).
    logger.info(f"Workflow-ID: {analysis['workflow_id']}")
    logger.info(f"Status: {analysis['status']}")
    logger.info(f"Anzahl Nachrichten: {analysis['message_count']}")
    logger.info(f"Anzahl Dokumente: {analysis['document_count']}")
    logger.info(f"Rollen-Verteilung: {analysis['roles']}")
    logger.info(f"Dokumenttypen: {analysis['document_types']}")
    if analysis["response_sizes"]:
        avg_size = sum(analysis["response_sizes"]) / len(analysis["response_sizes"])
        logger.info(f"Durchschnittliche Antwortgröße: {avg_size:.2f} Zeichen")
    # Detailed per-message information, limited to the first 5 messages.
    for i, message in enumerate(workflow.get("messages", [])[:5]):
        logger.info(f"Nachricht {i+1}:")
        logger.info(f"  Rolle: {message.get('role', 'unbekannt')}")
        # Show only the first 100 characters of the content.
        content = message.get("content", "")
        content_preview = content[:100] + "..." if len(content) > 100 else content
        logger.info(f"  Inhalt: {content_preview}")
        # Show the documents attached to this message.
        documents = message.get("documents", [])
        if documents:
            logger.info(f"  Dokumente: {len(documents)}")
            for j, doc in enumerate(documents):
                file_id = doc.get("file_id", "keine file_id")
                logger.info(f"  Dokument {j+1}: File-ID={file_id}")
    return analysis
def save_test_results(config: TestConfig, workflow: Dict[str, Any], analysis: Dict[str, Any]) -> None:
"""
Speichert die Testergebnisse in einer Datei.
Args:
config: Testkonfiguration
workflow: Das vollständige Workflow-Ergebnis
analysis: Die Analyseergebnisse
"""
if not config.save_results:
return
# Ergebnisverzeichnis erstellen, falls es nicht existiert
os.makedirs(config.results_dir, exist_ok=True)
# Zeitstempel für eindeutige Dateinamen
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
# Speichere die Analyse
analysis_file = os.path.join(config.results_dir, f"analysis_{timestamp}.json")
with open(analysis_file, "w", encoding="utf-8") as f:
json.dump(analysis, f, indent=2, ensure_ascii=False)
logger.info(f"Analyse gespeichert in: {analysis_file}")
# Speichere den vollständigen Workflow (ohne große Binärdaten)
workflow_copy = workflow.copy()
# Entferne Binärdaten aus dem Export, um die Dateigröße zu reduzieren
for message in workflow_copy.get("messages", []):
if "documents" in message:
for doc in message.get("documents", []):
if "contents" in doc:
for content in doc.get("contents", []):
if "data" in content and isinstance(content["data"], bytes) and len(content["data"]) > 1000:
content["data"] = f"[{len(content['data'])} Bytes]"
workflow_file = os.path.join(config.results_dir, f"workflow_{timestamp}.json")
with open(workflow_file, "w", encoding="utf-8") as f:
# Konvertiere Bytes zu Strings für JSON-Serialisierung
json.dump(workflow_copy, f, indent=2, ensure_ascii=False, default=lambda o:
o.decode("utf-8") if isinstance(o, bytes) else str(o))
logger.info(f"Workflow gespeichert in: {workflow_file}")
async def cleanup_test_files(config: TestConfig, file_ids: List[int]) -> None:
    """Delete the test files created for this run, unless cleanup is disabled.

    Args:
        config: test configuration (cleanup flag, mandate/user IDs)
        file_ids: IDs of the files to delete
    """
    if not config.cleanup:
        logger.info("Bereinigung übersprungen (--no-cleanup)")
        return

    logger.info("Beginne Bereinigung der Testdateien...")
    storage = get_lucydom_interface(config.mandate_id, config.user_id)

    for fid in file_ids:
        try:
            if storage.delete_file(fid):
                logger.info(f"Datei mit ID {fid} erfolgreich gelöscht")
            else:
                logger.warning(f"Fehler beim Löschen der Datei mit ID {fid}")
        except Exception as e:
            # A failure on one file must not abort cleanup of the rest.
            logger.error(f"Fehler beim Löschen der Datei mit ID {fid}: {str(e)}")

    logger.info("Bereinigung abgeschlossen")
async def main():
    """Drive the full test process: create files, verify uploads, run the chat
    workflow, analyze and persist the results, then clean up."""
    cfg = parse_args()
    try:
        logger.info("=== Test-Workflow für ChatManager gestartet ===")
        logger.info(f"Mandate-ID: {cfg.mandate_id}, User-ID: {cfg.user_id}")

        # Step 1: create the test files.
        doc_id, img_id = await create_test_files(cfg)
        uploaded = [doc_id, img_id]

        # Step 2: verify the uploads; abort early if anything is missing.
        if not await verify_uploaded_files(cfg.mandate_id, cfg.user_id, uploaded):
            logger.error("Fehler bei den hochgeladenen Dateien, Test wird abgebrochen")
            return

        # Step 3: execute the chat workflow.
        outcome = await run_chat_workflow(cfg, uploaded)

        # Steps 4+5: analyze and persist the results.
        stats = analyze_workflow_result(outcome)
        save_test_results(cfg, outcome, stats)

        # Step 6: clean up (honors the --no-cleanup flag).
        await cleanup_test_files(cfg, uploaded)

        logger.info("=== Test-Workflow erfolgreich abgeschlossen ===")
    except Exception as e:
        logger.error(f"Fehler im Test-Workflow: {str(e)}", exc_info=True)
        logger.info("=== Test-Workflow mit Fehler beendet ===")
if __name__ == "__main__":
# Event-Loop für asyncio erstellen und Hauptfunktion ausführen
loop = asyncio.get_event_loop()
loop.run_until_complete(main())