diff --git a/config.ini b/config.ini index 4b46fb49..2cbce637 100644 --- a/config.ini +++ b/config.ini @@ -19,18 +19,6 @@ Connector_AiAnthropic_MODEL_NAME = claude-3-opus-20240229 Connector_AiAnthropic_TEMPERATURE = 0.2 Connector_AiAnthropic_MAX_TOKENS = 2000 -# Web scraping configuration -Connector_AiWebscraping_TIMEOUT = 10 -Connector_AiWebscraping_MAX_URLS = 3 -Connector_AiWebscraping_MAX_TOKENS = 30000 -Connector_AiWebscraping_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 -Connector_AiWebscraping_SEARCH_ENGINE = https://html.duckduckgo.com/html/?q= -Connector_AiWebscraping_MAX_SEARCH_KEYWORDS = 3 -Connector_AiWebscraping_MAX_SEARCH_RESULTS = 5 - -# AgentService configuration -Module_AgentserviceInterface_UPLOAD_DIR = ./_uploads - # File management configuration File_Management_MAX_UPLOAD_SIZE_MB = 50 File_Management_CLEANUP_INTERVAL = 240 @@ -50,4 +38,17 @@ Security_PASSWORD_REQUIRE_LOWERCASE = True Security_PASSWORD_REQUIRE_NUMBERS = True Security_PASSWORD_REQUIRE_SPECIAL = True Security_FAILED_LOGIN_LIMIT = 5 -Security_LOCK_DURATION_MINUTES = 30 \ No newline at end of file +Security_LOCK_DURATION_MINUTES = 30 + +# Agent Webcrawler configuration +Agent_Webcrawler_TIMEOUT = 10 +Agent_Webcrawler_MAX_URLS = 3 +Agent_Webcrawler_MAX_TOKENS = 30000 +Agent_Webcrawler_USER_AGENT = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36 +Agent_Webcrawler_SEARCH_ENGINE = https://html.duckduckgo.com/html/?q= +Agent_Webcrawler_MAX_SEARCH_KEYWORDS = 3 +Agent_Webcrawler_MAX_SEARCH_RESULTS = 5 + +# Agent Coder configuration +Agent_Coder_INSTALL_TIMEOUT = 180 +Agent_Coder_EXECUTION_TIMEOUT = 60 diff --git a/connectors/connector_db_json.py b/connectors/connector_db_json.py index 5371c6be..183d0bc5 100644 --- a/connectors/connector_db_json.py +++ b/connectors/connector_db_json.py @@ -10,76 +10,76 @@ logger = logging.getLogger(__name__) 
class DatabaseConnector: """ - Ein Konnektor für JSON-basierte Datenspeicherung. - Stellt generische Datenbankoperationen bereit. + A connector for JSON-based data storage. + Provides generic database operations. """ def __init__(self, db_host: str, db_database: str, db_user: str = None, db_password: str = None, mandate_id: int = None, user_id: int = None): """ - Initialisiert den JSON-Datenbankkonnektor. + Initializes the JSON database connector. Args: - db_host: Verzeichnis für die JSON-Dateien - db_database = Datenbankname - db_user: Benutzername für die Authentifizierung (optional) - db_password: API-Schlüssel für die Authentifizierung (optional) - mandate_id: Kontext-Parameter für den Mandanten - user_id: Kontext-Parameter für den Benutzer + db_host: Directory for the JSON files + db_database = Database name + db_user: Username for authentication (optional) + db_password: API key for authentication (optional) + mandate_id: Context parameter for the tenant + user_id: Context parameter for the user """ - # Speichere die Eingabeparameter + # Store the input parameters self.db_host = db_host self.db_database = db_database self.db_user = db_user self.db_password = db_password - # Prüfe, ob Kontext-Parameter gesetzt sind + # Check if context parameters are set if mandate_id is None or user_id is None: - raise ValueError("mandate_id und user_id müssen gesetzt sein") + raise ValueError("mandate_id and user_id must be set") - # Stelle sicher, dass das Datenbankverzeichnis existiert + # Ensure the database directory exists self.db_folder=os.path.join(self.db_host,self.db_database) os.makedirs(self.db_folder, exist_ok=True) - # Cache für geladene Daten + # Cache for loaded data self._tables_cache = {} - # System-Tabelle initialisieren + # Initialize system table self._system_table_name = "_system" self._initialize_system_table() - # Temporär mandate_id und user_id speichern + # Temporarily store mandate_id and user_id self._mandate_id = mandate_id self._user_id = user_id 
- # Wenn mandate_id oder user_id 0 sind, versuche die initialen IDs zu verwenden + # If mandate_id or user_id are 0, try to use the initial IDs if mandate_id == 0: initial_mandate_id = self.get_initial_id("mandates") if initial_mandate_id is not None: self._mandate_id = initial_mandate_id - logger.info(f"Verwende initiale mandate_id: {initial_mandate_id} statt 0") + logger.info(f"Using initial mandate_id: {initial_mandate_id} instead of 0") if user_id == 0: initial_user_id = self.get_initial_id("users") if initial_user_id is not None: self._user_id = initial_user_id - logger.info(f"Verwende initiale user_id: {initial_user_id} statt 0") + logger.info(f"Using initial user_id: {initial_user_id} instead of 0") - # Setze die effektiven IDs als Eigenschaften + # Set the effective IDs as properties self.mandate_id = self._mandate_id self.user_id = self._user_id - logger.info(f"DatabaseConnector initialisiert für Verzeichnis: {self.db_folder}") - logger.debug(f"Kontext: mandate_id={self.mandate_id}, user_id={self.user_id}") + logger.info(f"DatabaseConnector initialized for directory: {self.db_folder}") + logger.debug(f"Context: mandate_id={self.mandate_id}, user_id={self.user_id}") def _initialize_system_table(self): - """Initialisiert die System-Tabelle, falls sie noch nicht existiert.""" + """Initializes the system table if it doesn't exist yet.""" system_table_path = self._get_table_path(self._system_table_name) if not os.path.exists(system_table_path): empty_system_table = {} self._save_system_table(empty_system_table) - logger.info(f"System-Tabelle initialisiert in {system_table_path}") + logger.info(f"System table initialized in {system_table_path}") def _load_system_table(self) -> Dict[str, int]: - """Lädt die System-Tabelle mit den initialen IDs.""" + """Loads the system table with the initial IDs.""" system_table_path = self._get_table_path(self._system_table_name) try: if os.path.exists(system_table_path): @@ -88,66 +88,66 @@ class DatabaseConnector: else: return 
{} except Exception as e: - logger.error(f"Fehler beim Laden der System-Tabelle: {e}") + logger.error(f"Error loading the system table: {e}") return {} def _save_system_table(self, data: Dict[str, int]) -> bool: - """Speichert die System-Tabelle mit den initialen IDs.""" + """Saves the system table with the initial IDs.""" system_table_path = self._get_table_path(self._system_table_name) try: with open(system_table_path, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False) return True except Exception as e: - logger.error(f"Fehler beim Speichern der System-Tabelle: {e}") + logger.error(f"Error saving the system table: {e}") return False def _get_table_path(self, table: str) -> str: - """Gibt den vollständigen Pfad zu einer Tabellendatei zurück""" + """Returns the full path to a table file""" return os.path.join(self.db_folder, f"{table}.json") def _load_table(self, table: str) -> List[Dict[str, Any]]: - """Lädt eine Tabelle aus der entsprechenden JSON-Datei""" + """Loads a table from the corresponding JSON file""" path = self._get_table_path(table) - # Wenn die Tabelle die System-Tabelle ist, lade sie direkt + # If the table is the system table, load it directly if table == self._system_table_name: - return [] # Die System-Tabelle wird nicht wie normale Tabellen behandelt + return [] # The system table is not treated like normal tables - # Wenn die Tabelle bereits im Cache ist, verwende den Cache + # If the table is already in the cache, use the cache if table in self._tables_cache: - # logger.info(f"Lade Tabelle {table} aus Cache") + # logger.info(f"Loading table {table} from cache") return self._tables_cache[table] - # Ansonsten lade die Datei + # Otherwise load the file try: if os.path.exists(path): - # logger.info(f"Lade Tabelle {table} aus JSON {path}") + # logger.info(f"Loading table {table} from JSON {path}") with open(path, 'r', encoding='utf-8') as f: data = json.load(f) self._tables_cache[table] = data - # Wenn Daten geladen 
wurden und noch keine initiale ID registriert ist, - # registriere die ID des ersten Datensatzes (falls vorhanden) + # If data was loaded and no initial ID is registered yet, + # register the ID of the first record (if available) if data and not self.has_initial_id(table): if "id" in data[0]: - self.register_initial_id(table, data[0]["id"]) - logger.info(f"Initiale ID {data[0]['id']} für Tabelle {table} nachträglich registriert") + self._register_initial_id(table, data[0]["id"]) + logger.info(f"Initial ID {data[0]['id']} for table {table} retroactively registered") return data else: - # Wenn die Datei nicht existiert, erstelle eine leere Tabelle - logger.info(f"Neue Tabelle {table}") + # If the file doesn't exist, create an empty table + logger.info(f"New table {table}") self._tables_cache[table] = [] self._save_table(table, []) return [] except Exception as e: - logger.error(f"Fehler beim Laden der Tabelle {table}: {e}") + logger.error(f"Error loading table {table}: {e}") return [] def _save_table(self, table: str, data: List[Dict[str, Any]]) -> bool: - """Speichert eine Tabelle in der entsprechenden JSON-Datei""" - # Die System-Tabelle wird speziell behandelt + """Saves a table to the corresponding JSON file""" + # The system table is handled specially if table == self._system_table_name: return False @@ -156,43 +156,44 @@ class DatabaseConnector: with open(path, 'w', encoding='utf-8') as f: json.dump(data, f, indent=2, ensure_ascii=False) - # Aktualisiere den Cache + # Update the cache self._tables_cache[table] = data return True except Exception as e: - logger.error(f"Fehler beim Speichern der Tabelle {table}: {e}") + logger.error(f"Error saving table {table}: {e}") return False def _filter_by_context(self, records: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """ - Filtert Datensätze nach dem Mandanten- und Benutzerkontext, - sofern diese Felder im Datensatz existieren. + Filters records by tenant and user context, + if these fields exist in the record. 
""" filtered_records = [] for record in records: - # Prüfe, ob mandate_id im Datensatz existiert und nicht null ist + # Check if mandate_id exists in the record and is not null has_mandate = "mandate_id" in record and record["mandate_id"] is not None and record["mandate_id"] != "" - # Prüfe, ob user_id im Datensatz existiert und nicht null ist + # Check if user_id exists in the record and is not null has_user = "user_id" in record and record["user_id"] is not None and record["user_id"] != "" - # Wenn beides existiert, filtere entsprechend + # If both exist, filter accordingly if has_mandate and has_user: if record["mandate_id"] == self.mandate_id: filtered_records.append(record) - # Wenn nur mandate_id existiert + # If only mandate_id exists elif has_mandate and not has_user: if record["mandate_id"] == self.mandate_id: filtered_records.append(record) - # Wenn weder mandate_id noch user_id existieren, füge den Datensatz hinzu + # If neither mandate_id nor user_id exist, add the record elif not has_mandate and not has_user: filtered_records.append(record) return filtered_records def _apply_record_filter(self, records: List[Dict[str, Any]], record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]: - """Wendet einen Datensatzfilter auf die Datensätze an""" + """Applies a record filter to the records""" + if not record_filter: return records @@ -202,17 +203,17 @@ class DatabaseConnector: match = True for field, value in record_filter.items(): - # Prüfen, ob das Feld existiert + # Check if the field exists if field not in record: match = False break - # Wenn der Filterwert ein Integer-String ist und das Datensatzfeld ein Integer + # If the filter value is an integer string and the record field is an integer if isinstance(value, str) and value.isdigit() and isinstance(record[field], int): if record[field] != int(value): match = False break - # Sonst direkter Vergleich + # Otherwise direct comparison elif record[field] != value: match = False break @@ -222,66 +223,120 
@@ class DatabaseConnector: return filtered_records + def _register_initial_id(self, table: str, initial_id: int) -> bool: + """ + Registers the initial ID for a table. + + Args: + table: Name of the table + initial_id: The initial ID + + Returns: + True on success, False on error + """ + try: + # Load the current system table + system_data = self._load_system_table() + + # Only register if not already present + if table not in system_data: + system_data[table] = initial_id + success = self._save_system_table(system_data) + if success: + logger.info(f"Initial ID {initial_id} for table {table} registered") + return success + return True # If already present, this is not an error + except Exception as e: + logger.error(f"Error registering the initial ID for table {table}: {e}") + return False + + def _remove_initial_id(self, table: str) -> bool: + """ + Removes the initial ID for a table from the system table. + + Args: + table: Name of the table + + Returns: + True on success, False on error + """ + try: + # Load the current system table + system_data = self._load_system_table() + + # Remove the entry if it exists + if table in system_data: + del system_data[table] + success = self._save_system_table(system_data) + if success: + logger.info(f"Initial ID for table {table} removed from system table") + return success + return True # If not present, this is not an error + except Exception as e: + logger.error(f"Error removing initial ID for table {table}: {e}") + return False + # Public API def get_tables(self, filter_criteria: Dict[str, Any] = None) -> List[str]: """ - Gibt eine Liste aller verfügbaren Tabellen zurück. + Returns a list of all available tables. 
Args: - filter_criteria: Optionale Filterkriterien (nicht implementiert) + filter_criteria: Optional filter criteria (not implemented) Returns: - Liste der Tabellennamen + List of table names """ + tables = [] try: for filename in os.listdir(self.db_folder): if filename.endswith('.json') and not filename.startswith('_'): - table_name = filename[:-5] # Entferne die .json-Endung + table_name = filename[:-5] # Remove the .json extension tables.append(table_name) except Exception as e: - logger.error(f"Fehler beim Lesen des Datenbankverzeichnisses: {e}") + logger.error(f"Error reading the database directory: {e}") return tables def get_fields(self, table: str, filter_criteria: Dict[str, Any] = None) -> List[str]: """ - Gibt eine Liste aller Felder einer Tabelle zurück. + Returns a list of all fields in a table. Args: - table: Name der Tabelle - filter_criteria: Optionale Filterkriterien (nicht implementiert) + table: Name of the table + filter_criteria: Optional filter criteria (not implemented) Returns: - Liste der Feldnamen + List of field names """ - # Lade die Tabellendaten + # Load the table data data = self._load_table(table) if not data: return [] - # Nehme den ersten Datensatz als Referenz für die Felder + # Take the first record as a reference for the fields fields = list(data[0].keys()) if data else [] return fields def get_schema(self, table: str, language: str = None, filter_criteria: Dict[str, Any] = None) -> Dict[str, Dict[str, Any]]: """ - Gibt ein Schema-Objekt für eine Tabelle zurück mit Datentypen und Labels. + Returns a schema object for a table with data types and labels. 
Args: - table: Name der Tabelle - language: Sprache für die Labels (optional) - filter_criteria: Optionale Filterkriterien (nicht implementiert) + table: Name of the table + language: Language for the labels (optional) + filter_criteria: Optional filter criteria (not implemented) Returns: - Schema-Objekt mit Feldern, Datentypen und Labels + Schema object with fields, data types and labels """ - # Lade die Tabellendaten + # Load the table data data = self._load_table(table) schema = {} @@ -289,18 +344,18 @@ class DatabaseConnector: if not data: return schema - # Nehme den ersten Datensatz als Referenz für die Felder und Datentypen + # Take the first record as a reference for the fields and data types first_record = data[0] for field, value in first_record.items(): - # Bestimme den Datentyp + # Determine the data type data_type = type(value).__name__ - # Label erstellen (Standardwert ist der Feldname) + # Create label (default is the field name) label = field - # Wenn model_info verfügbar ist, versuche das Label aus dem Modell zu holen - # Implementierung hängt vom tatsächlichen Modell ab + # If model_info is available, try to get the label from the model + # Implementation depends on the actual model schema[field] = { "type": data_type, @@ -311,27 +366,27 @@ class DatabaseConnector: def get_recordset(self, table: str, field_filter: Dict[str, Any] = None, record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]: """ - Gibt eine Liste von Datensätzen aus einer Tabelle zurück, gefiltert nach Kriterien. + Returns a list of records from a table, filtered by criteria. 
Args: - table: Name der Tabelle - field_filter: Filter für Felder (welche Felder zurückgegeben werden sollen) - record_filter: Filter für Datensätze (welche Datensätze zurückgegeben werden sollen) + table: Name of the table + field_filter: Filter for fields (which fields should be returned) + record_filter: Filter for records (which records should be returned) Returns: - Liste der gefilterten Datensätze + List of filtered records """ - # Lade die Tabellendaten + # Load the table data data = self._load_table(table) - # Filtere nach Mandanten- und Benutzerkontext + # Filter by tenant and user context filtered_data = self._filter_by_context(data) - # Wende record_filter an, wenn vorhanden + # Apply record_filter if available if record_filter: filtered_data = self._apply_record_filter(filtered_data, record_filter) - # Wenn field_filter vorhanden ist, reduziere die Felder + # If field_filter is available, reduce the fields if field_filter and isinstance(field_filter, list): result = [] for record in filtered_data: @@ -346,187 +401,157 @@ class DatabaseConnector: def record_create(self, table: str, record_data: Dict[str, Any]) -> Dict[str, Any]: """ - Erstellt einen neuen Datensatz in der Tabelle. + Creates a new record in the table. 
Args: - table: Name der Tabelle - record_data: Daten für den neuen Datensatz + table: Name of the table + record_data: Data for the new record Returns: - Der erstellte Datensatz + The created record """ - # Lade die Tabellendaten + # Load the table data data = self._load_table(table) - # Füge mandate_id und user_id hinzu, falls nicht vorhanden oder 0 + # Add mandate_id and user_id if not present or 0 if "mandate_id" not in record_data or record_data["mandate_id"] == 0: record_data["mandate_id"] = self.mandate_id if "user_id" not in record_data or record_data["user_id"] == 0: record_data["user_id"] = self.user_id - # Bestimme die nächste ID, falls nicht vorhanden + # Determine the next ID if not present if "id" not in record_data: next_id = 1 if data: next_id = max(record["id"] for record in data if "id" in record) + 1 record_data["id"] = next_id - # Wenn die Tabelle leer ist und eine System-ID registriert werden soll + # If the table is empty and a system ID should be registered if not data: - self.register_initial_id(table, record_data["id"]) - logger.info(f"Initiale ID {record_data['id']} für Tabelle {table} wurde registriert") + self._register_initial_id(table, record_data["id"]) + logger.info(f"Initial ID {record_data['id']} for table {table} has been registered") - # Füge den neuen Datensatz hinzu + # Add the new record data.append(record_data) - # Speichere die aktualisierte Tabelle + # Save the updated table if self._save_table(table, data): return record_data else: - raise ValueError(f"Fehler beim Erstellen des Datensatzes in Tabelle {table}") + raise ValueError(f"Error creating the record in table {table}") def record_delete(self, table: str, record_id: Union[str, int]) -> bool: """ - Löscht einen Datensatz aus der Tabelle. + Deletes a record from the table. 
Args: - table: Name der Tabelle - record_id: ID des zu löschenden Datensatzes + table: Name of the table + record_id: ID of the record to delete Returns: - True bei Erfolg, False bei Fehler + True on success, False on error """ - # Lade die Tabellendaten + # Load table data data = self._load_table(table) - # Prüfe, ob es sich um die initiale ID handelt - initial_id = self.get_initial_id(table) - if initial_id is not None and initial_id == record_id: - logger.warning(f"Versuch, den initialen Datensatz mit ID {record_id} aus Tabelle {table} zu löschen, wurde verhindert") - return False - - # Suche den Datensatz + # Search for the record for i, record in enumerate(data): if "id" in record and record["id"] == record_id: - # Prüfe, ob der Datensatz zum aktuellen Mandanten gehört + # Check if the record belongs to the current mandate if "mandate_id" in record and record["mandate_id"] != self.mandate_id: raise ValueError("Not your mandate") - # Lösche den Datensatz + # Check if it's an initial record + initial_id = self.get_initial_id(table) + if initial_id is not None and initial_id == record_id: + # Remove this entry from the system table + self._remove_initial_id(table) + logger.info(f"Initial ID {record_id} for table {table} has been removed from the system table") + + # Delete the record del data[i] - # Speichere die aktualisierte Tabelle + # Save the updated table return self._save_table(table, data) - # Datensatz nicht gefunden + # Record not found return False - + def record_modify(self, table: str, record_id: Union[str, int], record_data: Dict[str, Any]) -> Dict[str, Any]: """ - Ändert einen Datensatz in der Tabelle. + Modifies a record in the table. 
Args: - table: Name der Tabelle - record_id: ID des zu ändernden Datensatzes - record_data: Neue Daten für den Datensatz + table: Name of the table + record_id: ID of the record to modify + record_data: New data for the record Returns: - Der aktualisierte Datensatz + The updated record """ - # Lade die Tabellendaten + # Load table data data = self._load_table(table) - # Suche den Datensatz + # Search for the record for i, record in enumerate(data): if "id" in record and record["id"] == record_id: - # Prüfe, ob der Datensatz zum aktuellen Mandanten gehört + # Check if the record belongs to the current mandate if "mandate_id" in record and record["mandate_id"] != self.mandate_id: raise ValueError("Not your mandate") - # Verhindere Änderung der ID bei initialem Datensatz - initial_id = self.get_initial_id(table) - if initial_id is not None and initial_id == record_id and "id" in record_data and record_data["id"] != record_id: - raise ValueError(f"Die ID des initialen Datensatzes in Tabelle {table} kann nicht geändert werden") + # Prevent changing the ID + if "id" in record_data and record_data["id"] != record_id: + raise ValueError(f"The ID of a record in table {table} cannot be changed") - # Aktualisiere den Datensatz + # Update the record for key, value in record_data.items(): data[i][key] = value - # Speichere die aktualisierte Tabelle + # Save the updated table if self._save_table(table, data): return data[i] else: - raise ValueError(f"Fehler beim Aktualisieren des Datensatzes in Tabelle {table}") + raise ValueError(f"Error updating record in table {table}") - # Datensatz nicht gefunden - raise ValueError(f"Datensatz mit ID {record_id} nicht gefunden in Tabelle {table}") - - - # System-Tabellen-Funktionen - - def register_initial_id(self, table: str, initial_id: int) -> bool: - """ - Registriert die initiale ID für eine Tabelle. 
- - Args: - table: Name der Tabelle - initial_id: Die initiale ID - - Returns: - True bei Erfolg, False bei Fehler - """ - try: - # Lade die aktuelle System-Tabelle - system_data = self._load_system_table() - - # Nur registrieren, wenn noch nicht vorhanden - if table not in system_data: - system_data[table] = initial_id - success = self._save_system_table(system_data) - if success: - logger.info(f"Initiale ID {initial_id} für Tabelle {table} registriert") - return success - return True # Wenn bereits vorhanden, ist das kein Fehler - except Exception as e: - logger.error(f"Fehler beim Registrieren der initialen ID für Tabelle {table}: {e}") - return False - - def get_initial_id(self, table: str) -> Optional[int]: - """ - Gibt die initiale ID für eine Tabelle zurück. - - Args: - table: Name der Tabelle - - Returns: - Die initiale ID oder None, wenn nicht vorhanden - """ - system_data = self._load_system_table() - initial_id = system_data.get(table) - if initial_id is None: - logger.debug(f"Keine initiale ID für Tabelle {table} gefunden") - return initial_id - + # Record not found + raise ValueError(f"Record with ID {record_id} not found in table {table}") + def has_initial_id(self, table: str) -> bool: """ - Prüft, ob eine initiale ID für eine Tabelle registriert ist. + Checks if an initial ID is registered for a table. Args: - table: Name der Tabelle + table: Name of the table Returns: - True, wenn eine initiale ID registriert ist, sonst False + True if an initial ID is registered, otherwise False """ system_data = self._load_system_table() return table in system_data - def get_all_initial_ids(self) -> Dict[str, int]: + def get_initial_id(self, table: str) -> Optional[int]: """ - Gibt alle registrierten initialen IDs zurück. + Returns the initial ID for a table. 
+ Args: + table: Name of the table + Returns: - Dictionary mit Tabellennamen als Schlüssel und initialen IDs als Werte + The initial ID or None if not present """ system_data = self._load_system_table() - return system_data.copy() # Kopie zurückgeben, um das Original zu schützen \ No newline at end of file + initial_id = system_data.get(table) + if initial_id is None: + logger.debug(f"No initial ID found for table {table}") + return initial_id + + def get_all_initial_ids(self) -> Dict[str, int]: + """ + Returns all registered initial IDs. + + Returns: + Dictionary with table names as keys and initial IDs as values + """ + system_data = self._load_system_table() + return system_data.copy() # Return a copy to protect the original \ No newline at end of file diff --git a/modules/_backup_chat_agent_coder copy.py b/modules/_backup_chat_agent_coder copy.py new file mode 100644 index 00000000..98b16a6a --- /dev/null +++ b/modules/_backup_chat_agent_coder copy.py @@ -0,0 +1,814 @@ +""" +Coder agent for development and execution of Python code. +Optimized for the new task-based processing. 
+""" + +import logging +import json +import re +import uuid +import os +import subprocess +import tempfile +import shutil +import sys +from typing import Dict, Any, List, Optional, Tuple + +from modules.chat_registry import AgentBase + +logger = logging.getLogger(__name__) + + +class AgentCoder(AgentBase): + """Agent for development and execution of Python code""" + + def __init__(self): + """Initialize the coder agent""" + super().__init__() + self.name = "coder" + self.description = "Develops and executes Python code for data processing and automation" + self.capabilities = [ + "code_development", + "data_processing", + "file_processing", + "automation", + "code_execution" + ] + + # Executor settings + self.executor_timeout = 60 # seconds + self.executor_memory_limit = 512 # MB + + # AI service settings + self.ai_temperature = 0.1 # Lower temperature for deterministic code generation + + # Auto-correction settings + self.max_correction_attempts = 3 # Maximum number of correction attempts + + def set_dependencies(self, ai_service=None): + """Set external dependencies for the agent.""" + self.ai_service = ai_service + + async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: + """ + Process a standardized task structure and perform code development/execution. 
+ + Args: + task: A dictionary containing: + - task_id: Unique ID for this task + - prompt: The main instruction for the agent + - input_documents: List of documents to process + - output_specifications: List of required output documents + - context: Additional contextual information + + Returns: + A dictionary containing: + - feedback: Text response explaining the code execution + - documents: List of created document objects + """ + try: + # Extract relevant task information + prompt = task.get("prompt", "") + input_documents = task.get("input_documents", []) + output_specs = task.get("output_specifications", []) + context_info = task.get("context", {}) + + # Check if AI service is available + if not self.ai_service: + logger.error("No AI service configured for the Coder agent") + return { + "feedback": "The Coder agent is not properly configured.", + "documents": [] + } + + # Extract context from input documents + document_context = self._extract_document_context(input_documents) + + # Generate code based on the prompt and document context + logger.info("Generating code based on the task") + code_to_execute, requirements = await self._generate_code_from_prompt(prompt, document_context) + + if not code_to_execute: + logger.warning("AI couldn't generate any code") + return { + "feedback": "I couldn't generate executable code based on the task. 
Please provide more detailed instructions.", + "documents": [] + } + + logger.info(f"Code generated with AI ({len(code_to_execute)} characters)") + + # Collect created documents + generated_documents = [] + + # Add code as first document + code_doc = { + "label": "generated_code.py", + "content": code_to_execute + } + generated_documents.append(code_doc) + + # Execute code with auto-correction loop + execution_context = { + "input_documents": input_documents, + "task": task + } + + # Enhanced execution with auto-correction + result, attempts_info = await self._execute_with_auto_correction( + code_to_execute, + requirements, + execution_context, + prompt # Original prompt/message + ) + + # Create output documents based on execution result and output specifications + if result.get("success", False): + # Code execution successful + output = result.get("output", "") + execution_result = result.get("result") + logger.info("Code executed successfully") + + # Determine output type of the result + result_docs = self._generate_result_documents( + attempts_info[-1]["code"], # Last successful code + output, + execution_result, + output_specs + ) + + # Add result documents + generated_documents.extend(result_docs) + + # Create feedback for successful execution + feedback = f"I successfully executed the code and generated {len(result_docs)} output files." 
+ if attempts_info and len(attempts_info) > 1: + feedback += f" (This required {len(attempts_info)-1} correction attempts)" + + else: + # Code execution failed after all attempts + error = result.get("error", "Unknown error") + logger.error(f"Error in code execution after all correction attempts: {error}") + + # Add error log as additional document + error_doc = { + "label": "execution_error.txt", + "content": f"Execution error:\n\n{error}" + } + generated_documents.append(error_doc) + + # Create feedback for failed execution + feedback = f"An error occurred during code execution after {len(attempts_info)} correction attempts." + + # If no specific outputs requested, create standard outputs + if not output_specs and result.get("success", False): + # Add standard output document + output_doc = { + "label": "execution_output.txt", + "content": output + } + generated_documents.append(output_doc) + + # If a result is available, also add as JSON document + if execution_result: + result_json = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result) + result_doc = { + "label": "execution_result.json", + "content": result_json + } + generated_documents.append(result_doc) + + return { + "feedback": feedback, + "documents": generated_documents + } + + except Exception as e: + error_msg = f"Error during processing by the Coder agent: {str(e)}" + logger.error(error_msg) + return { + "feedback": f"An error occurred during code processing: {str(e)}", + "documents": [] + } + + def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str: + """ + Extract context from input documents for code generation. 
+ + Args: + documents: List of document objects + + Returns: + Extracted context as text + """ + context_parts = [] + + for doc in documents: + doc_name = doc.get("name", "Unnamed document") + context_parts.append(f"--- {doc_name} ---") + + for content in doc.get("contents", []): + if content.get("metadata", {}).get("is_text", False): + context_parts.append(content.get("data", "")) + + return "\n\n".join(context_parts) + + def _generate_result_documents(self, code: str, output: str, execution_result: Any, + output_specs: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """ + Generate output documents based on execution results and specifications. + + Args: + code: Executed code + output: Text output of the execution + execution_result: Result object from execution + output_specs: Output specifications + + Returns: + List of generated document objects + """ + documents = [] + + # If no specific outputs requested + if not output_specs: + return documents + + # Generate appropriate document for each requested output + for spec in output_specs: + output_label = spec.get("label", "") + output_description = spec.get("description", "") + + # Determine output type based on file extension + format_type = self._determine_format_type(output_label) + + # Generate document content based on format and output + if "code" in output_label.lower() or format_type in ["py", "js", "html", "css"]: + # Code document + documents.append({ + "label": output_label, + "content": code + }) + elif "output" in output_label.lower() or format_type == "txt": + # Output document + documents.append({ + "label": output_label, + "content": output + }) + elif format_type in ["json", "yml", "yaml"] and execution_result: + # JSON result document + if isinstance(execution_result, (dict, list)): + content = json.dumps(execution_result, indent=2) + else: + content = str(execution_result) + + documents.append({ + "label": output_label, + "content": content + }) + else: + # Generic result document (fallback) + 
result_str = "" + if execution_result: + if isinstance(execution_result, (dict, list)): + result_str = json.dumps(execution_result, indent=2) + else: + result_str = str(execution_result) + + documents.append({ + "label": output_label, + "content": f"Code output:\n\n{output}\n\nResult:\n\n{result_str}" + }) + + return documents + + def _determine_format_type(self, output_label: str) -> str: + """ + Determine the format type based on the filename. + + Args: + output_label: Output filename + + Returns: + Format type (py, js, json, txt, etc.) + """ + if not '.' in output_label: + return "txt" # Default format + + extension = output_label.split('.')[-1].lower() + return extension + + async def _execute_with_auto_correction( + self, + initial_code: str, + requirements: List[str], + context: Dict[str, Any], + original_prompt: str + ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]: + """ + Execute code with automatic error correction and retry attempts. + + Args: + initial_code: The initial Python code + requirements: List of required packages + context: Additional context for execution + original_prompt: The original user request/prompt + + Returns: + Tuple of (final execution result, list of attempt info dictionaries) + """ + # Initialize tracking data + current_code = initial_code + current_requirements = requirements.copy() if requirements else [] + attempts_info = [] + + # Execute with correction loop + for attempt in range(1, self.max_correction_attempts + 1): + if attempt == 1: + logger.info(f"Executing code (attempt {attempt}/{self.max_correction_attempts})") + else: + logger.info(f"Executing corrected code (attempt {attempt}/{self.max_correction_attempts})") + + # Execute current code version + result = await self._execute_code(current_code, current_requirements, context) + + # Record attempt information + attempts_info.append({ + "attempt": attempt, + "code": current_code, + "error": result.get("error", ""), + "success": result.get("success", False) + }) + + # 
Check if execution was successful + if result.get("success", False): + # Success! Return result and attempt info + return result, attempts_info + + # Failed execution - check if max attempt limit reached + if attempt >= self.max_correction_attempts: + logger.warning(f"Maximum correction attempts ({self.max_correction_attempts}) reached") + break + + # Correct code based on the error + error_message = result.get("error", "Unknown error") + + logger.info(f"Attempting to fix code error: {error_message[:200]}...") + + # Generate corrected code + corrected_code, new_requirements = await self._generate_code_correction( + current_code, + error_message, + original_prompt, + current_requirements + ) + + # Update for next attempt + if corrected_code: + current_code = corrected_code + + # Add new requirements + if new_requirements: + for req in new_requirements: + if req not in current_requirements: + current_requirements.append(req) + logger.info(f"Added new requirement: {req}") + else: + # Correction couldn't be generated, end loop + logger.warning("Couldn't generate code correction") + break + + # If we reach here, all attempts failed - return last result and attempt info + return result, attempts_info + + async def _generate_code_correction( + self, + code: str, + error_message: str, + original_prompt: str, + current_requirements: List[str] = None + ) -> Tuple[str, List[str]]: + """ + Generate a corrected version of code based on error messages. + + Args: + code: The code that generated errors + error_message: The error message to fix + original_prompt: The original task/requirements + current_requirements: List of currently required packages + + Returns: + Tuple of (corrected code, new requirements list) + """ + try: + # Create detailed prompt for code correction + correction_prompt = f"""You need to fix an error in Python code. 
The code was written for this task: + +ORIGINAL TASK: +{original_prompt} + +CURRENT CODE: +```python +{code} +``` + +ERROR MESSAGE: +``` +{error_message} +``` + +CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"} + +Your task is to analyze the error and provide a corrected version of the code. +Focus specifically on fixing the error while maintaining the original functionality. + +Common fixes include: +- Fixing syntax errors (missing parentheses, indentation, etc.) +- Solving import errors by adding appropriate requirements +- Correcting file paths or handling "file not found" errors +- Adding error handling for specific edge cases +- Fixing logical errors in the code + +FORMATTING GUIDELINES: +1. Provide ONLY the complete corrected Python code WITHOUT explanations +2. Do NOT use code block markers like ```python or ``` +3. Do NOT explain what the code does before or after +4. Do NOT add any text that isn't valid Python code +5. Start your answer directly with valid Python code +6. End your answer with valid Python code + +If you need to add new required packages, place them in a specially formatted comment at the beginning of your code as follows: +# REQUIREMENTS: package1,package2,package3 + +Your entire answer must be valid Python that can be executed without modifications. +""" + + # Create messages for API + messages = [ + {"role": "system", "content": "You are a Python debugging expert. 
You provide ONLY clean, error-free Python code, without explanations, markdown formatting, or text that isn't code."}, + {"role": "user", "content": correction_prompt} + ] + + # Call API with very low temperature for deterministic corrections + generated_content = await self.ai_service.call_api( + messages, + temperature=0.1 + ) + + # Clean up the generated content to ensure it's only valid Python code + fixed_code = self._clean_code(generated_content) + + # Extract requirements from special comment at beginning of code + new_requirements = [] + for line in fixed_code.split('\n'): + if line.strip().startswith("# REQUIREMENTS:"): + req_str = line.replace("# REQUIREMENTS:", "").strip() + new_requirements = [r.strip() for r in req_str.split(',') if r.strip()] + break + + return fixed_code, new_requirements + + except Exception as e: + logging.error(f"Error generating code correction: {str(e)}") + # Return None to indicate failure + return None, [] + + def _clean_code(self, code: str) -> str: + """ + Clean code by removing markdown code block markers and other formatting artifacts. 
+ + Args: + code: The code string to clean + + Returns: + Cleaned code string + """ + # Remove code block markers at beginning/end + code = re.sub(r'^```(?:python)?\s*', '', code) + code = re.sub(r'```\s*$', '', code) + + # Process lines in reverse order to start from the end + lines = code.split('\n') + clean_lines = [] + in_trailing_markdown = False + + for line in reversed(lines): + stripped = line.strip() + + # Check if this line contains only backticks (``` or ` or ``) + if re.match(r'^`{1,3}$', stripped): + in_trailing_markdown = True + continue + + # If we've reached actual code, no more trailing markdown consideration + if stripped and not in_trailing_markdown: + in_trailing_markdown = False + + # Add this line if it's not part of trailing markdown + if not in_trailing_markdown: + clean_lines.insert(0, line) + + # Rejoin lines + clean_code = '\n'.join(clean_lines) + + # Final cleanup for any remaining backticks + clean_code = re.sub(r'`{1,3}\s*', '', clean_code) + + return clean_code.strip() + + async def _generate_code_from_prompt(self, prompt: str, document_context: str) -> Tuple[str, List[str]]: + """ + Generate Python code from a prompt using the AI service. + + Args: + prompt: The prompt to generate code from + document_context: Context extracted from documents + + Returns: + Tuple of (generated Python code, required packages) + """ + try: + # Prepare prompt for code generation + ai_prompt = f"""Generate Python code to solve the following task: + +TASK: +{prompt} + +PROVIDED CONTEXT: +{document_context if document_context else "No additional context available."} + +IMPORTANT REQUIREMENTS: +1. Your code MUST define a 'result' variable to store the final result. +2. At the end of your script, the result variable should be output. +3. Make your 'result' variable a dictionary or other JSON-serializable data structure containing all relevant outputs. +4. Comment your code well to explain important operations. +5. Make your code complete and self-contained. 
+6. Add appropriate error handling. + +FORMATTING INSTRUCTIONS: +- Return ONLY the Python code, WITHOUT introduction, explanation, or conclusion text +- Do NOT use code block markers like ```python or ``` +- Do NOT explain what the code does before or after +- Do NOT add any text that isn't valid Python code +- Start your answer directly with valid Python code +- End your answer with valid Python code + +For required packages, place them in a specially formatted comment at the beginning of your code in one line as follows: +# REQUIREMENTS: pandas,numpy,matplotlib,requests + +Your entire answer must be valid Python that can be executed without modifications. +""" + + # Create messages for API + messages = [ + {"role": "system", "content": "You are a Python code generator who provides ONLY clean, executable Python code with no explanations, markdown formatting, or non-code text."}, + {"role": "user", "content": ai_prompt} + ] + + # Call API + logging.info(f"Calling AI API to generate code") + generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature) + + # Clean up the generated content to ensure it's only valid Python code + code = self._clean_code(generated_content) + + # Extract requirements from special comment at beginning of code + requirements = [] + for line in code.split('\n'): + if line.strip().startswith("# REQUIREMENTS:"): + req_str = line.replace("# REQUIREMENTS:", "").strip() + requirements = [r.strip() for r in req_str.split(',') if r.strip()] + break + + return code, requirements + + except Exception as e: + logging.error(f"Error generating code with AI: {str(e)}") + # Return basic error handling code and no requirements + error_str = str(e).replace('"', '\\"') + return f""" +# Error in code generation +print(f"An error occurred during code generation: {error_str}") +# Return error result +result = {{"error": "Code generation failed", "message": "{error_str}"}} +""", [] + + async def _execute_code(self, code: str, 
class SimpleCodeExecutor:
    """
    A simplified executor that runs Python code in isolated virtual environments.

    Each call to execute_code() builds a fresh virtual environment inside a
    temporary directory, installs the requested packages into it (best effort),
    runs the code as a script and collects stdout/stderr. Call cleanup() when
    the executor is no longer needed.

    NOTE(review): max_memory_mb is stored but not enforced, and input_data is
    accepted by execute_code() but not forwarded to the script - confirm
    whether either is expected to have an effect.
    """

    # Shape of one acceptable pip requirement: package name, optional extras and
    # at most one version specifier. Anything else - in particular strings that
    # start with "-", which pip would read as command-line options - is refused.
    _REQUIREMENT_PATTERN = re.compile(
        r'^([A-Za-z0-9][A-Za-z0-9._-]*)'
        r'(\[[A-Za-z0-9,._-]+\])?'
        r'((==|>=|<=|~=|!=|<|>)[A-Za-z0-9.*+!_-]+)?$'
    )

    def __init__(self,
                 timeout: int = 30,
                 max_memory_mb: int = 512,
                 requirements: List[str] = None,
                 ai_service = None,
                 install_timeout: int = 180):
        """
        Initialize the SimpleCodeExecutor.

        Args:
            timeout: Maximum execution time of the script in seconds
            max_memory_mb: Maximum memory in MB (stored only, not enforced)
            requirements: List of packages to install before execution
            ai_service: Optional - AI service for further processing
            install_timeout: Maximum time in seconds for installing one package
        """
        self.timeout = timeout
        self.install_timeout = install_timeout
        self.max_memory_mb = max_memory_mb
        self.temp_dir = None
        self.requirements = requirements or []
        self.blocked_packages = [
            "cryptography", "flask", "django", "tornado",  # Security risks
            "tensorflow", "pytorch", "scikit-learn"  # Resource-intensive packages
        ]
        self.ai_service = ai_service

    @staticmethod
    def _normalize_package_name(name: str) -> str:
        """Return the package name in normalized form (lower case, '-' as separator)."""
        return re.sub(r'[-_.]+', '-', name).lower()

    def _partition_requirements(self) -> Tuple[List[str], List[str]]:
        """
        Split the requested requirements into installable and refused ones.

        Returns:
            Tuple of (allowed requirements, rejected requirements); duplicates
            and empty entries are dropped, whitespace inside entries is removed
        """
        blocked = {self._normalize_package_name(name) for name in self.blocked_packages}
        allowed = []
        rejected = []

        for raw in self.requirements:
            requirement = "".join(str(raw).split())
            if not requirement or requirement in allowed or requirement in rejected:
                continue

            match = self._REQUIREMENT_PATTERN.match(requirement)
            if match is None or self._normalize_package_name(match.group(1)) in blocked:
                rejected.append(requirement)
            else:
                allowed.append(requirement)

        return allowed, rejected

    def _install_requirements(self, python_executable: str) -> List[str]:
        """
        Install the allowed requirements into the virtual environment.

        Installation is best effort: a package that cannot be installed does
        not stop the run, because generated requirement lists often name
        standard-library modules. The problems are returned so they can be
        attached to a failing execution result.

        Args:
            python_executable: Python interpreter of the virtual environment

        Returns:
            List of human-readable installation problems (empty if none)
        """
        allowed, rejected = self._partition_requirements()
        problems = [
            f"Requirement '{requirement}' was skipped (blocked or malformed)"
            for requirement in rejected
        ]

        for requirement in allowed:
            try:
                completed = subprocess.run(
                    [python_executable, "-m", "pip", "install",
                     "--disable-pip-version-check", "--quiet", requirement],
                    timeout=self.install_timeout,
                    capture_output=True,
                    text=True
                )
            except subprocess.TimeoutExpired:
                problems.append(
                    f"Installing '{requirement}' timed out after {self.install_timeout} seconds"
                )
                continue
            except OSError as e:
                problems.append(f"Installing '{requirement}' could not be started: {e}")
                continue

            if completed.returncode != 0:
                stderr_lines = completed.stderr.strip().splitlines()
                reason = stderr_lines[-1] if stderr_lines else "unknown pip error"
                problems.append(f"Installing '{requirement}' failed: {reason}")

        for problem in problems:
            logger.warning(problem)

        return problems

    @staticmethod
    def _parse_result(stdout: str) -> Any:
        """
        Return the last stdout line that parses as a JSON object or array.

        Args:
            stdout: Captured standard output of the script

        Returns:
            Parsed JSON value, or None when no line qualifies
        """
        for line in reversed(stdout.strip().split('\n')):
            line = line.strip()
            if line and line[0] in '{[' and line[-1] in '}]':
                try:
                    return json.loads(line)
                except json.JSONDecodeError:
                    # Not valid JSON, continue with the previous line
                    continue
        return None

    def _create_venv(self) -> str:
        """
        Create a virtual environment and return its path.

        Raises:
            RuntimeError: If the virtual environment could not be created
        """
        # Remove the environment of a previous run so repeated calls do not leak directories
        self.cleanup()

        venv_parent_dir = tempfile.mkdtemp(prefix="code_exec_")
        self.temp_dir = venv_parent_dir
        venv_path = os.path.join(venv_parent_dir, "venv")

        try:
            subprocess.run([sys.executable, "-m", "venv", venv_path],
                           check=True,
                           capture_output=True)
        except subprocess.CalledProcessError as e:
            logger.error(f"Error creating virtual environment: {e}")
            # Do not leave the half-built directory behind
            self.cleanup()
            raise RuntimeError(f"Virtual environment could not be created: {e}") from e

        return venv_path

    def _get_python_executable(self, venv_path: str) -> str:
        """Return the path to the Python executable in the virtual environment."""
        if os.name == 'nt':  # Windows
            return os.path.join(venv_path, "Scripts", "python.exe")
        else:  # Unix/Linux
            return os.path.join(venv_path, "bin", "python")

    def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Execute Python code in an isolated environment.

        Args:
            code: Python code to execute
            input_data: Optional input data for the code (currently unused)

        Returns:
            Dictionary with the keys "success", "output", "error", "result"
            and "exit_code"

        Raises:
            RuntimeError: If the virtual environment could not be created
        """
        logger.info("Executing code in isolated environment")

        # Create virtual environment and locate its interpreter
        venv_path = self._create_venv()
        python_executable = self._get_python_executable(venv_path)
        logger.info(f"Using Python executable: {python_executable}")

        # Install requested packages (best effort)
        install_problems = self._install_requirements(python_executable)

        # File for the code, placed next to the virtual environment
        code_id = uuid.uuid4().hex[:8]
        code_file = os.path.join(self.temp_dir, f"code_{code_id}.py")

        try:
            with open(code_file, "w", encoding="utf-8") as f:
                f.write(code)

            # Run the script with its own directory as working directory
            process = subprocess.run(
                [python_executable, code_file],
                timeout=self.timeout,
                capture_output=True,
                text=True,
                cwd=os.path.dirname(code_file)
            )

            stdout = process.stdout
            stderr = process.stderr
            succeeded = process.returncode == 0

            execution_result = {
                "success": succeeded,
                "output": stdout,
                "error": stderr if not succeeded else "",
                "result": self._parse_result(stdout) if succeeded and stdout else None,
                "exit_code": process.returncode
            }

        except subprocess.TimeoutExpired:
            logger.error(f"Execution timed out after {self.timeout} seconds")
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Execution timed out (timeout after {self.timeout} seconds)",
                "result": None,
                "exit_code": -1
            }
        except Exception as e:
            logger.error(f"Execution error: {str(e)}")
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Execution error: {str(e)}",
                "result": None,
                "exit_code": -1
            }
        finally:
            # Clean up temporary code file
            try:
                if os.path.exists(code_file):
                    os.remove(code_file)
            except Exception as e:
                logger.warning(f"Error cleaning up temporary code file: {e}")

        # A failed run is often caused by a package that could not be installed,
        # so surface those problems together with the script error
        if install_problems and not execution_result["success"]:
            notes = "\n".join(install_problems)
            execution_result["error"] = (
                f"{execution_result['error']}\n\nPackage installation problems:\n{notes}"
            )

        return execution_result

    def cleanup(self):
        """Clean up temporary resources."""
        temp_dir = self.temp_dir
        if temp_dir and os.path.exists(temp_dir):
            try:
                shutil.rmtree(temp_dir)
                logger.info(f"Temporary directory deleted: {temp_dir}")
            except Exception as e:
                logger.warning(f"Temporary directory {temp_dir} could not be deleted: {e}")
        # Forget the directory so a later cleanup() is a no-op
        self.temp_dir = None

    def __del__(self):
        """Cleanup during garbage collection."""
        try:
            self.cleanup()
        except Exception:
            # Module globals may already be gone at interpreter shutdown;
            # a destructor must never raise
            pass
+ + Returns: + An instance of the Coder agent + """ + return AgentCoder() \ No newline at end of file diff --git a/modules/_backup_lucydom_interface copy.py b/modules/_backup_lucydom_interface copy.py new file mode 100644 index 00000000..f8607580 --- /dev/null +++ b/modules/_backup_lucydom_interface copy.py @@ -0,0 +1,1183 @@ +import logging +import uuid +from datetime import datetime +from typing import Dict, Any, List, Optional, Union + +import importlib +import hashlib + +from connectors.connector_db_json import DatabaseConnector +from modules.configuration import APP_CONFIG + +logger = logging.getLogger(__name__) + +# Custom exceptions for file handling +class FileError(Exception): + """Base class for file handling exceptions.""" + pass + +class FileNotFoundError(FileError): + """Exception raised when a file is not found.""" + pass + +class FileStorageError(FileError): + """Exception raised when there's an error storing a file.""" + pass + +class FilePermissionError(FileError): + """Exception raised when there's a permission issue with a file.""" + pass + +class FileDeletionError(FileError): + """Exception raised when there's an error deleting a file.""" + pass + + +class LucyDOMInterface: + """ + Interface zur LucyDOM-Datenbank. + Verwendet den JSON-Konnektor für den Datenzugriff. + """ + + def __init__(self, mandate_id: int, user_id: int): + """ + Initialisiert das LucyDOM-Interface mit Mandanten- und Benutzerkontext. 
+ + Args: + mandate_id: ID des aktuellen Mandanten + user_id: ID des aktuellen Benutzers + """ + self.mandate_id = mandate_id + self.user_id = user_id + + # Datenmodell-Modul importieren + try: + self.model_module = importlib.import_module("modules.lucydom_model") + logger.info("lucydom_model erfolgreich importiert") + except ImportError as e: + logger.error(f"Fehler beim Importieren von lucydom_model: {e}") + raise + + # Datenbank initialisieren, falls nötig + self._initialize_database() + + def _initialize_database(self): + """ + Initialisiert die Datenbank mit minimalen Objekten für den angemeldeten Benutzer im Mandanten, falls sie noch nicht existiert. + Ohne gültigen Benutzer keine Initialisierung. + Erstellt für jede im Datenmodell definierte Tabelle einen initialen Datensatz. + """ + effective_mandate_id = self.mandate_id + effective_user_id = self.user_id + if effective_mandate_id is None or effective_user_id is None: + #data available + return + + self.db = DatabaseConnector( + db_host=APP_CONFIG.get("DB_LUCYDOM_HOST"), + db_database=APP_CONFIG.get("DB_LUCYDOM_DATABASE"), + db_user=APP_CONFIG.get("DB_LUCYDOM_USER"), + db_password=APP_CONFIG.get("DB_LUCYDOM_PASSWORD_SECRET"), + mandate_id=self.mandate_id, + user_id=self.user_id + ) + + # Initialisierung von Standard-Prompts für verschiedene Bereiche + prompts = self.db.get_recordset("prompts") + if not prompts: + logger.info("Erstelle Standard-Prompts") + + # Standard-Prompts definieren + standard_prompts = [ + { + "mandate_id": effective_mandate_id, + "user_id": effective_user_id, + "content": "Recherchiere die aktuellen Markttrends und Entwicklungen im Bereich [THEMA]. Sammle Informationen zu führenden Unternehmen, innovativen Produkten oder Dienstleistungen und aktuellen Herausforderungen. 
Präsentiere die Ergebnisse in einer strukturierten Übersicht mit relevanten Daten und Quellen.", + "name": "Web Research: Marktforschung" + }, + { + "mandate_id": effective_mandate_id, + "user_id": effective_user_id, + "content": "Analysiere den beigefügten Datensatz zu [THEMA] und identifiziere die wichtigsten Trends, Muster und Auffälligkeiten. Führe statistische Berechnungen durch, um deine Erkenntnisse zu untermauern. Stelle die Ergebnisse in einer klar strukturierten Analyse dar und ziehe relevante Schlussfolgerungen.", + "name": "Analyse: Datenanalyse" + }, + { + "mandate_id": effective_mandate_id, + "user_id": effective_user_id, + "content": "Erstelle ein detailliertes Protokoll unserer Besprechung zum Thema [THEMA]. Erfasse alle besprochenen Punkte, getroffenen Entscheidungen und vereinbarten Maßnahmen. Strukturiere das Protokoll übersichtlich mit Tagesordnungspunkten, Teilnehmerliste und klaren Verantwortlichkeiten für die Follow-up-Aktionen.", + "name": "Protokoll: Besprechungsprotokoll" + }, + { + "mandate_id": effective_mandate_id, + "user_id": effective_user_id, + "content": "Entwickle ein UI/UX-Designkonzept für [ANWENDUNG/WEBSITE]. Berücksichtige die Zielgruppe, Hauptfunktionen und die Markenidentität. Beschreibe die visuelle Gestaltung, Navigation, Interaktionsmuster und Informationsarchitektur. Erläutere, wie das Design die Benutzerfreundlichkeit und das Nutzererlebnis optimiert.", + "name": "Design: UI/UX Design" + } + ] + + # Prompts erstellen + for prompt_data in standard_prompts: + created_prompt = self.db.record_create("prompts", prompt_data) + logger.info(f"Prompt '{prompt_data.get('name', 'Standard')}' wurde erstellt mit ID {created_prompt['id']}") + + + # Utilities + + def get_initial_id(self, table: str) -> Optional[int]: + """ + Gibt die initiale ID für eine Tabelle zurück. 
+ + Args: + table: Name der Tabelle + + Returns: + Die initiale ID oder None, wenn nicht vorhanden + """ + return self.db.get_initial_id(table) + + def _get_current_timestamp(self) -> str: + """Gibt den aktuellen Zeitstempel im ISO-Format zurück""" + return datetime.now().isoformat() + + + # Prompt-Methoden + + def get_all_prompts(self) -> List[Dict[str, Any]]: + """Gibt alle Prompts des aktuellen Mandanten zurück""" + return self.db.get_recordset("prompts") + + def get_prompt(self, prompt_id: int) -> Optional[Dict[str, Any]]: + """Gibt einen Prompt anhand seiner ID zurück""" + prompts = self.db.get_recordset("prompts", record_filter={"id": prompt_id}) + if prompts: + return prompts[0] + return None + + def create_prompt(self, content: str, name: str) -> Dict[str, Any]: + """Erstellt einen neuen Prompt""" + prompt_data = { + "mandate_id": self.mandate_id, + "user_id": self.user_id, + "content": content, + "name": name, + "created_at": self._get_current_timestamp() + } + + return self.db.record_create("prompts", prompt_data) + + def update_prompt(self, prompt_id: int, content: str = None, name: str = None) -> Dict[str, Any]: + """ + Aktualisiert einen vorhandenen Prompt + + Args: + prompt_id: ID des zu aktualisierenden Prompts + content: Neuer Inhalt des Prompts + + Returns: + Das aktualisierte Prompt-Objekt + """ + # Prüfen, ob der Prompt existiert + prompt = self.get_prompt(prompt_id) + if not prompt: + return None + + # Daten für die Aktualisierung vorbereiten + prompt_data = {} + + if content is not None: + prompt_data["content"] = content + if name is not None: + prompt_data["name"] = name + + # Prompt aktualisieren + return self.db.record_modify("prompts", prompt_id, prompt_data) + + def delete_prompt(self, prompt_id: int) -> bool: + """ + Löscht einen Prompt aus der Datenbank + + Args: + prompt_id: ID des zu löschenden Prompts + + Returns: + True, wenn der Prompt erfolgreich gelöscht wurde, sonst False + """ + return self.db.record_delete("prompts", 
def get_mime_type(self, filename: str) -> str:
    """
    Return the MIME type that belongs to the extension of a filename.

    Args:
        filename: Name of the file; only its last extension is considered

    Returns:
        MIME type for known extensions (case-insensitive), otherwise
        "application/octet-stream"
    """
    import os

    mime_by_extension = dict(
        pdf="application/pdf",
        docx="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        doc="application/msword",
        xlsx="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        xls="application/vnd.ms-excel",
        pptx="application/vnd.openxmlformats-officedocument.presentationml.presentation",
        ppt="application/vnd.ms-powerpoint",
        csv="text/csv",
        txt="text/plain",
        json="application/json",
        xml="application/xml",
        html="text/html",
        htm="text/html",
        jpg="image/jpeg",
        jpeg="image/jpeg",
        png="image/png",
        gif="image/gif",
        webp="image/webp",
        svg="image/svg+xml",
        py="text/x-python",
        js="application/javascript",
        css="text/css",
    )

    # splitext yields ".ext" (or ""); drop the dot and compare case-insensitively
    _, dotted_suffix = os.path.splitext(filename)
    extension = dotted_suffix[1:].lower()

    if extension in mime_by_extension:
        return mime_by_extension[extension]
    return "application/octet-stream"
+ + Args: + file_id: ID der gesuchten Datei + + Returns: + FileItem ohne Binärdaten oder None, wenn nicht gefunden + """ + files = self.db.get_recordset("files", record_filter={"id": file_id}) + if files: + return files[0] + return None + + def create_file(self, name: str, mime_type: str, size: int = None, file_hash: str = None) -> Dict[str, Any]: + """ + Erstellt einen neuen Dateieintrag in der Datenbank ohne Inhalt. + Der eigentliche Dateiinhalt wird separat in der FileData-Tabelle gespeichert. + + Args: + name: Name der Datei + mime_type: MIME-Typ der Datei + size: Größe der Datei in Bytes + file_hash: Hash-Wert der Datei für Deduplizierung + + Returns: + Das erstellte FileItem-Objekt + """ + file_data = { + "mandate_id": self.mandate_id, + "user_id": self.user_id, + "name": name, + "mime_type": mime_type, + "size": size, + "file_hash": file_hash, + "creation_date": self._get_current_timestamp() + } + return self.db.record_create("files", file_data) + + def update_file(self, file_id: int, update_data: Dict[str, Any]) -> Dict[str, Any]: + """ + Aktualisiert die Metadaten einer vorhandenen Datei ohne die Binärdaten zu beeinflussen. + + Args: + file_id: ID der zu aktualisierenden Datei + update_data: Dictionary mit zu aktualisierenden Feldern + + Returns: + Das aktualisierte FileItem-Objekt + """ + # Prüfen, ob die Datei existiert + file = self.get_file(file_id) + if not file: + raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden") + + # Datei aktualisieren + return self.db.record_modify("files", file_id, update_data) + + def delete_file(self, file_id: int) -> bool: + """ + Löscht eine Datei aus der Datenbank (Metadaten und Inhalt). 
def create_file_data(self, file_id: int, data: bytes) -> bool:
    """
    Store the binary content of a file in the database as a Base64 string.

    Args:
        file_id: ID of the file the data belongs to
        data: Binary content. A str that is strictly valid Base64 is stored
            unchanged, any other str is UTF-8 encoded first; values of other
            types are converted with str() before encoding.

    Returns:
        True on success, False on error
    """
    try:
        import base64
        import binascii

        if isinstance(data, (bytes, bytearray)):
            # Regular case: raw binary data
            encoded_data = base64.b64encode(bytes(data)).decode('utf-8')
            logger.debug(f"Converted {len(data)} bytes to base64 string of length {len(encoded_data)}")
        else:
            logger.warning(f"Data is not bytes, but {type(data)}. Attempting to handle...")
            if isinstance(data, str):
                try:
                    # Strict check: without validate=True, characters outside the
                    # Base64 alphabet are silently discarded and ordinary text can
                    # pass as "already encoded". Whitespace (line-wrapped Base64)
                    # is tolerated.
                    base64.b64decode("".join(data.split()), validate=True)
                    encoded_data = data
                    logger.info(f"Data appears to be already base64 encoded, using as is")
                except (binascii.Error, ValueError):
                    # Not Base64, so encode the string as bytes and then to Base64
                    encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8')
                    logger.info(f"Converted string to base64")
            else:
                # For other types, convert to string first
                encoded_data = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
                logger.warning(f"Converted non-standard type to base64")

        # The file_data record uses the ID of the file it belongs to
        file_data = {
            "id": file_id,
            "data": encoded_data
        }

        self.db.record_create("file_data", file_data)
        logger.info(f"Successfully stored encoded data for file {file_id}")
        return True
    except Exception as e:
        logger.error(f"Fehler beim Speichern der Binärdaten für Datei {file_id}: {str(e)}")
        return False
+ + Args: + file_id: ID der Datei + + Returns: + Binärdaten oder None, wenn nicht gefunden + """ + import base64 + + file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id}) + if file_data_entries and "data" in file_data_entries[0]: + encoded_data = file_data_entries[0]["data"] + + try: + # Check if it's a string (most likely base64) + if isinstance(encoded_data, str): + try: + # Try to decode base64 + binary_data = base64.b64decode(encoded_data) + logger.debug(f"Successfully decoded base64 string to {len(binary_data)} bytes") + return binary_data + except Exception as e: + logger.error(f"Failed to decode base64 data: {str(e)}") + # If it's not valid base64, return as bytes + return encoded_data.encode('utf-8') + # If it's already bytes (shouldn't happen with model change) + elif isinstance(encoded_data, bytes): + logger.warning(f"Data was already bytes, no conversion needed") + return encoded_data + else: + logger.error(f"Unexpected data type in database: {type(encoded_data)}") + return None + except Exception as e: + logger.error(f"Error processing file data: {str(e)}") + return None + else: + logger.warning(f"No data found for file ID {file_id}") + return None + + def update_file_data(self, file_id: int, data: Union[bytes, str]) -> bool: + """ + Aktualisiert die Binärdaten einer Datei in der Datenbank. + Konvertiert bytes zu Base64-String für die Speicherung. 
+ + Args: + file_id: ID der Datei + data: Neue Binärdaten oder kodierte Daten + + Returns: + True bei Erfolg, False bei Fehler + """ + try: + import base64 + + # Convert data to base64 string if it's bytes + if isinstance(data, bytes): + encoded_data = base64.b64encode(data).decode('utf-8') + logger.debug(f"Converted {len(data)} bytes to base64 string") + elif isinstance(data, str): + # Check if it might already be base64 encoded + try: + # See if it's valid base64 + base64.b64decode(data) + # If no error, assume it's already encoded + encoded_data = data + logger.debug(f"Data appears to be already base64 encoded, using as is") + except: + # Not base64, so encode the string as bytes then to base64 + encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8') + logger.debug(f"Converted string to base64") + else: + # For other types, convert to string first + encoded_data = base64.b64encode(str(data).encode('utf-8')).decode('utf-8') + logger.warning(f"Converted non-standard type to base64") + + # Check if a record already exists + file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id}) + + if file_data_entries: + # Update the existing record + self.db.record_modify("file_data", file_id, {"data": encoded_data}) + logger.info(f"Updated existing file data for file ID {file_id}") + else: + # Create a new record + file_data = { + "id": file_id, + "data": encoded_data + } + self.db.record_create("file_data", file_data) + logger.info(f"Created new file data for file ID {file_id}") + + return True + except Exception as e: + logger.error(f"Fehler beim Aktualisieren der Binärdaten für Datei {file_id}: {str(e)}") + return False + + def save_uploaded_file(self, file_content: bytes, file_name: str) -> Dict[str, Any]: + """ + Speichert eine hochgeladene Datei in der Datenbank. + Metadaten werden in der 'files'-Tabelle gespeichert, + Binärdaten in der 'file_data'-Tabelle als Base64-String. 
+ + Args: + file_content: Binärdaten der Datei + file_name: Name der Datei + + Returns: + Dictionary mit Metadaten der gespeicherten Datei + """ + try: + # Debug: Log the start of the file upload process + logger.info(f"Starting upload process for file: {file_name}") + + # Debug: Check if file_content is valid bytes + if not isinstance(file_content, bytes): + logger.error(f"Invalid file_content type: {type(file_content)}") + raise ValueError(f"file_content must be bytes, got {type(file_content)}") + + # Calculate file hash for deduplication + file_hash = self.calculate_file_hash(file_content) + logger.debug(f"Calculated file hash: {file_hash}") + + # Check for duplicate + existing_file = self.check_for_duplicate_file(file_hash) + if existing_file: + # Simply return the existing file metadata + logger.info(f"Duplikat gefunden für {file_name}: {existing_file['id']}") + return existing_file + + # MIME-Typ bestimmen + mime_type = self.get_mime_type(file_name) + + # Dateigröße bestimmen + file_size = len(file_content) + + # 1. Speichere Metadaten in der 'files'-Tabelle + logger.info(f"Saving file metadata to database for file: {file_name}") + db_file = self.create_file( + name=file_name, + mime_type=mime_type, + size=file_size, + file_hash=file_hash + ) + + # 2. 
Speichere Binärdaten als Base64-String in der 'file_data'-Tabelle + logger.info(f"Saving file content to database for file: {file_name}") + self.create_file_data(db_file["id"], file_content) + + # Debug: Verify database record was created + if not db_file: + logger.warning(f"Database record for file {file_name} was not created properly") + else: + logger.info(f"Database record created for file {file_name}") + + logger.info(f"File upload process completed for: {file_name}") + return db_file + + except Exception as e: + logger.error(f"Error in save_uploaded_file for {file_name}: {str(e)}", exc_info=True) + raise FileStorageError(f"Fehler beim Speichern der Datei: {str(e)}") + + def download_file(self, file_id: int) -> Optional[Dict[str, Any]]: + """ + Gibt eine Datei zum Download zurück, einschließlich Binärdaten. + + Args: + file_id: ID der Datei + + Returns: + Dictionary mit Dateidaten und -metadaten oder None, wenn nicht gefunden + """ + try: + # 1. Metadaten aus der 'files'-Tabelle holen + file = self.get_file(file_id) + + if not file: + raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden") + + # 2. 
Binärdaten aus der 'file_data'-Tabelle holen + file_content = self.get_file_data(file_id) + + if file_content is None: + raise FileNotFoundError(f"Binärdaten für Datei mit ID {file_id} nicht gefunden") + + return { + "id": file_id, + "name": file.get("name", f"file_{file_id}"), + "content_type": file.get("mime_type", "application/octet-stream"), + "size": file.get("size", len(file_content)), + "content": file_content + } + except FileNotFoundError as e: + # Re-raise FileNotFoundError as is + raise + except Exception as e: + logger.error(f"Fehler beim Herunterladen der Datei {file_id}: {str(e)}") + raise FileError(f"Fehler beim Herunterladen der Datei: {str(e)}") + + + # Workflow Methoden + + def get_all_workflows(self) -> List[Dict[str, Any]]: + """Gibt alle Workflows des aktuellen Mandanten zurück""" + return self.db.get_recordset("workflows") + + def get_workflows_by_user(self, user_id: int) -> List[Dict[str, Any]]: + """Gibt alle Workflows eines Benutzers zurück""" + return self.db.get_recordset("workflows", record_filter={"user_id": user_id}) + + def get_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]: + """Gibt einen Workflow anhand seiner ID zurück""" + workflows = self.db.get_recordset("workflows", record_filter={"id": workflow_id}) + if workflows: + return workflows[0] + return None + + def create_workflow(self, workflow_data: Dict[str, Any]) -> Dict[str, Any]: + """Erstellt einen neuen Workflow in der Datenbank""" + # Stellen Sie sicher, dass mandate_id und user_id gesetzt sind + if "mandate_id" not in workflow_data: + workflow_data["mandate_id"] = self.mandate_id + + if "user_id" not in workflow_data: + workflow_data["user_id"] = self.user_id + + # Zeitstempel setzen, falls nicht vorhanden + current_time = self._get_current_timestamp() + if "started_at" not in workflow_data: + workflow_data["started_at"] = current_time + + if "last_activity" not in workflow_data: + workflow_data["last_activity"] = current_time + + # Stelle sicher, dass 
last_message_id gesetzt ist, falls nicht vorhanden + if "last_message_id" not in workflow_data: + workflow_data["last_message_id"] = "" + + return self.db.record_create("workflows", workflow_data) + + def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]: + """ + Aktualisiert einen vorhandenen Workflow. + + Args: + workflow_id: ID des zu aktualisierenden Workflows + workflow_data: Neue Daten für den Workflow + + Returns: + Das aktualisierte Workflow-Objekt + """ + # Prüfen, ob der Workflow existiert + workflow = self.get_workflow(workflow_id) + if not workflow: + return None + + # Aktualisierungszeit setzen + workflow_data["last_activity"] = self._get_current_timestamp() + + # Workflow aktualisieren + return self.db.record_modify("workflows", workflow_id, workflow_data) + + def delete_workflow(self, workflow_id: str) -> bool: + """ + Löscht einen Workflow aus der Datenbank. + + Args: + workflow_id: ID des zu löschenden Workflows + + Returns: + True bei Erfolg, False wenn der Workflow nicht existiert + """ + # Prüfen, ob der Workflow existiert + workflow = self.get_workflow(workflow_id) + if not workflow: + return False + + # Prüfen, ob der Benutzer der Eigentümer ist oder Admin-Rechte hat + if workflow.get("user_id") != self.user_id: + # Hier könnte eine Prüfung auf Admin-Rechte erfolgen + return False + + # Workflow löschen + return self.db.record_delete("workflows", workflow_id) + + + # Workflow Messages + + def get_workflow_messages(self, workflow_id: str) -> List[Dict[str, Any]]: + """Gibt alle Nachrichten eines Workflows zurück""" + return self.db.get_recordset("workflow_messages", record_filter={"workflow_id": workflow_id}) + + def create_workflow_message(self, message_data: Dict[str, Any]) -> Dict[str, Any]: + """Erstellt eine neue Nachricht für einen Workflow + + Args: + message_data: Die Nachrichtendaten + + Returns: + Die erstellte Nachricht oder None bei Fehler + """ + try: + # Check if required fields are present + 
required_fields = ["id", "workflow_id"] + for field in required_fields: + if field not in message_data: + logger.error(f"Pflichtfeld '{field}' fehlt in message_data") + raise ValueError(f"Pflichtfeld '{field}' fehlt in den Nachrichtendaten") + + # Validate that ID is not None + if message_data["id"] is None: + message_data["id"] = f"msg_{uuid.uuid4()}" + logger.warning(f"Automatisch generierte ID für Workflow-Nachricht: {message_data['id']}") + + # Stellen Sie sicher, dass die benötigten Felder vorhanden sind + if "started_at" not in message_data and "created_at" not in message_data: + message_data["started_at"] = self._get_current_timestamp() + + # Wenn "created_at" vorhanden ist, übertrage es nach "started_at" + if "created_at" in message_data and "started_at" not in message_data: + message_data["started_at"] = message_data["created_at"] + del message_data["created_at"] + + # Status setzen, falls nicht vorhanden + if "status" not in message_data: + message_data["status"] = "completed" + + # Sequenznummer setzen, falls nicht vorhanden + if "sequence_no" not in message_data: + # Hole aktuelle Nachrichten, um die nächste Sequenznummer zu bestimmen + existing_messages = self.get_workflow_messages(message_data["workflow_id"]) + message_data["sequence_no"] = len(existing_messages) + 1 + + # Debug-Log für die zu erstellenden Daten + logger.debug(f"Erstelle Workflow-Nachricht mit Daten: {message_data}") + + return self.db.record_create("workflow_messages", message_data) + except Exception as e: + logger.error(f"Fehler beim Erstellen der Workflow-Nachricht: {str(e)}") + # Return None instead of raising to avoid cascading failures + return None + + def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Dict[str, Any]: + """ + Aktualisiert eine bestehende Workflow-Nachricht in der Datenbank + with improved document handling. 
+ + Args: + message_id: ID der Nachricht + message_data: Zu aktualisierende Daten + + Returns: + Das aktualisierte Nachrichtenobjekt oder None bei Fehler + """ + try: + # Print debug info + print(f"Updating message {message_id} in database") + + # Ensure message_id is provided + if not message_id: + logger.error("No message_id provided for update_workflow_message") + raise ValueError("message_id cannot be empty") + + # Check if message exists in database + messages = self.db.get_recordset("workflow_messages", record_filter={"id": message_id}) + if not messages: + logger.warning(f"Message with ID {message_id} does not exist in database") + + # If message doesn't exist but we have workflow_id, create it + if "workflow_id" in message_data: + logger.info(f"Creating new message with ID {message_id} for workflow {message_data.get('workflow_id')}") + return self.db.record_create("workflow_messages", message_data) + else: + logger.error(f"Workflow ID missing for new message {message_id}") + return None + + # Ensure documents array is handled properly + if "documents" in message_data: + logger.info(f"Message {message_id} has {len(message_data['documents'])} documents") + + # Make sure we're not storing huge content in the database + # For each document, ensure content size is reasonable + documents_to_store = [] + for doc in message_data["documents"]: + doc_copy = doc.copy() + + # Process contents array if it exists + if "contents" in doc_copy: + # Ensure contents is not too large - limit text size + for content in doc_copy["contents"]: + if content.get("type") == "text" and "text" in content: + text = content["text"] + if len(text) > 1000: # Limit text preview to 1000 chars + content["text"] = text[:1000] + "... 
[truncated]" + + documents_to_store.append(doc_copy) + + # Replace with the processed documents + message_data["documents"] = documents_to_store + + # Log the update data size for debugging + update_data_size = len(str(message_data)) + logger.debug(f"Update data size: {update_data_size} bytes") + + # Ensure ID is in the dataset + if 'id' not in message_data: + message_data['id'] = message_id + + # Konvertiere created_at zu started_at falls nötig + if "created_at" in message_data and "started_at" not in message_data: + message_data["started_at"] = message_data["created_at"] + del message_data["created_at"] + + # Update the message + updated_message = self.db.record_modify("workflow_messages", message_id, message_data) + if updated_message: + logger.info(f"Message {message_id} updated successfully") + else: + logger.warning(f"Failed to update message {message_id}") + + return updated_message + except Exception as e: + logger.error(f"Error updating message {message_id}: {str(e)}", exc_info=True) + # Re-raise with full information + raise ValueError(f"Error updating message {message_id}: {str(e)}") + + def delete_workflow_message(self, workflow_id: str, message_id: str) -> bool: + """ + Löscht eine Nachricht aus einem Workflow in der Datenbank. 
+ + Args: + workflow_id: ID des zugehörigen Workflows + message_id: ID der zu löschenden Nachricht + + Returns: + True bei Erfolg, False bei Fehler + """ + try: + # Prüfen, ob die Nachricht existiert + messages = self.get_workflow_messages(workflow_id) + message = next((m for m in messages if m.get("id") == message_id), None) + + if not message: + logger.warning(f"Nachricht {message_id} für Workflow {workflow_id} nicht gefunden") + return False + + # Nachricht aus der Datenbank löschen + return self.db.record_delete("workflow_messages", message_id) + except Exception as e: + logger.error(f"Fehler beim Löschen der Nachricht {message_id}: {str(e)}") + return False + + def delete_file_from_message(self, workflow_id: str, message_id: str, file_id: int) -> bool: + """ + Entfernt eine Dateireferenz aus einer Nachricht. + Die Datei selbst wird nicht gelöscht, nur die Referenz in der Nachricht. + Enhanced version with improved file matching. + + Args: + workflow_id: ID des zugehörigen Workflows + message_id: ID der Nachricht + file_id: ID der zu entfernenden Datei + + Returns: + True bei Erfolg, False bei Fehler + """ + try: + # Log operation + logger.info(f"Removing file {file_id} from message {message_id} in workflow {workflow_id}") + + # Get all workflow messages + all_messages = self.get_workflow_messages(workflow_id) + logger.debug(f"Workflow {workflow_id} has {len(all_messages)} messages") + + # Try different approaches to find the message + message = None + + # Exact match + message = next((m for m in all_messages if m.get("id") == message_id), None) + + # Case-insensitive match + if not message and isinstance(message_id, str): + message = next((m for m in all_messages + if isinstance(m.get("id"), str) and m.get("id").lower() == message_id.lower()), None) + + # Partial match (starts with) + if not message and isinstance(message_id, str): + message = next((m for m in all_messages + if isinstance(m.get("id"), str) and m.get("id").startswith(message_id)), None) + + if 
not message: + logger.warning(f"Message {message_id} not found in workflow {workflow_id}") + return False + + # Log the found message + logger.info(f"Found message: {message.get('id')}") + + # Check if message has documents + if "documents" not in message or not message["documents"]: + logger.warning(f"No documents in message {message_id}") + return False + + # Log existing documents + documents = message.get("documents", []) + logger.debug(f"Message has {len(documents)} documents") + for i, doc in enumerate(documents): + doc_id = doc.get("id", "unknown") + file_id_value = doc.get("file_id", "unknown") + logger.debug(f"Document {i}: doc_id={doc_id}, file_id={file_id_value}") + + # Create a new list of documents without the one to delete + updated_documents = [] + removed = False + + for doc in documents: + doc_id = doc.get("id") + file_id_value = doc.get("file_id") + + # Flexible matching approach + should_remove = ( + (doc_id == file_id) or + (file_id_value == file_id) or + (isinstance(doc_id, str) and str(file_id) in doc_id) or + (isinstance(file_id_value, str) and str(file_id) in file_id_value) + ) + + if should_remove: + removed = True + logger.info(f"Found file to remove: doc_id={doc_id}, file_id={file_id_value}") + else: + updated_documents.append(doc) + + if not removed: + logger.warning(f"No matching file {file_id} found in message {message_id}") + return False + + # Update message with modified documents array + message_update = { + "documents": updated_documents + } + + # Apply the update directly to the database + updated = self.db.record_modify("workflow_messages", message["id"], message_update) + + if updated: + logger.info(f"Successfully removed file {file_id} from message {message_id}") + return True + else: + logger.warning(f"Failed to update message {message_id} in database") + return False + + except Exception as e: + logger.error(f"Error removing file {file_id} from message {message_id}: {str(e)}") + return False + + + # Workflow Logs + + def 
get_workflow_logs(self, workflow_id: str) -> List[Dict[str, Any]]: + """Gibt alle Log-Einträge eines Workflows zurück""" + return self.db.get_recordset("workflow_logs", record_filter={"workflow_id": workflow_id}) + + def create_workflow_log(self, log_data: Dict[str, Any]) -> Dict[str, Any]: + """Erstellt einen neuen Log-Eintrag für einen Workflow""" + # Stellen Sie sicher, dass die benötigten Felder vorhanden sind + if "timestamp" not in log_data: + log_data["timestamp"] = self._get_current_timestamp() + + return self.db.record_create("workflow_logs", log_data) + + + # Workflow Management + + def save_workflow_state(self, workflow: Dict[str, Any], save_messages: bool = True, save_logs: bool = True) -> bool: + """ + Speichert den kompletten Zustand eines Workflows in der Datenbank. + Dies umfasst den Workflow selbst, Nachrichten und Logs. + + Args: + workflow: Das vollständige Workflow-Objekt + save_messages: Flag, ob Nachrichten gespeichert werden sollen + save_logs: Flag, ob Logs gespeichert werden sollen + + Returns: + True bei Erfolg, False bei Fehler + """ + try: + workflow_id = workflow.get("id") + if not workflow_id: + return False + + # Extrahiere nur die für die Datenbank relevanten Workflow-Felder + workflow_db_data = { + "id": workflow_id, + "mandate_id": workflow.get("mandate_id", self.mandate_id), + "user_id": workflow.get("user_id", self.user_id), + "name": workflow.get("name", f"Workflow {workflow_id}"), + "status": workflow.get("status", "unknown"), + "started_at": workflow.get("started_at", self._get_current_timestamp()), + "last_activity": workflow.get("last_activity", self._get_current_timestamp()), + "last_message_id": workflow.get("last_message_id", ""), + "data_stats": workflow.get("data_stats", {}) + } + + # Prüfen, ob der Workflow bereits existiert + existing_workflow = self.get_workflow(workflow_id) + if existing_workflow: + self.update_workflow(workflow_id, workflow_db_data) + else: + self.create_workflow(workflow_db_data) + + + # 
Nachrichten speichern + if save_messages and "messages" in workflow: + # Bestehende Nachrichten abrufen + existing_messages = {msg["id"]: msg for msg in self.get_workflow_messages(workflow_id)} + + for message in workflow["messages"]: + message_id = message.get("id") + if not message_id: + continue + + # Nur relevante Daten für die Datenbank extrahieren + message_data = { + "id": message_id, + "workflow_id": workflow_id, + "sequence_no": message.get("sequence_no", 0), + "role": message.get("role", "unknown"), + "content": message.get("content"), + "agent_name": message.get("agent_name"), + "status": message.get("status", "completed"), + "started_at": message.get("started_at", self._get_current_timestamp()), + "finished_at": message.get("finished_at"), + "parent_message_id": message.get("parent_message_id"), + # IMPORTANT: Include documents field to persist file attachments + "documents": message.get("documents", []) + } + + # Debug logging for documents + doc_count = len(message.get("documents", [])) + if doc_count > 0: + logger.info(f"Message {message_id} has {doc_count} documents to save") + + # Nachricht erstellen oder aktualisieren + if message_id in existing_messages: + self.db.record_modify("workflow_messages", message_id, message_data) + else: + self.db.record_create("workflow_messages", message_data) + + # Logs speichern + if save_logs and "logs" in workflow: + # Bestehende Logs abrufen + existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)} + + for log in workflow["logs"]: + log_id = log.get("id") + if not log_id: + continue + + # Nur relevante Daten für die Datenbank extrahieren + log_data = { + "id": log_id, + "workflow_id": workflow_id, + "message": log.get("message", ""), + "type": log.get("type", "info"), + "timestamp": log.get("timestamp", self._get_current_timestamp()), + "agent_id": log.get("agent_id"), + "agent_name": log.get("agent_name") + } + + # Log erstellen oder aktualisieren + if log_id in existing_logs: + 
self.db.record_modify("workflow_logs", log_id, log_data) + else: + self.db.record_create("workflow_logs", log_id, log_data) + + return True + except Exception as e: + logger.error(f"Fehler beim Speichern des Workflow-Zustands: {str(e)}") + return False + + def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]: + """ + Lädt den kompletten Zustand eines Workflows aus der Datenbank. + Dies umfasst den Workflow selbst, Nachrichten und Logs. + + Args: + workflow_id: ID des zu ladenden Workflows + + Returns: + Das vollständige Workflow-Objekt oder None bei Fehler + """ + try: + # Basis-Workflow laden + workflow = self.get_workflow(workflow_id) + if not workflow: + return None + + # Log the workflow base retrieval + logger.debug(f"Loaded base workflow {workflow_id} from database") + + # Nachrichten laden + messages = self.get_workflow_messages(workflow_id) + # Nach Sequenznummer sortieren + messages.sort(key=lambda x: x.get("sequence_no", 0)) + + # Debug log for messages and document counts + message_count = len(messages) + logger.debug(f"Loaded {message_count} messages for workflow {workflow_id}") + + # Log document counts for each message + for msg in messages: + doc_count = len(msg.get("documents", [])) + if doc_count > 0: + logger.info(f"Message {msg.get('id')} has {doc_count} documents loaded from database") + # Log document details for debugging + for i, doc in enumerate(msg.get("documents", [])): + file_id = doc.get("file_id", "unknown") + logger.debug(f"Document {i+1}: file_id={file_id}") + + # Logs laden + logs = self.get_workflow_logs(workflow_id) + # Nach Zeitstempel sortieren + logs.sort(key=lambda x: x.get("timestamp", "")) + + # Vollständiges Workflow-Objekt zusammenbauen + complete_workflow = workflow.copy() + complete_workflow["messages"] = messages + complete_workflow["logs"] = logs + + return complete_workflow + except Exception as e: + logger.error(f"Fehler beim Laden des Workflow-Zustands: {str(e)}") + return None + + +# 
Singleton-Factory für LucyDOMInterface-Instanzen pro Kontext +_lucydom_interfaces = {} + +def get_lucydom_interface(mandate_id: int = 0, user_id: int = 0) -> LucyDOMInterface: + """ + Gibt eine LucyDOMInterface-Instanz für den angegebenen Kontext zurück. + Wiederverwendet bestehende Instanzen. + """ + context_key = f"{mandate_id}_{user_id}" + if context_key not in _lucydom_interfaces: + _lucydom_interfaces[context_key] = LucyDOMInterface(mandate_id, user_id) + return _lucydom_interfaces[context_key] + +# Init +get_lucydom_interface() \ No newline at end of file diff --git a/modules/chat.py b/modules/chat.py index cfc7a71e..359965f2 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -41,7 +41,7 @@ class ChatManager: self.ai_service = ChatService() self.lucy_interface = get_lucydom_interface(mandate_id, user_id) self.agent_registry = get_agent_registry() - + self.agent_registry.set_ai_service(self.ai_service) ### Chat Management @@ -60,7 +60,7 @@ class ChatManager: workflow = self.workflow_init(workflow_id) # 2. User-Input in Message-Objekt transformieren und im Workflow speichern - message_user = self.chat_message_to_workflow("user", "", user_input, workflow) + message_user = await self.chat_message_to_workflow("user", "", user_input, workflow) # 3. Projektleiter-Prompt erstellen und Antwort analysieren project_manager_response = await self.chat_prompt(message_user, workflow) @@ -88,7 +88,7 @@ class ChatManager: obj_results.extend(task_results) # 6. Erstelle die finale Antwort mit den relevanten Dokumenten aus obj_final_documents - final_message = self.chat_final_message(obj_user_response, obj_results, obj_final_documents) + final_message = await self.chat_final_message(obj_user_response, obj_final_documents, obj_results) self.message_add(workflow, final_message) # 7. 
Finalisiere den Workflow @@ -215,7 +215,7 @@ JSON_OUTPUT = {{ # Parsen der JSON-Antwort return self.parse_json_response(project_manager_output) - def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]: + async def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]: """ Integriert Benutzereingaben in ein Message-Objekt inklusive Dateien mit vollständigen Inhalten. @@ -240,7 +240,7 @@ JSON_OUTPUT = {{ # Zusätzliche Dateien verarbeiten mit vollständigen Inhalten additional_fileids = chat_message.get("list_file_id", []) - additional_files = self.process_file_ids(additional_fileids) + additional_files = await self.process_file_ids(additional_fileids) # Nachrichtenobjekt erstellen message_object = { @@ -254,30 +254,20 @@ JSON_OUTPUT = {{ logger.debug(f"message_user = {self.parse_json2text(message_object)}.") return message_object - def chat_final_message(self, obj_user_response: str, obj_results: List[Dict[str, Any]], - obj_final_documents: List[Dict[str, Any]]) -> Dict[str, Any]: + async def chat_final_message(self, obj_user_response: str, obj_final_documents: List[Dict[str, Any]], obj_results: List[Dict[str, Any]], ) -> Dict[str, Any]: """ - Creates the final response message with documents corresponding to obj_final_documents. + Creates the final response message with review of proposed and delivered. 
Args: - obj_user_response: Text response to the user - obj_results: List of generated result documents + obj_user_response: Initial text response to the user obj_final_documents: List of expected response documents + obj_results: List of generated result documents Returns: Complete message object with content and relevant documents - """ - # Create basic message structure - final_message = { - "role": "assistant", - "agent_name": "project_manager", - "content": obj_user_response, - "documents": [] - } - + """ # Find documents that match the obj_final_documents requirements matching_documents = [] - doc_references = [] for answer_spec in obj_final_documents: answer_label = answer_spec.get("label") @@ -287,38 +277,58 @@ JSON_OUTPUT = {{ doc_name=self.get_filename(doc) # Check if this document matches the answer specification if doc_name == answer_label: - matching_documents.append(doc) - doc_type = answer_spec.get("doc_type", "Document") - doc_references.append(f"- {doc_name} ({doc_type})") + content_ref = [] + for c in doc.get("contents"): + content_ref.append(c.get("summary")) + doc_ref = { + "label": doc_name, + "content_summary": content_ref + } + matching_documents.append(doc_ref) break - # Add matching documents to the final message - final_message["documents"] = matching_documents - - # Add document references to the content if there are any - if doc_references: - doc_list = "\n".join(doc_references) - final_message["content"] += f"\n\nCreated documents:\n{doc_list}" - + final_prompt = await self.ai_service.call_api([ + {"role": "system", "content": "You are a project manager, who delivers results to a user."}, + {"role": "user", "content": f""" + Give the final short feedback to the user with reference to the initial statement (obj_user_response). Provide a list of delivered files (files_deliveded). 
If in the list of delivered files (files_delivered) some files from the original list (files_promised) are not available, then just give a comment on this, otherwise task is completed. + + Here the data: + obj_user_response = {self.parse_json2text(obj_user_response)} + files_promised = {self.parse_json2text(matching_documents)} + files_deliveded = {self.parse_json2text(obj_user_response)} + """ + } + ]) + + # Create basic message structure with proper fields + logger.debug(f"FINAL PROMPT = {self.parse_json2text(final_prompt)}.") + final_message = { + "role": "assistant", + "agent_name": "project_manager", + "content": final_prompt, + "documents": [] # DO NOT include the results documents, already with agents + } + + logger.debug(f"FINAL MESSAGE = {self.parse_json2text(final_message)}.") return final_message ### Workflow - + def workflow_init(self, workflow_id: Optional[str] = None) -> Dict[str, Any]: """ - Initialisiert einen Workflow oder lädt einen bestehenden mit Rundenzählung. + Initializes a workflow or loads an existing one with round counting. 
Args: - workflow_id: Optional - ID des zu ladenden Workflows + workflow_id: Optional - ID of the workflow to load Returns: - Initialisiertes Workflow-Objekt + Initialized workflow object """ current_time = datetime.now().isoformat() if workflow_id is None or not self.lucy_interface.get_workflow(workflow_id): - # Neuen Workflow erstellen + # Create new workflow new_workflow_id = str(uuid.uuid4()) if workflow_id is None else workflow_id workflow = { "id": new_workflow_id, @@ -326,7 +336,8 @@ JSON_OUTPUT = {{ "user_id": self.user_id, "name": f"Workflow {new_workflow_id[:8]}", "started_at": current_time, - "messages": [], + "messages": [], # Empty list - will be filled with references + "message_ids": [], # Initialize empty message_ids list "logs": [], "data_stats": {}, "current_round": 1, @@ -335,28 +346,84 @@ JSON_OUTPUT = {{ "waiting_for_user": False } - # In Datenbank speichern - self.lucy_interface.create_workflow(workflow) + # Save to database - only the workflow metadata + workflow_db = { + "id": workflow["id"], + "mandate_id": workflow["mandate_id"], + "user_id": workflow["user_id"], + "name": workflow["name"], + "started_at": workflow["started_at"], + "status": workflow["status"], + "data_stats": workflow["data_stats"], + "current_round": workflow["current_round"], + "last_activity": workflow["last_activity"], + "waiting_for_user": workflow["waiting_for_user"], + "message_ids": workflow["message_ids"] # Include message_ids + } + self.lucy_interface.create_workflow(workflow_db) + return workflow else: - # Bestehenden Workflow laden + # Load existing workflow workflow = self.lucy_interface.load_workflow_state(workflow_id) - # Status aktualisieren und Rundenzähler inkrementieren + # Ensure message_ids exists + if "message_ids" not in workflow: + # Initialize from existing messages + workflow["message_ids"] = [msg["id"] for msg in workflow.get("messages", [])] + + # Update in database + self.lucy_interface.update_workflow(workflow_id, {"message_ids": 
workflow["message_ids"]}) + + # Update status and increment round counter workflow["status"] = "running" workflow["last_activity"] = current_time workflow["waiting_for_user"] = False - # Inkrementiere current_round, wenn sie existiert, sonst setze sie auf 1 + # Increment current_round if it exists, otherwise set it to 1 if "current_round" in workflow: workflow["current_round"] += 1 else: workflow["current_round"] = 1 - # In Datenbank aktualisieren - self.lucy_interface.save_workflow_state(workflow) + # Update in database - only the relevant workflow fields + workflow_update = { + "status": workflow["status"], + "last_activity": workflow["last_activity"], + "waiting_for_user": workflow["waiting_for_user"], + "current_round": workflow["current_round"] + } + self.lucy_interface.update_workflow(workflow_id, workflow_update) + return workflow + def workflow_finish(self, workflow: Dict[str, Any]) -> Dict[str, Any]: + """ + Finalizes a workflow and sets the status to 'completed'. + + Args: + workflow: Workflow object + + Returns: + Updated workflow object + """ + # Prepare workflow update data + workflow_update = { + "status": "completed", + "last_activity": datetime.now().isoformat(), + "waiting_for_user": True + } + + # Update the workflow object in memory + workflow["status"] = workflow_update["status"] + workflow["last_activity"] = workflow_update["last_activity"] + workflow["waiting_for_user"] = workflow_update["waiting_for_user"] + + # Save workflow state to database - only relevant fields, not the messages list + self.lucy_interface.update_workflow(workflow["id"], workflow_update) + + return workflow + async def workflow_summarize(self, workflow: Dict[str, Any], message_user: Dict[str, Any]) -> str: """ Erstellt eine Zusammenfassung des Workflows ohne die aktuelle User-Message. 
@@ -382,23 +449,6 @@ JSON_OUTPUT = {{ return "\n\n".join(summary_parts) - def workflow_finish(self, workflow: Dict[str, Any]) -> Dict[str, Any]: - """ - Finalisiert einen Workflow und setzt den Status auf 'stopped'. - - Args: - workflow: Workflow-Objekt - - Returns: - Aktualisiertes Workflow-Objekt - """ - workflow["status"] = "completed" - workflow["last_activity"] = datetime.now().isoformat() - workflow["waiting_for_user"] = True - - # In Datenbank speichern - self.lucy_interface.save_workflow_state(workflow) - return workflow ### Agents @@ -412,7 +462,7 @@ JSON_OUTPUT = {{ """ return self.agent_registry.get_agent_infos() - def agent_input_documents(self, doc_input_list: List[Dict[str, Any]], workflow: Dict[str, Any]) -> List[Dict[str, Any]]: + async def agent_input_documents(self, doc_input_list: List[Dict[str, Any]], workflow: Dict[str, Any]) -> List[Dict[str, Any]]: """ Prepares input documents for an agent, sorted with newest first. @@ -440,14 +490,15 @@ JSON_OUTPUT = {{ # Search for the document in sorted workflow messages (newest first) for message in sorted_messages: for doc in message.get("documents", []): - if (doc_file_id!="" and doc_file_id==doc.file_id) or (doc_filename!="" and self.get_filename(doc) == doc_filename): + if (doc_file_id!="" and doc_file_id==doc.get("file_id")) or (doc_filename!="" and self.get_filename(doc) == doc_filename): found_doc = doc break if found_doc: break if found_doc: # Process document for agent based on the specification - processed_doc = self.process_document_for_agent(found_doc, doc_spec) + processed_doc = await self.process_document_for_agent(found_doc, doc_spec) + prepared_inputs.append(processed_doc) else: logger.warning(f"Document with label '{doc_filename}', file_id '{doc_file_id}' not found in workflow") @@ -482,7 +533,10 @@ JSON_OUTPUT = {{ # Get the data from the content data = content.get("data", "") processed_content = content.copy() - + + # Check if content data is base64 encoded + is_base64 = 
content.get("metadata", {}).get("base64_encoded", False) + try: # Use the AI service to process the document content according to the prompt from the project manager for the document specification summary = doc_spec.get("prompt", "Extract the relevant information from this document") @@ -506,13 +560,15 @@ JSON_OUTPUT = {{ {"role": "user", "content": ai_prompt} ]) - # Update the processed content with the AI-processed data - processed_content["data"] = processed_data + # DO NOT change the original data field + # processed_content["data"] unchanged + processed_content["data_extracted"] = processed_data processed_content["metadata"]["ai_processed"] = True except Exception as e: logger.error(f"Error processing document content with AI: {str(e)}") # Fall back to original content if AI processing fails + processed_content["data_extracted"] = "(no information)" processed_contents.append(processed_content) @@ -551,10 +607,7 @@ JSON_OUTPUT = {{ if not agent: logger.error(f"Agent '{agent_name}' not found") return [] - - # Prepare input documents for the agent - input_documents = self.agent_input_documents(task.get('input_documents', []), workflow) - + # Prepare output document specifications output_specs = [] for doc in task.get("output_documents", []): @@ -563,7 +616,10 @@ JSON_OUTPUT = {{ "description": doc.get("prompt", "") } output_specs.append(output_spec) - + + # Prepare input documents for the agent + input_documents = await self.agent_input_documents(task.get('input_documents', []), workflow) + # Create a standardized task object for the agent agent_task = { "task_id": str(uuid.uuid4()), @@ -577,12 +633,17 @@ JSON_OUTPUT = {{ "timestamp": datetime.now().isoformat() } } - + # Execute the agent with the standardized task try: # Process the task using the agent's standardized interface + logger.debug("TASK: "+self.parse_json2text(agent_task)) + logger.debug(f"Agent '{agent_name}' AI service available: {agent.ai_service is not None}") + agent_results = await 
agent.process_task(agent_task) - + + logger.debug(f"Agent '{agent_name}' completed task. RESULT: {self.parse_json2text(agent_results)}") + # Log the agent response self.log_add( workflow, @@ -596,18 +657,17 @@ JSON_OUTPUT = {{ } # Create a message in the workflow with the agent's response - agent_message = self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow) + agent_message = await self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow) logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.") return agent_message.get("documents", []) except Exception as e: error_msg = f"Error executing agent '{agent_name}': {str(e)}" - logger.error(error_msg) + logger.error(error_msg, exc_info=True) # Add exc_info=True to get full traceback self.log_add(workflow, error_msg, level="error") return [] - def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]: """ Saves all documents from agent results as files and returns a list of file IDs. 
@@ -641,25 +701,9 @@ JSON_OUTPUT = {{ # Determine if content is base64 encoded is_base64 = False - if not isinstance(content, bytes): - # Check if content might be base64 encoded - try: - if content and isinstance(content, str): - # Check for base64 pattern (simplified) - if (len(content) % 4 == 0 and - re.match(r'^[A-Za-z0-9+/]+={0,2}$', content)): - # Try to decode a small sample - sample = content[:100] if len(content) > 100 else content - base64.b64decode(sample) - is_base64 = True - except Exception: - # Not base64, treat as regular text - is_base64 = False - - # If content has metadata flag indicating it's base64 - if isinstance(content, dict) and content.get("_is_base64", False): - is_base64 = True - content = content.get("data", "") + if isinstance(content, dict) and content.get("metadata", {}).get("base64_encoded", False): + is_base64 = True + content = content.get("data", "") # Convert content to bytes if isinstance(content, str): @@ -699,46 +743,68 @@ JSON_OUTPUT = {{ def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]: """ - Fügt eine Nachricht zum Workflow hinzu und aktualisiert last_activity. + Adds a message to the workflow and updates last_activity. + Saves the message in the database and updates the workflow with references. 
Args: - workflow: Workflow-Objekt - message: Zu speichernde Nachricht + workflow: Workflow object + message: Message to be saved Returns: - ID der hinzugefügten Nachricht + Added message """ current_time = datetime.now().isoformat() - # Sicherstellen, dass Messages-Liste existiert + # Ensure messages list exists if "messages" not in workflow: workflow["messages"] = [] - # Neue Nachrichten-ID generieren, falls nicht vorhanden + # Generate new message ID if not present if "id" not in message: message["id"] = f"msg_{str(uuid.uuid4())}" - # Workflow-ID und Zeitstempel hinzufügen + # Add workflow ID and timestamps message["workflow_id"] = workflow["id"] message["started_at"] = current_time message["finished_at"] = current_time - # Sequenznummer setzen + # Set sequence number message["sequence_no"] = len(workflow["messages"]) + 1 - # Status setzen + # Ensure required fields are present + if "role" not in message: + # Set a default role based on agent_name + message["role"] = "assistant" if message.get("agent_name") else "user" + + if "agent_name" not in message: + message["agent_name"] = "" + + # Set status message["status"] = "completed" - # Message zum Workflow hinzufügen + # Add message to workflow workflow["messages"].append(message) - # Workflow-Status aktualisieren - workflow["last_activity"] = current_time - workflow["last_message_id"] = message["id"] + # Ensure message_ids list exists + if "message_ids" not in workflow: + workflow["message_ids"] = [] - # In Datenbank speichern + # Add message ID to the message_ids list + workflow["message_ids"].append(message["id"]) + + # Update workflow status + workflow["last_activity"] = current_time + + # Save to database - first the message itself self.lucy_interface.create_workflow_message(message) + # Then save the workflow with updated references + workflow_update = { + "last_activity": current_time, + "message_ids": workflow["message_ids"] # Update the message_ids field + } + 
self.lucy_interface.update_workflow(workflow["id"], workflow_update) + return message async def message_summarize(self, message: Dict[str, Any]) -> str: @@ -773,7 +839,7 @@ JSON_OUTPUT = {{ doc_name = self.get_filename(doc) docs_list.append(doc_name) if docs_list: - docs_summary = f"\nDocuments:\n{'- '.join(docs_list)}" + docs_summary = "\nDocuments:" + "\n- ".join(docs_list) return f"[{role} {agent_name}]: {content_summary}{docs_summary}" @@ -832,7 +898,13 @@ JSON_OUTPUT = {{ if file.get("mandate_id") != self.mandate_id: logger.warning(f"File {file_id} does not belong to mandate {self.mandate_id}") continue - + + # Load file content + file_content = self.lucy_interface.get_file_data(file_id) + if file_content is None: + logger.warning(f"No content found for file with ID {file_id}") + continue + # Create document file_name_ext = file.get("name") document = { @@ -840,26 +912,22 @@ JSON_OUTPUT = {{ "file_id": file_id, "name": os.path.splitext(file_name_ext)[0] if os.path.splitext(file_name_ext)[0] else "noname", "ext": os.path.splitext(file_name_ext)[1][1:] if os.path.splitext(file_name_ext)[1] else "bin", + "data": base64.b64encode(file_content).decode('utf-8'), # Add file data as base64 "contents": [] } - # Load contents immediately - file_content = self.lucy_interface.get_file_data(file_id) - if file_content is not None: - # Extract contents with the external function - contents = get_document_contents(file, file_content) - - # Add summaries to each content item - for content in contents: - content["summary"] = await self.message_summarize_content(content) - - document["contents"] = contents - - logger.info(f"File {file.get('name', 'unnamed')} (ID: {file_id}) loaded with {len(contents)} contents and summaries") - else: - logger.warning(f"No content found for file with ID {file_id}") + # Extract contents + contents = get_document_contents(file, file_content) + # Add summaries to each content item + for content in contents: + content["summary"] = await 
self.message_summarize_content(content) + + document["contents"] = contents + + logger.info(f"File {file.get('name', 'unnamed')} (ID: {file_id}) loaded with {len(contents)} contents and summaries") documents.append(document) + except Exception as e: logger.error(f"Error processing file {file_id}: {str(e)}") # Continue with remaining files instead of failing diff --git a/modules/chat_agent_analyst.py b/modules/chat_agent_analyst.py index 6e57ca8e..d28cfb43 100644 --- a/modules/chat_agent_analyst.py +++ b/modules/chat_agent_analyst.py @@ -40,7 +40,11 @@ class AgentAnalyst(AgentBase): self.default_figsize = (10, 6) self.chart_dpi = 100 plt.style.use(self.plt_style) - + + def set_dependencies(self, ai_service=None): + """Set external dependencies for the agent.""" + self.ai_service = ai_service + async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: """ Process a standardized task structure and perform data analysis. diff --git a/modules/chat_agent_coder.py b/modules/chat_agent_coder.py index 352b4433..b45b8007 100644 --- a/modules/chat_agent_coder.py +++ b/modules/chat_agent_coder.py @@ -1,26 +1,23 @@ """ -Coder agent for development and execution of Python code. -Optimized for the new task-based processing. +Simple Coder Agent for execution of Python code. 
""" import logging import json -import re -import uuid import os import subprocess import tempfile import shutil import sys -from typing import Dict, Any, List, Optional, Tuple +from typing import Dict, Any, List, Tuple from modules.chat_registry import AgentBase +from modules.configuration import APP_CONFIG logger = logging.getLogger(__name__) - class AgentCoder(AgentBase): - """Agent for development and execution of Python code""" + """Simplified Agent for developing and executing Python code with integrated executor""" def __init__(self): """Initialize the coder agent""" @@ -36,775 +33,474 @@ class AgentCoder(AgentBase): ] # Executor settings - self.executor_timeout = 60 # seconds - self.executor_memory_limit = 512 # MB + self.executor_timeout = APP_CONFIG.get("Agent_Coder_EXECUTION_TIMEOUT") # seconds + self.temp_dir = None - # AI service settings - self.ai_temperature = 0.1 # Lower temperature for deterministic code generation - - # Auto-correction settings - self.max_correction_attempts = 3 # Maximum number of correction attempts - + def set_dependencies(self, ai_service=None): + """Set external dependencies for the agent.""" + self.ai_service = ai_service + async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: """ - Process a standardized task structure and perform code development/execution. + Process a task and perform code development/execution. + First checks if the task can be completed without code execution, + then falls back to code generation if needed. 
Args: - task: A dictionary containing: - - task_id: Unique ID for this task - - prompt: The main instruction for the agent - - input_documents: List of documents to process - - output_specifications: List of required output documents - - context: Additional contextual information - + task: Task dictionary with prompt, input_documents, output_specifications + Returns: - A dictionary containing: - - feedback: Text response explaining the code execution - - documents: List of created document objects + Dictionary with feedback and documents """ - try: - # Extract relevant task information - prompt = task.get("prompt", "") - input_documents = task.get("input_documents", []) - output_specs = task.get("output_specifications", []) - context_info = task.get("context", {}) - - # Check if AI service is available - if not self.ai_service: - logger.error("No AI service configured for the Coder agent") - return { - "feedback": "The Coder agent is not properly configured.", - "documents": [] - } - - # Extract context from input documents - document_context = self._extract_document_context(input_documents) - - # Generate code based on the prompt and document context - logger.info("Generating code based on the task") - code_to_execute, requirements = await self._generate_code_from_prompt(prompt, document_context) - - if not code_to_execute: - logger.warning("AI couldn't generate any code") - return { - "feedback": "I couldn't generate executable code based on the task. 
Please provide more detailed instructions.", - "documents": [] - } - - logger.info(f"Code generated with AI ({len(code_to_execute)} characters)") - - # Collect created documents - generated_documents = [] - - # Add code as first document - code_doc = { - "label": "generated_code.py", - "content": code_to_execute - } - generated_documents.append(code_doc) - - # Execute code with auto-correction loop - execution_context = { - "input_documents": input_documents, - "task": task - } - - # Enhanced execution with auto-correction - result, attempts_info = await self._execute_with_auto_correction( - code_to_execute, - requirements, - execution_context, - prompt # Original prompt/message - ) - - # Create output documents based on execution result and output specifications - if result.get("success", False): - # Code execution successful - output = result.get("output", "") - execution_result = result.get("result") - logger.info("Code executed successfully") - - # Determine output type of the result - result_docs = self._generate_result_documents( - attempts_info[-1]["code"], # Last successful code - output, - execution_result, - output_specs - ) - - # Add result documents - generated_documents.extend(result_docs) - - # Create feedback for successful execution - feedback = f"I successfully executed the code and generated {len(result_docs)} output files." 
- if attempts_info and len(attempts_info) > 1: - feedback += f" (This required {len(attempts_info)-1} correction attempts)" - - else: - # Code execution failed after all attempts - error = result.get("error", "Unknown error") - logger.error(f"Error in code execution after all correction attempts: {error}") - - # Add error log as additional document - error_doc = { - "label": "execution_error.txt", - "content": f"Execution error:\n\n{error}" - } - generated_documents.append(error_doc) - - # Create feedback for failed execution - feedback = f"An error occurred during code execution after {len(attempts_info)} correction attempts." - - # If no specific outputs requested, create standard outputs - if not output_specs and result.get("success", False): - # Add standard output document - output_doc = { - "label": "execution_output.txt", - "content": output - } - generated_documents.append(output_doc) - - # If a result is available, also add as JSON document - if execution_result: - result_json = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result) - result_doc = { - "label": "execution_result.json", - "content": result_json - } - generated_documents.append(result_doc) - + # 1. 
Extract task information + prompt = task.get("prompt", "") + input_documents = task.get("input_documents", []) + output_specs = task.get("output_specifications", []) + + # Check if AI service is available + if not self.ai_service: + logger.error("No AI service configured for the Coder agent") return { - "feedback": feedback, - "documents": generated_documents - } - - except Exception as e: - error_msg = f"Error during processing by the Coder agent: {str(e)}" - logger.error(error_msg) - return { - "feedback": f"An error occurred during code processing: {str(e)}", + "feedback": "The Coder agent is not properly configured.", "documents": [] } - - def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str: - """ - Extract context from input documents for code generation. - Args: - documents: List of document objects + # 2. Extract data from documents in separate categories + document_data = [] # For raw file data (for code execution) + content_data = [] # For content data (later use) + content_extraction = [] # For AI-extracted data (for quick completion) + + for doc in input_documents: + # Create proper filename from name and ext + filename = f"{doc.get('name')}.{doc.get('ext')}" if doc.get('ext') else doc.get('name') - Returns: - Extracted context as text - """ - context_parts = [] - - for doc in documents: - doc_name = doc.get("name", "Unnamed document") - context_parts.append(f"--- {doc_name} ---") + # Add main document data to document_data if it exists + doc_data = doc.get('data', '') + if doc_data: + is_base64 = True # Assume base64 encoded for document data + document_data.append([filename, doc_data, is_base64]) - for content in doc.get("contents", []): - if content.get("metadata", {}).get("is_text", False): - context_parts.append(content.get("data", "")) + # Process contents for different uses + if doc.get('contents'): + for content in doc.get('contents', []): + content_name = content.get('name', 'unnamed') + + # For AI-extracted data (quick 
completion) + if content.get('data_extracted'): + content_extraction.append({ + "filename": filename, + "content_name": content_name, + "content_data": content.get('data_extracted', ''), + "content_type": content.get('content_type', ''), + "summary": content.get('summary', '') + }) + + # For raw content data + if content.get('data'): + raw_data = content.get('data', '') + is_base64 = content.get('metadata', {}).get('base64_encoded', False) + content_data.append({ + "filename": filename, + "content_name": content_name, + "data": raw_data, + "is_base64": is_base64, + "content_type": content.get('content_type', '') + }) + + # Also add to document_data for code execution if not already added + if not doc_data or doc_data != raw_data: + document_data.append([filename, raw_data, is_base64]) - return "\n\n".join(context_parts) - - def _generate_result_documents(self, code: str, output: str, execution_result: Any, - output_specs: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Generate output documents based on execution results and specifications. + # 3. Check if task can be completed without code execution + quick_completion = await self._check_quick_completion(prompt, content_extraction, output_specs) - Args: - code: Executed code - output: Text output of the execution - execution_result: Result object from execution - output_specs: Output specifications - - Returns: - List of generated document objects - """ + if quick_completion and quick_completion.get("complete") == 1: + logger.info("Task completed without code execution") + return { + "feedback": quick_completion.get("prompt", "Task completed successfully."), + "documents": quick_completion.get("documents", []) + } + else: + logger.debug(f"Code to generate, quick check responded: {quick_completion.get("prompt", "(no answer)")}") + + # If quick completion not possible, continue with code generation and execution + logger.info("Generating code to solve the task") + + # 4. 
Generate code using AI + code, requirements = await self._generate_code(prompt, document_data) + + if not code: + return { + "feedback": "Failed to generate code for the task.", + "documents": [] + } + + # 5. Replace the placeholder with actual input_files data + document_data_json = json.dumps(document_data) + code_with_data = code.replace("input_files = \"=== JSONLOAD ===\"", f"input_files = {document_data_json }") + + # 6. Execute code and get results + execution_result = self._execute_code(code_with_data, requirements) + + # 7. Process results and create output documents documents = [] - # If no specific outputs requested - if not output_specs: - return documents + # Always add the code document + documents.append({ + "label": "generated_code.py", + "content": code_with_data + }) - # Generate appropriate document for each requested output - for spec in output_specs: - output_label = spec.get("label", "") - output_description = spec.get("description", "") + # Create documents based on execution results + if execution_result.get("success", False): + result_data = execution_result.get("result") - # Determine output type based on file extension - format_type = self._determine_format_type(output_label) - - # Generate document content based on format and output - if "code" in output_label.lower() or format_type in ["py", "js", "html", "css"]: - # Code document - documents.append({ - "label": output_label, - "content": code - }) - elif "output" in output_label.lower() or format_type == "txt": - # Output document - documents.append({ - "label": output_label, - "content": output - }) - elif format_type in ["json", "yml", "yaml"] and execution_result: - # JSON result document - if isinstance(execution_result, (dict, list)): - content = json.dumps(execution_result, indent=2) - else: - content = str(execution_result) - - documents.append({ - "label": output_label, - "content": content - }) - else: - # Generic result document (fallback) - result_str = "" - if 
execution_result: - if isinstance(execution_result, (dict, list)): - result_str = json.dumps(execution_result, indent=2) - else: - result_str = str(execution_result) - - documents.append({ - "label": output_label, - "content": f"Code output:\n\n{output}\n\nResult:\n\n{result_str}" - }) - - return documents - - def _determine_format_type(self, output_label: str) -> str: - """ - Determine the format type based on the filename. - - Args: - output_label: Output filename - - Returns: - Format type (py, js, json, txt, etc.) - """ - if not '.' in output_label: - return "txt" # Default format - - extension = output_label.split('.')[-1].lower() - return extension - - async def _execute_with_auto_correction( - self, - initial_code: str, - requirements: List[str], - context: Dict[str, Any], - original_prompt: str - ) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]: - """ - Execute code with automatic error correction and retry attempts. - - Args: - initial_code: The initial Python code - requirements: List of required packages - context: Additional context for execution - original_prompt: The original user request/prompt - - Returns: - Tuple of (final execution result, list of attempt info dictionaries) - """ - # Initialize tracking data - current_code = initial_code - current_requirements = requirements.copy() if requirements else [] - attempts_info = [] - - # Execute with correction loop - for attempt in range(1, self.max_correction_attempts + 1): - if attempt == 1: - logger.info(f"Executing code (attempt {attempt}/{self.max_correction_attempts})") - else: - logger.info(f"Executing corrected code (attempt {attempt}/{self.max_correction_attempts})") - - # Execute current code version - result = await self._execute_code(current_code, current_requirements, context) - - # Record attempt information - attempts_info.append({ - "attempt": attempt, - "code": current_code, - "error": result.get("error", ""), - "success": result.get("success", False) - }) - - # Check if execution was 
successful - if result.get("success", False): - # Success! Return result and attempt info - return result, attempts_info - - # Failed execution - check if max attempt limit reached - if attempt >= self.max_correction_attempts: - logger.warning(f"Maximum correction attempts ({self.max_correction_attempts}) reached") - break - - # Correct code based on the error - error_message = result.get("error", "Unknown error") - - logger.info(f"Attempting to fix code error: {error_message[:200]}...") - - # Generate corrected code - corrected_code, new_requirements = await self._generate_code_correction( - current_code, - error_message, - original_prompt, - current_requirements - ) - - # Update for next attempt - if corrected_code: - current_code = corrected_code - - # Add new requirements - if new_requirements: - for req in new_requirements: - if req not in current_requirements: - current_requirements.append(req) - logger.info(f"Added new requirement: {req}") - else: - # Correction couldn't be generated, end loop - logger.warning("Couldn't generate code correction") - break - - # If we reach here, all attempts failed - return last result and attempt info - return result, attempts_info - - async def _generate_code_correction( - self, - code: str, - error_message: str, - original_prompt: str, - current_requirements: List[str] = None - ) -> Tuple[str, List[str]]: - """ - Generate a corrected version of code based on error messages. - - Args: - code: The code that generated errors - error_message: The error message to fix - original_prompt: The original task/requirements - current_requirements: List of currently required packages - - Returns: - Tuple of (corrected code, new requirements list) - """ - try: - # Create detailed prompt for code correction - correction_prompt = f"""You need to fix an error in Python code. 
The code was written for this task: - -ORIGINAL TASK: -{original_prompt} - -CURRENT CODE: -```python -{code} -``` - -ERROR MESSAGE: -``` -{error_message} -``` - -CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"} - -Your task is to analyze the error and provide a corrected version of the code. -Focus specifically on fixing the error while maintaining the original functionality. - -Common fixes include: -- Fixing syntax errors (missing parentheses, indentation, etc.) -- Solving import errors by adding appropriate requirements -- Correcting file paths or handling "file not found" errors -- Adding error handling for specific edge cases -- Fixing logical errors in the code - -FORMATTING GUIDELINES: -1. Provide ONLY the complete corrected Python code WITHOUT explanations -2. Do NOT use code block markers like ```python or ``` -3. Do NOT explain what the code does before or after -4. Do NOT add any text that isn't valid Python code -5. Start your answer directly with valid Python code -6. End your answer with valid Python code - -If you need to add new required packages, place them in a specially formatted comment at the beginning of your code as follows: -# REQUIREMENTS: package1,package2,package3 - -Your entire answer must be valid Python that can be executed without modifications. -""" - - # Create messages for API - messages = [ - {"role": "system", "content": "You are a Python debugging expert. 
You provide ONLY clean, error-free Python code, without explanations, markdown formatting, or text that isn't code."}, - {"role": "user", "content": correction_prompt} - ] - - # Call API with very low temperature for deterministic corrections - generated_content = await self.ai_service.call_api( - messages, - temperature=0.1 - ) - - # Clean up the generated content to ensure it's only valid Python code - fixed_code = self._clean_code(generated_content) - - # Extract requirements from special comment at beginning of code - new_requirements = [] - for line in fixed_code.split('\n'): - if line.strip().startswith("# REQUIREMENTS:"): - req_str = line.replace("# REQUIREMENTS:", "").strip() - new_requirements = [r.strip() for r in req_str.split(',') if r.strip()] - break - - return fixed_code, new_requirements + # Create documents based on output specifications + if output_specs: + for spec in output_specs: + label = spec.get("label", "output.txt") - except Exception as e: - logging.error(f"Error generating code correction: {str(e)}") - # Return None to indicate failure - return None, [] - - def _clean_code(self, code: str) -> str: - """ - Clean code by removing markdown code block markers and other formatting artifacts. 
- - Args: - code: The code string to clean - - Returns: - Cleaned code string - """ - # Remove code block markers at beginning/end - code = re.sub(r'^```(?:python)?\s*', '', code) - code = re.sub(r'```\s*$', '', code) - - # Process lines in reverse order to start from the end - lines = code.split('\n') - clean_lines = [] - in_trailing_markdown = False - - for line in reversed(lines): - stripped = line.strip() - - # Check if this line contains only backticks (``` or ` or ``) - if re.match(r'^`{1,3}$', stripped): - in_trailing_markdown = True - continue + # Extract content from result if available + content = "" + if isinstance(result_data, dict) and label in result_data: + content = result_data[label] + else: + # Default to execution output + content = execution_result.get("output", "") + + documents.append({ + "label": label, + "content": content + }) + else: + # No output specs, create default output document + documents.append({ + "label": "execution_output.txt", + "content": execution_result.get("output", "") + }) - # If we've reached actual code, no more trailing markdown consideration - if stripped and not in_trailing_markdown: - in_trailing_markdown = False - - # Add this line if it's not part of trailing markdown - if not in_trailing_markdown: - clean_lines.insert(0, line) + feedback = "Code executed successfully. Generated output files based on specifications." 
+ else: + # Execution failed + error = execution_result.get("error", "Unknown error") + documents.append({ + "label": "execution_error.txt", + "content": f"Error executing code:\n\n{error}" + }) + feedback = f"Error during code execution: {error}" - # Rejoin lines - clean_code = '\n'.join(clean_lines) - - # Final cleanup for any remaining backticks - clean_code = re.sub(r'`{1,3}\s*', '', clean_code) - - return clean_code.strip() + return { + "feedback": feedback, + "documents": documents + } - async def _generate_code_from_prompt(self, prompt: str, document_context: str) -> Tuple[str, List[str]]: + async def _check_quick_completion(self, prompt: str, content_extraction: List[Dict], output_specs: List[Dict]) -> Dict: """ - Generate Python code from a prompt using the AI service. + Check if the task can be completed without writing and executing code. Args: - prompt: The prompt to generate code from - document_context: Context extracted from documents + prompt: The task prompt + content_extraction: List of extracted content data with content_name and data_extracted + output_specs: List of output specifications Returns: - Tuple of (generated Python code, required packages) + Dictionary with completion status and results, or None if no quick completion """ - try: - # Prepare prompt for code generation - ai_prompt = f"""Generate Python code to solve the following task: + # If no data or no output specs, can't do a quick completion + if not content_extraction or not output_specs: + return None + + # Create a prompt for the AI to check if this can be completed directly + specs_json = json.dumps(output_specs) + data_json = json.dumps(content_extraction) + + check_prompt = f""" +Analyze this task and determine if it can be completed directly without writing code. TASK: {prompt} -PROVIDED CONTEXT: -{document_context if document_context else "No additional context available."} +EXTRACTED DATA AVAILABLE: +{data_json} -IMPORTANT REQUIREMENTS: -1. 
Your code MUST define a 'result' variable to store the final result. -2. At the end of your script, the result variable should be output. -3. Make your 'result' variable a dictionary or other JSON-serializable data structure containing all relevant outputs. -4. Comment your code well to explain important operations. -5. Make your code complete and self-contained. -6. Add appropriate error handling. +Each entry in the extracted data contains: +- filename: The source file name +- content_name: The specific content section name +- content_data: The AI-extracted text from the content +- content_type: The type of content (text, csv, etc.) +- summary: A brief summary of the content -FORMATTING INSTRUCTIONS: -- Return ONLY the Python code, WITHOUT introduction, explanation, or conclusion text -- Do NOT use code block markers like ```python or ``` -- Do NOT explain what the code does before or after -- Do NOT add any text that isn't valid Python code -- Start your answer directly with valid Python code -- End your answer with valid Python code +REQUIRED OUTPUT: +{specs_json} -For required packages, place them in a specially formatted comment at the beginning of your code in one line as follows: -# REQUIREMENTS: pandas,numpy,matplotlib,requests +If the task can be completed directly with the available extracted data, respond with: +{{"complete": 1, "prompt": "Brief explanation of the solution", "documents": [ + {{"label": "filename.ext", "content": "content here"}} +]}} -Your entire answer must be valid Python that can be executed without modifications. +If code would be needed to properly complete this task, respond with: +{{"complete": 0, "prompt": "Explanation why code is needed"}} + +Only return valid JSON. Your entire response must be parseable as JSON. 
""" + + # Call AI service + logger.debug("Checking if task can be completed without code execution") + messages = [ + {"role": "system", "content": "You are an AI assistant that determines if tasks require code execution. Reply with JSON only."}, + {"role": "user", "content": check_prompt} + ] + + try: + # Use a lower temperature for more deterministic response + response = await self.ai_service.call_api(messages, temperature=0.1) - # Create messages for API - messages = [ - {"role": "system", "content": "You are a Python code generator who provides ONLY clean, executable Python code with no explanations, markdown formatting, or non-code text."}, - {"role": "user", "content": ai_prompt} - ] - - # Call API - logging.info(f"Calling AI API to generate code") - generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature) - - # Clean up the generated content to ensure it's only valid Python code - code = self._clean_code(generated_content) - - # Extract requirements from special comment at beginning of code - requirements = [] - for line in code.split('\n'): - if line.strip().startswith("# REQUIREMENTS:"): - req_str = line.replace("# REQUIREMENTS:", "").strip() - requirements = [r.strip() for r in req_str.split(',') if r.strip()] - break - - return code, requirements + # Parse response as JSON + if response: + try: + # Find JSON in response if there's any text around it + json_start = response.find('{') + json_end = response.rfind('}') + 1 - except Exception as e: - logging.error(f"Error generating code with AI: {str(e)}") - # Return basic error handling code and no requirements - error_str = str(e).replace('"', '\\"') - return f""" -# Error in code generation -print(f"An error occurred during code generation: {error_str}") -# Return error result -result = {{"error": "Code generation failed", "message": "{error_str}"}} -""", [] + if json_start >= 0 and json_end > json_start: + json_str = response[json_start:json_end] + result = 
json.loads(json_str) + + # Check if this is a proper response + if "complete" in result: + return result - async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]: + except json.JSONDecodeError: + logger.debug("Failed to parse quick completion response as JSON") + pass + except Exception as e: + logger.debug(f"Error during quick completion check: {str(e)}") + + # Default to requiring code execution + return None + + async def _generate_code(self, prompt: str, input_files: List) -> Tuple[str, List[str]]: """ - Execute Python code in an isolated environment. + Generate Python code from a prompt with the input_files placeholder. Args: - code: The Python code to execute - requirements: List of required packages - context: Additional context for execution + prompt: The task prompt + input_files: List of [filename, data, is_base64] items Returns: - Result of code execution + Tuple of (code, requirements) """ - # Use virtual code executor for isolated execution - try: - executor = SimpleCodeExecutor( - timeout=self.executor_timeout, - max_memory_mb=self.executor_memory_limit, - requirements=requirements, - ai_service=self.ai_service - ) - - # Prepare input data for the code - input_data = {"context": context} if context else {} - - # Execute code - result = executor.execute_code(code, input_data) - - # Clean up environment - executor.cleanup() - - return result - - except Exception as e: - error_message = f"Error during code execution: {str(e)}" - logger.error(error_message) - - return { - "success": False, - "output": "", - "error": error_message, - "result": None - } + # Create prompt for code generation + ai_prompt = f""" +Generate Python code to solve the following task: +TASK: +{prompt} -class SimpleCodeExecutor: - """ - A simplified executor that runs Python code in isolated virtual environments. 
- """ - - def __init__(self, - timeout: int = 30, - max_memory_mb: int = 512, - requirements: List[str] = None, - ai_service = None): - """ - Initialize the SimpleCodeExecutor. - - Args: - timeout: Maximum execution time in seconds - max_memory_mb: Maximum memory in MB - requirements: List of packages to install - ai_service: Optional - AI service for further processing - """ - self.timeout = timeout - self.max_memory_mb = max_memory_mb - self.temp_dir = None - self.requirements = requirements or [] - self.blocked_packages = [ - "cryptography", "flask", "django", "tornado", # Security risks - "tensorflow", "pytorch", "scikit-learn" # Resource-intensive packages +IMPORTANT: +- All input files are provided in the 'input_files' variable as a list of [filename, data, is_base64]. +- The 'input_files' variable is already defined at the top of your code, DO NOT modify it. +- For each file, you can access: + - filename: The name of the file (e.g., "image.png") + - data: The content of the file (base64 encoded or plain text) + - is_base64: Boolean flag indicating if the data is base64 encoded + +- To use a file's data: + - For text files (when is_base64=False): Use the data directly as a string + - For binary files (when is_base64=True): Use base64.b64decode(data) to get bytes + +- Do not perform any additional base64 detection - rely on the is_base64 flag + +- Your code MUST define a 'result' variable as a dictionary to store outputs. +- Each output file should be a key in the result dictionary. +- For example: result = {{"output.txt": "output text", "results.json": json_string}} + +Your code must start with: +input_files = "=== JSONLOAD ===" # DO NOT CHANGE THIS LINE + +Required packages should be specified as: +# REQUIREMENTS: package1,package2,package3 + +Return ONLY Python code without explanations or markdown formatting. +""" + + # Call AI service + messages = [ + {"role": "system", "content": "You are a Python code generator. 
Provide only valid Python code without explanations or formatting."}, + {"role": "user", "content": ai_prompt} ] - self.ai_service = ai_service - - def _create_venv(self) -> str: - """Create a virtual environment and return the path.""" - # Create new environment - venv_parent_dir = tempfile.mkdtemp(prefix="code_exec_") - self.temp_dir = venv_parent_dir - venv_path = os.path.join(venv_parent_dir, "venv") - try: - # Create virtual environment - subprocess.run([sys.executable, "-m", "venv", venv_path], - check=True, - capture_output=True) - - return venv_path - except subprocess.CalledProcessError as e: - logger.error(f"Error creating virtual environment: {e}") - raise RuntimeError(f"Virtual environment could not be created: {e}") + generated_content = await self.ai_service.call_api(messages, temperature=0.1) + + # Extract code and requirements + code = self._clean_code(generated_content) + + # Extract requirements + requirements = [] + for line in code.split('\n'): + if line.strip().startswith("# REQUIREMENTS:"): + req_str = line.replace("# REQUIREMENTS:", "").strip() + requirements = [r.strip() for r in req_str.split(',') if r.strip()] + break + + return code, requirements - def _get_python_executable(self, venv_path: str) -> str: - """Return the path to the Python executable in the virtual environment.""" - if os.name == 'nt': # Windows - return os.path.join(venv_path, "Scripts", "python.exe") - else: # Unix/Linux - return os.path.join(venv_path, "bin", "python") - - def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]: + def _execute_code(self, code: str, requirements: List[str] = None) -> Dict[str, Any]: """ - Execute Python code in an isolated environment. + Execute Python code in a virtual environment. + Integrated executor functionality. 
Args: code: Python code to execute - input_data: Optional input data for the code + requirements: List of required packages Returns: - Dictionary with execution results + Execution result dictionary """ - logger.info("Executing code in isolated environment") - - # Create virtual environment - venv_path = self._create_venv() - - # Create file for the code - code_id = uuid.uuid4().hex[:8] - code_file = os.path.join(self.temp_dir, f"code_{code_id}.py") - - # Write code - with open(code_file, "w", encoding="utf-8") as f: - f.write(code) - - # Get Python executable - python_executable = self._get_python_executable(venv_path) - logger.info(f"Using Python executable: {python_executable}") - - # Execute code try: - # Execute code from root directory - working_dir = os.path.dirname(code_file) + # 1. Create temp directory and virtual environment + self.temp_dir = tempfile.mkdtemp(prefix="code_exec_") + venv_path = os.path.join(self.temp_dir, "venv") + + # Create venv + logger.debug(f"Creating virtual environment at {venv_path}") + subprocess.run([sys.executable, "-m", "venv", venv_path], + check=True, capture_output=True) + + # Get Python executable path + python_exe = os.path.join(venv_path, "Scripts", "python.exe") if os.name == 'nt' else os.path.join(venv_path, "bin", "python") + + # 2. 
Install requirements if provided + if requirements: + logger.debug(f"Installing requirements: {requirements}") + + # Create requirements.txt + req_file = os.path.join(self.temp_dir, "requirements.txt") + with open(req_file, "w") as f: + f.write("\n".join(requirements)) + + # Install requirements + try: + pip_result = subprocess.run( + [python_exe, "-m", "pip", "install", "-r", req_file], + capture_output=True, + text=True, + timeout=APP_CONFIG.get("Agent_Coder_INSTALL_TIMEOUT") + ) + if pip_result.returncode != 0: + logger.debug(f"Error installing requirements: {pip_result.stderr}") + else: + logger.debug(f"Requirements installed successfully") + # Log installed packages if in debug mode + if logger.isEnabledFor(logging.DEBUG): + pip_list = subprocess.run( + [python_exe, "-m", "pip", "list"], + capture_output=True, + text=True + ) + logger.debug(f"Installed packages:\n{pip_list.stdout}") + + except Exception as e: + logger.debug(f"Exception during requirements installation: {str(e)}") + + # 3. Write code to file + code_file = os.path.join(self.temp_dir, "code.py") + with open(code_file, "w", encoding="utf-8") as f: + f.write(code) + + # 4. Execute code + logger.debug(f"Executing code with timeout of {self.executor_timeout} seconds. Code: {code}") process = subprocess.run( - [python_executable, code_file], - timeout=self.timeout, + [python_exe, code_file], + timeout=self.executor_timeout, capture_output=True, - text=True, - cwd=working_dir + text=True ) - - # Process output + + # 5. 
Process results stdout = process.stdout stderr = process.stderr - # Get result from stdout if available + # Try to extract result from stdout result_data = None - if process.returncode == 0 and stdout: + if process.returncode == 0: try: - # Look for the last line that could be JSON + # Find the last line that might be JSON for line in reversed(stdout.strip().split('\n')): line = line.strip() if line and line[0] in '{[' and line[-1] in '}]': try: result_data = json.loads(line) - # Use successfully parsed JSON result + logger.debug(f"Extracted result data from stdout: {type(result_data)}") break except json.JSONDecodeError: - # Not valid JSON, continue with next line continue except Exception as e: - logger.warning(f"Error parsing result from stdout: {str(e)}") - + logger.debug(f"Error extracting result from stdout: {str(e)}") + # Create result dictionary - execution_result = { + return { "success": process.returncode == 0, "output": stdout, "error": stderr if process.returncode != 0 else "", "result": result_data, "exit_code": process.returncode } - + except subprocess.TimeoutExpired: - logger.error(f"Execution timed out after {self.timeout} seconds") - execution_result = { + logger.error(f"Execution timed out after {self.executor_timeout} seconds") + return { "success": False, "output": "", - "error": f"Execution timed out (timeout after {self.timeout} seconds)", + "error": f"Execution timed out after {self.executor_timeout} seconds", "result": None, "exit_code": -1 } except Exception as e: logger.error(f"Execution error: {str(e)}") - execution_result = { + return { "success": False, "output": "", "error": f"Execution error: {str(e)}", "result": None, "exit_code": -1 } - - # Clean up temporary code file - try: - if os.path.exists(code_file): - os.remove(code_file) - except Exception as e: - logger.warning(f"Error cleaning up temporary code file: {e}") - - return execution_result - - def cleanup(self): - """Clean up temporary resources.""" - # Clean up temporary 
directory + finally: + # Clean up resources + self._cleanup_execution() + + def _cleanup_execution(self): + """Clean up temporary resources from code execution.""" if self.temp_dir and os.path.exists(self.temp_dir): try: + logger.debug(f"Cleaning up temporary directory: {self.temp_dir}") shutil.rmtree(self.temp_dir) - logger.info(f"Temporary directory deleted: {self.temp_dir}") + self.temp_dir = None except Exception as e: - logger.warning(f"Temporary directory {self.temp_dir} could not be deleted: {e}") - - def __del__(self): - """Cleanup during garbage collection.""" - self.cleanup() - + logger.warning(f"Error cleaning up temp directory: {str(e)}") + + def _clean_code(self, code: str) -> str: + """Remove any markdown formatting or explanations.""" + # Remove code block markers + code = code.replace("```python", "").replace("```", "") + + # Remove explanations before or after code + lines = code.strip().split('\n') + start_index = 0 + end_index = len(lines) + + # Find start of actual code + for i, line in enumerate(lines): + if line.strip().startswith("input_files =") or line.strip().startswith("# REQUIREMENTS:"): + start_index = i + break + + # Clean code + cleaned_code = '\n'.join(lines[start_index:end_index]) + return cleaned_code.strip() + # Factory function for the Coder agent def get_coder_agent(): - """ - Factory function that returns an instance of the Coder agent. 
- - Returns: - An instance of the Coder agent - """ + """Returns an instance of the Coder agent.""" return AgentCoder() \ No newline at end of file diff --git a/modules/chat_agent_creative.py b/modules/chat_agent_creative.py index cf705d68..17fd7684 100644 --- a/modules/chat_agent_creative.py +++ b/modules/chat_agent_creative.py @@ -26,7 +26,11 @@ class AgentCreative(AgentBase): "document_generation", "question_answering" ] - + + def set_dependencies(self, ai_service=None): + """Set external dependencies for the agent.""" + self.ai_service = ai_service + async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: """ Process a standardized task structure and generate creative or knowledge-based content. @@ -135,7 +139,7 @@ class AgentCreative(AgentBase): "feedback": f"An error occurred while creating creative content: {str(e)}", "documents": [] } - + def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str: """ Extract context from input documents. diff --git a/modules/chat_agent_documentation.py b/modules/chat_agent_documentation.py index c0a75afd..0756e158 100644 --- a/modules/chat_agent_documentation.py +++ b/modules/chat_agent_documentation.py @@ -26,7 +26,11 @@ class AgentDocumentation(AgentBase): "technical_writing", "knowledge_organization" ] - + + def set_dependencies(self, ai_service=None): + """Set external dependencies for the agent.""" + self.ai_service = ai_service + async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: """ Process a standardized task structure and create documentation. 
diff --git a/modules/chat_agent_webcrawler.py b/modules/chat_agent_webcrawler.py index d54bcca7..b5f1902a 100644 --- a/modules/chat_agent_webcrawler.py +++ b/modules/chat_agent_webcrawler.py @@ -36,10 +36,14 @@ class AgentWebcrawler(AgentBase): ] # Web crawling configuration - self.max_url = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_URLS", "5")) - self.max_key = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_KEYWORDS", "3")) - self.max_result = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_RESULTS", "5")) - self.timeout = int(APP_CONFIG.get("Connector_AiWebscraping_TIMEOUT", "30")) + self.max_url = int(APP_CONFIG.get("Agent_Webcrawler_MAX_URLS", "5")) + self.max_key = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_KEYWORDS", "3")) + self.max_result = int(APP_CONFIG.get("Agent_Webcrawler_MAX_SEARCH_RESULTS", "5")) + self.timeout = int(APP_CONFIG.get("Agent_Webcrawler_TIMEOUT", "30")) + + def set_dependencies(self, ai_service=None): + """Set external dependencies for the agent.""" + self.ai_service = ai_service async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]: @@ -542,7 +546,7 @@ class AgentWebcrawler(AgentBase): List of search results """ formatted_query = quote_plus(query) - url = f"{APP_CONFIG.get('Connector_AiWebscraping_SEARCH_ENGINE', 'https://html.duckduckgo.com/html/?q=')}{formatted_query}" + url = f"{APP_CONFIG.get('Agent_Webcrawler_SEARCH_ENGINE', 'https://html.duckduckgo.com/html/?q=')}{formatted_query}" search_results_soup = self._read_url(url) if not isinstance(search_results_soup, BeautifulSoup) or not search_results_soup.select('.result'): @@ -614,7 +618,7 @@ class AgentWebcrawler(AgentBase): BeautifulSoup object with the content or empty on errors """ headers = { - 'User-Agent': APP_CONFIG.get("Connector_AiWebscraping_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"), + 'User-Agent': APP_CONFIG.get("Agent_Webcrawler_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) 
AppleWebKit/537.36"), 'Accept': 'text/html,application/xhtml+xml,application/xml', 'Accept-Language': 'en-US,en;q=0.9', } diff --git a/modules/chat_content_extraction.py b/modules/chat_content_extraction.py index a01312d7..3fa5485a 100644 --- a/modules/chat_content_extraction.py +++ b/modules/chat_content_extraction.py @@ -98,14 +98,16 @@ def get_document_contents(file_metadata: Dict[str, Any], file_content: bytes) -> } }) + # Add generic attributes for all documents + for content in contents: if isinstance(content.get("data"), bytes): content["data"] = base64.b64encode(content["data"]).decode('utf-8') - # Markiere in Metadaten, dass dies base64-kodiert ist + # Add base64 flag if "metadata" not in content: content["metadata"] = {} content["metadata"]["base64_encoded"] = True - + logger.info(f"Erfolgreich {len(contents)} Inhalte aus Datei '{file_name}' extrahiert") return contents diff --git a/modules/chat_registry.py b/modules/chat_registry.py index c189148d..be7bad95 100644 --- a/modules/chat_registry.py +++ b/modules/chat_registry.py @@ -180,7 +180,11 @@ class AgentRegistry: Agent instance or None if not found """ if agent_identifier in self.agents: - return self.agents[agent_identifier] + agent = self.agents[agent_identifier] + # Ensure the agent has the AI service + if hasattr(agent, 'set_dependencies') and self.ai_service: + agent.set_dependencies(ai_service=self.ai_service) + return agent logger.error(f"Agent with identifier '{agent_identifier}' not found") return None diff --git a/modules/lucydom_interface.py b/modules/lucydom_interface.py index f8607580..9bd292fc 100644 --- a/modules/lucydom_interface.py +++ b/modules/lucydom_interface.py @@ -1,3 +1,4 @@ +import os import logging import uuid from datetime import datetime @@ -578,12 +579,15 @@ class LucyDOMInterface: # 2. 
Speichere Binärdaten als Base64-String in der 'file_data'-Tabelle logger.info(f"Saving file content to database for file: {file_name}") self.create_file_data(db_file["id"], file_content) - + + # Debug: Export file to static folder + if logger.isEnabledFor(logging.DEBUG): self._export_file_to_static(file_content, db_file["id"], file_name) + # Debug: Verify database record was created if not db_file: logger.warning(f"Database record for file {file_name} was not created properly") else: - logger.info(f"Database record created for file {file_name}") + logger.debug(f"Database record created for file {file_name}") logger.info(f"File upload process completed for: {file_name}") return db_file @@ -629,6 +633,10 @@ class LucyDOMInterface: logger.error(f"Fehler beim Herunterladen der Datei {file_id}: {str(e)}") raise FileError(f"Fehler beim Herunterladen der Datei: {str(e)}") + def _export_file_to_static(self, file_content: bytes, file_id: int, file_name: str): + debug_filename = f"{file_id}_{file_name}" + with open(f"./static/{debug_filename}", 'wb') as f: + f.write(file_content) # Workflow Methoden @@ -664,10 +672,6 @@ class LucyDOMInterface: if "last_activity" not in workflow_data: workflow_data["last_activity"] = current_time - # Stelle sicher, dass last_message_id gesetzt ist, falls nicht vorhanden - if "last_message_id" not in workflow_data: - workflow_data["last_message_id"] = "" - return self.db.record_create("workflows", workflow_data) def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]: @@ -723,70 +727,93 @@ class LucyDOMInterface: return self.db.get_recordset("workflow_messages", record_filter={"workflow_id": workflow_id}) def create_workflow_message(self, message_data: Dict[str, Any]) -> Dict[str, Any]: - """Erstellt eine neue Nachricht für einen Workflow + """ + Creates a new message for a workflow. 
Args: - message_data: Die Nachrichtendaten + message_data: The message data Returns: - Die erstellte Nachricht oder None bei Fehler + The created message or None on error """ try: # Check if required fields are present required_fields = ["id", "workflow_id"] for field in required_fields: if field not in message_data: - logger.error(f"Pflichtfeld '{field}' fehlt in message_data") - raise ValueError(f"Pflichtfeld '{field}' fehlt in den Nachrichtendaten") + logger.error(f"Required field '{field}' missing in message_data") + raise ValueError(f"Required field '{field}' missing in message data") # Validate that ID is not None if message_data["id"] is None: message_data["id"] = f"msg_{uuid.uuid4()}" - logger.warning(f"Automatisch generierte ID für Workflow-Nachricht: {message_data['id']}") + logger.warning(f"Automatically generated ID for workflow message: {message_data['id']}") - # Stellen Sie sicher, dass die benötigten Felder vorhanden sind + # Ensure required fields are present if "started_at" not in message_data and "created_at" not in message_data: message_data["started_at"] = self._get_current_timestamp() - # Wenn "created_at" vorhanden ist, übertrage es nach "started_at" if "created_at" in message_data and "started_at" not in message_data: message_data["started_at"] = message_data["created_at"] del message_data["created_at"] - # Status setzen, falls nicht vorhanden + # Set status if not present if "status" not in message_data: message_data["status"] = "completed" - # Sequenznummer setzen, falls nicht vorhanden + # Set sequence number if not present if "sequence_no" not in message_data: - # Hole aktuelle Nachrichten, um die nächste Sequenznummer zu bestimmen + # Get current messages to determine next sequence number existing_messages = self.get_workflow_messages(message_data["workflow_id"]) message_data["sequence_no"] = len(existing_messages) + 1 - # Debug-Log für die zu erstellenden Daten - logger.debug(f"Erstelle Workflow-Nachricht mit Daten: {message_data}") + # 
Ensure role and agent_name are present + if "role" not in message_data: + message_data["role"] = "assistant" if message_data.get("agent_name") else "user" - return self.db.record_create("workflow_messages", message_data) + if "agent_name" not in message_data: + message_data["agent_name"] = "" + + # Debug log for data to create + logger.debug(f"Creating workflow message with data: {message_data}") + + # Create message in database + created_message = self.db.record_create("workflow_messages", message_data) + + # Update workflow's message_ids if this is a new message + if created_message: + workflow_id = message_data["workflow_id"] + workflow = self.get_workflow(workflow_id) + + if workflow: + # Get current message_ids or initialize empty list + message_ids = workflow.get("message_ids", []) + + # Add the new message ID if not already in the list + if created_message["id"] not in message_ids: + message_ids.append(created_message["id"]) + self.update_workflow(workflow_id, {"message_ids": message_ids}) + + return created_message except Exception as e: - logger.error(f"Fehler beim Erstellen der Workflow-Nachricht: {str(e)}") + logger.error(f"Error creating workflow message: {str(e)}") # Return None instead of raising to avoid cascading failures return None - + def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Dict[str, Any]: """ - Aktualisiert eine bestehende Workflow-Nachricht in der Datenbank - with improved document handling. + Updates an existing workflow message in the database. 
Args: - message_id: ID der Nachricht - message_data: Zu aktualisierende Daten + message_id: ID of the message + message_data: Data to update Returns: - Das aktualisierte Nachrichtenobjekt oder None bei Fehler + The updated message object or None on error """ try: - # Print debug info - print(f"Updating message {message_id} in database") + # Debug info + logger.debug(f"Updating message {message_id} in database") # Ensure message_id is provided if not message_id: @@ -806,39 +833,19 @@ class LucyDOMInterface: logger.error(f"Workflow ID missing for new message {message_id}") return None - # Ensure documents array is handled properly - if "documents" in message_data: - logger.info(f"Message {message_id} has {len(message_data['documents'])} documents") - - # Make sure we're not storing huge content in the database - # For each document, ensure content size is reasonable - documents_to_store = [] - for doc in message_data["documents"]: - doc_copy = doc.copy() - - # Process contents array if it exists - if "contents" in doc_copy: - # Ensure contents is not too large - limit text size - for content in doc_copy["contents"]: - if content.get("type") == "text" and "text" in content: - text = content["text"] - if len(text) > 1000: # Limit text preview to 1000 chars - content["text"] = text[:1000] + "... 
[truncated]" - - documents_to_store.append(doc_copy) - - # Replace with the processed documents - message_data["documents"] = documents_to_store + # Update existing message + existing_message = messages[0] - # Log the update data size for debugging - update_data_size = len(str(message_data)) - logger.debug(f"Update data size: {update_data_size} bytes") + # Ensure required fields present + for key in ["role", "agent_name"]: + if key not in message_data and key not in existing_message: + message_data[key] = "assistant" if key == "role" else "" # Ensure ID is in the dataset if 'id' not in message_data: message_data['id'] = message_id - # Konvertiere created_at zu started_at falls nötig + # Convert created_at to started_at if needed if "created_at" in message_data and "started_at" not in message_data: message_data["started_at"] = message_data["created_at"] del message_data["created_at"] @@ -1005,23 +1012,24 @@ class LucyDOMInterface: def save_workflow_state(self, workflow: Dict[str, Any], save_messages: bool = True, save_logs: bool = True) -> bool: """ - Speichert den kompletten Zustand eines Workflows in der Datenbank. - Dies umfasst den Workflow selbst, Nachrichten und Logs. + Saves the state of a workflow to the database. + Workflow data is updated, but messages are stored separately. Args: - workflow: Das vollständige Workflow-Objekt - save_messages: Flag, ob Nachrichten gespeichert werden sollen - save_logs: Flag, ob Logs gespeichert werden sollen + workflow: The workflow object + save_messages: Flag to determine if messages should be saved + save_logs: Flag to determine if logs should be saved Returns: - True bei Erfolg, False bei Fehler + True on success, False on failure """ try: workflow_id = workflow.get("id") if not workflow_id: return False - # Extrahiere nur die für die Datenbank relevanten Workflow-Felder + # Extract only the database-relevant workflow fields + # IMPORTANT: Don't store messages in the workflow table! 
workflow_db_data = { "id": workflow_id, "mandate_id": workflow.get("mandate_id", self.mandate_id), @@ -1030,58 +1038,56 @@ class LucyDOMInterface: "status": workflow.get("status", "unknown"), "started_at": workflow.get("started_at", self._get_current_timestamp()), "last_activity": workflow.get("last_activity", self._get_current_timestamp()), - "last_message_id": workflow.get("last_message_id", ""), "data_stats": workflow.get("data_stats", {}) } - # Prüfen, ob der Workflow bereits existiert + # Check if workflow already exists existing_workflow = self.get_workflow(workflow_id) if existing_workflow: self.update_workflow(workflow_id, workflow_db_data) else: self.create_workflow(workflow_db_data) - - # Nachrichten speichern + # Save messages if save_messages and "messages" in workflow: - # Bestehende Nachrichten abrufen - existing_messages = {msg["id"]: msg for msg in self.get_workflow_messages(workflow_id)} - for message in workflow["messages"]: message_id = message.get("id") if not message_id: continue - # Nur relevante Daten für die Datenbank extrahieren - message_data = { - "id": message_id, - "workflow_id": workflow_id, - "sequence_no": message.get("sequence_no", 0), - "role": message.get("role", "unknown"), - "content": message.get("content"), - "agent_name": message.get("agent_name"), - "status": message.get("status", "completed"), - "started_at": message.get("started_at", self._get_current_timestamp()), - "finished_at": message.get("finished_at"), - "parent_message_id": message.get("parent_message_id"), - # IMPORTANT: Include documents field to persist file attachments - "documents": message.get("documents", []) - } + # Since each message is already saved with create_workflow_message, + # we only need to check if updates are necessary + # First, get existing message from database + existing_messages = self.get_workflow_messages(workflow_id) + existing_message = next((m for m in existing_messages if m.get("id") == message_id), None) - # Debug logging for 
documents - doc_count = len(message.get("documents", [])) - if doc_count > 0: - logger.info(f"Message {message_id} has {doc_count} documents to save") - - # Nachricht erstellen oder aktualisieren - if message_id in existing_messages: - self.db.record_modify("workflow_messages", message_id, message_data) + if existing_message: + # Check if updates are needed + has_changes = False + for key in ["role", "agent_name", "content", "status", "documents"]: + if key in message and message.get(key) != existing_message.get(key): + has_changes = True + break + + if has_changes: + # Extract only relevant data for the database + message_data = { + "role": message.get("role", existing_message.get("role", "unknown")), + "content": message.get("content", existing_message.get("content", "")), + "agent_name": message.get("agent_name", existing_message.get("agent_name", "")), + "status": message.get("status", existing_message.get("status", "completed")), + "documents": message.get("documents", existing_message.get("documents", [])) + } + self.update_workflow_message(message_id, message_data) else: - self.db.record_create("workflow_messages", message_data) + # Message doesn't exist in database yet + # It should have been saved via create_workflow_message + # If not, log a warning + logger.warning(f"Message {message_id} in workflow {workflow_id} not found in database") - # Logs speichern + # Save logs if save_logs and "logs" in workflow: - # Bestehende Logs abrufen + # Get existing logs existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)} for log in workflow["logs"]: @@ -1089,7 +1095,7 @@ class LucyDOMInterface: if not log_id: continue - # Nur relevante Daten für die Datenbank extrahieren + # Extract only relevant data for the database log_data = { "id": log_id, "workflow_id": workflow_id, @@ -1100,7 +1106,7 @@ class LucyDOMInterface: "agent_name": log.get("agent_name") } - # Log erstellen oder aktualisieren + # Create or update log if log_id in existing_logs: 
self.db.record_modify("workflow_logs", log_id, log_data) else: @@ -1108,22 +1114,22 @@ class LucyDOMInterface: return True except Exception as e: - logger.error(f"Fehler beim Speichern des Workflow-Zustands: {str(e)}") + logger.error(f"Error saving workflow state: {str(e)}") return False def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]: """ - Lädt den kompletten Zustand eines Workflows aus der Datenbank. - Dies umfasst den Workflow selbst, Nachrichten und Logs. + Loads the complete state of a workflow from the database. + This includes the workflow itself, messages, and logs. Args: - workflow_id: ID des zu ladenden Workflows + workflow_id: ID of the workflow to load Returns: - Das vollständige Workflow-Objekt oder None bei Fehler + The complete workflow object or None on error """ try: - # Basis-Workflow laden + # Load base workflow workflow = self.get_workflow(workflow_id) if not workflow: return None @@ -1131,41 +1137,46 @@ class LucyDOMInterface: # Log the workflow base retrieval logger.debug(f"Loaded base workflow {workflow_id} from database") - # Nachrichten laden + # Load messages messages = self.get_workflow_messages(workflow_id) - # Nach Sequenznummer sortieren + # Sort by sequence number messages.sort(key=lambda x: x.get("sequence_no", 0)) # Debug log for messages and document counts message_count = len(messages) logger.debug(f"Loaded {message_count} messages for workflow {workflow_id}") + # Check if message_ids exists and is valid + message_ids = workflow.get("message_ids", []) + if not message_ids or len(message_ids) != len(messages): + # Rebuild message_ids from messages + message_ids = [msg.get("id") for msg in messages] + # Update in database + self.update_workflow(workflow_id, {"message_ids": message_ids}) + logger.info(f"Rebuilt message_ids for workflow {workflow_id}") + # Log document counts for each message for msg in messages: doc_count = len(msg.get("documents", [])) if doc_count > 0: logger.info(f"Message 
{msg.get('id')} has {doc_count} documents loaded from database") - # Log document details for debugging - for i, doc in enumerate(msg.get("documents", [])): - file_id = doc.get("file_id", "unknown") - logger.debug(f"Document {i+1}: file_id={file_id}") - # Logs laden + # Load logs logs = self.get_workflow_logs(workflow_id) - # Nach Zeitstempel sortieren + # Sort by timestamp logs.sort(key=lambda x: x.get("timestamp", "")) - # Vollständiges Workflow-Objekt zusammenbauen + # Assemble complete workflow object complete_workflow = workflow.copy() complete_workflow["messages"] = messages + complete_workflow["message_ids"] = message_ids # Ensure message_ids is included complete_workflow["logs"] = logs return complete_workflow except Exception as e: - logger.error(f"Fehler beim Laden des Workflow-Zustands: {str(e)}") + logger.error(f"Error loading workflow state: {str(e)}") return None - - + # Singleton-Factory für LucyDOMInterface-Instanzen pro Kontext _lucydom_interfaces = {} diff --git a/modules/lucydom_model.py b/modules/lucydom_model.py index 9c6a7e13..0df066a8 100644 --- a/modules/lucydom_model.py +++ b/modules/lucydom_model.py @@ -3,143 +3,137 @@ from typing import List, Dict, Any, Optional class Label(BaseModel): - """Label für ein Attribut oder eine Klasse mit Unterstützung für mehrere Sprachen""" + """Label for an attribute or a class with support for multiple languages""" default: str translations: Dict[str, str] = {} def get_label(self, language: str = None): - """Gibt das Label in der angegebenen Sprache zurück, oder den Standardwert wenn nicht verfügbar""" + """Returns the label in the specified language, or the default value if not available""" if language and language in self.translations: return self.translations[language] return self.default class Prompt(BaseModel): - """Datenmodell für einen Prompt""" - id: int = Field(description="Eindeutige ID des Prompts") - mandate_id: int = Field(description="ID des zugehörigen Mandanten") - user_id: int = 
Field(description="ID des Erstellers") - content: str = Field(description="Inhalt des Prompts") - name: str = Field(description="Anzeigename des Prompts") + """Data model for a prompt""" + id: int = Field(description="Unique ID of the prompt") + mandate_id: int = Field(description="ID of the associated mandate") + user_id: int = Field(description="ID of the creator") + content: str = Field(description="Content of the prompt") + name: str = Field(description="Display name of the prompt") label: Label = Field( default=Label(default="Prompt", translations={"en": "Prompt", "fr": "Invite"}), - description="Label für die Klasse" + description="Label for the class" ) - # Labels für Attribute + # Labels for attributes field_labels: Dict[str, Label] = { "id": Label(default="ID", translations={}), - "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), - "user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}), - "content": Label(default="Inhalt", translations={"en": "Content", "fr": "Contenu"}), + "mandate_id": Label(default="Mandate ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), + "user_id": Label(default="User ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}), + "content": Label(default="Content", translations={"en": "Content", "fr": "Contenu"}), "name": Label(default="Name", translations={"en": "Label", "fr": "Nom"}), } class FileItem(BaseModel): - """Datenmodell für ein File""" - id: int = Field(description="Eindeutige ID des Datenobjekts") - mandate_id: int = Field(description="ID des zugehörigen Mandanten") - user_id: int = Field(description="ID des Erstellers") - name: str = Field(description="Name des Datenobjekts") - mime_type: str = Field(description="Typ des Datenobjekts MIME-Typ") - size: Optional[int] = Field(None, description="Größe des Datenobjekts in Bytes") - file_hash: str = Field(description="Hash code für Deduplizierung") - creation_date: 
Optional[str] = Field(None, description="Datum des Hochladens") - workflow_id: Optional[str] = Field(None, description="ID des zugehörigen Workflows, falls vorhanden") + """Data model for a file""" + id: int = Field(description="Unique ID of the data object") + mandate_id: int = Field(description="ID of the associated mandate") + user_id: int = Field(description="ID of the creator") + name: str = Field(description="Name of the data object") + mime_type: str = Field(description="Type of the data object MIME type") + size: Optional[int] = Field(None, description="Size of the data object in bytes") + file_hash: str = Field(description="Hash code for deduplication") + creation_date: Optional[str] = Field(None, description="Upload date") + workflow_id: Optional[str] = Field(None, description="ID of the associated workflow, if any") label: Label = Field( - default=Label(default="Datenobjekt", translations={"en": "Data Object", "fr": "Objet de données"}), - description="Label für die Klasse" + default=Label(default="Data Object", translations={"en": "Data Object", "fr": "Objet de données"}), + description="Label for the class" ) - # Labels für Attribute + # Labels for attributes field_labels: Dict[str, Label] = { "id": Label(default="ID", translations={}), - "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), - "user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}), + "mandate_id": Label(default="Mandate ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), + "user_id": Label(default="User ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}), "name": Label(default="Name", translations={"en": "Name", "fr": "Nom"}), - "mime_type": Label(default="Typ", translations={"en": "Type", "fr": "Type"}), - "size": Label(default="Größe", translations={"en": "Size", "fr": "Taille"}), - "file_hash": Label(default="File-Hash", translations={"en": "Hash", "fr": "Hash"}), - 
"creation_date": Label(default="Upload-Datum", translations={"en": "Upload date", "fr": "Date de téléchargement"}), - "workflow_id": Label(default="Workflow-ID", translations={"en": "Workflow ID", "fr": "ID du workflow"}) + "mime_type": Label(default="Type", translations={"en": "Type", "fr": "Type"}), + "size": Label(default="Size", translations={"en": "Size", "fr": "Taille"}), + "file_hash": Label(default="File Hash", translations={"en": "Hash", "fr": "Hash"}), + "creation_date": Label(default="Upload date", translations={"en": "Upload date", "fr": "Date de téléchargement"}), + "workflow_id": Label(default="Workflow ID", translations={"en": "Workflow ID", "fr": "ID du workflow"}) } class FileData(BaseModel): - """Datenmodell für den File-Inhalt""" - id: int = Field(description="Eindeutige ID des Datenobjekts") - data: str = Field(description="Binärer Inhalt der Datei als Base64-String") + """Data model for file content""" + id: int = Field(description="Unique ID of the data object") + data: str = Field(description="Binary content of the file as base64 string") -# Workflow-Modellklassen +# Workflow model classes class DocumentContent(BaseModel): - """Inhalt eines Dokuments im Workflow""" - sequence_nr: int = Field(1, description="Sequenz-Nummer des Inhaltes im Quelldokument") - name: str = Field(description="Bezeichnung") + """Content of a document in the workflow""" + sequence_nr: int = Field(1, description="Sequence number of the content in the source document") + name: str = Field(description="Designation") ext: str = Field(description="Content extension for export: txt, csv, json, jpg, png") - content_type: str = Field(description="MIME-Typ") - data: str = Field(description="Binärer Inhalt der Daten als Base64-String") - summary: str = Field(description="Zusammenfassung des Datei-Inhaltes") - metadata: Dict[str, Any] = Field(default_factory=dict, description="Metadaten zum Inhalt, wie z.B. 
is_text Flag, Format-Informationen, Encoding usw.") + content_type: str = Field(description="MIME type") + summary: str = Field(description="Summary of the file content") + metadata: Dict[str, Any] = Field(default_factory=dict, description="Metadata about the content, such as is_text flag, format information, encoding, etc.") class Document(BaseModel): - """Dokument im Workflow - Referenziert direkt eine Datei in der Datenbank""" - id: str = Field(description="Eindeutige ID des Dokuments") - name: str = Field(description="Name des Datenobjekts") - ext: str = Field(description="Extension des Datenobjekts") - file_id: int = Field(description="ID der referenzierten Datei in der Datenbank") - contents: List[DocumentContent] = Field(description="Dokumentinhalte") + """Document in the workflow - References a file directly in the database""" + id: str = Field(description="Unique ID of the document") + name: str = Field(description="Name of the data object") + ext: str = Field(description="Extension of the data object") + file_id: int = Field(description="ID of the referenced file in the database") + data: str = Field(description="Content of the data as base64 string") + contents: List[DocumentContent] = Field(description="Document contents") class DataStats(BaseModel): - """Statistiken für Performance und Datennutzung""" - processing_time: Optional[float] = Field(None, description="Verarbeitungszeit in Sekunden") - token_count: Optional[int] = Field(None, description="Token-Anzahl (für KI-Modelle)") - bytes_sent: Optional[int] = Field(None, description="Gesendete Bytes") - bytes_received: Optional[int] = Field(None, description="Empfangene Bytes") + """Statistics for performance and data usage""" + processing_time: Optional[float] = Field(None, description="Processing time in seconds") + token_count: Optional[int] = Field(None, description="Token count (for AI models)") + bytes_sent: Optional[int] = Field(None, description="Bytes sent") + bytes_received: Optional[int] = 
Field(None, description="Bytes received") class Message(BaseModel): - """Nachrichtenobjekt im Workflow""" - id: str = Field(description="Eindeutige ID der Nachricht") - workflow_id: str = Field(description="Referenz zum übergeordneten Workflow") - parent_message_id: Optional[str] = Field(None, description="Referenz zur beantworteten Nachricht") - started_at: str = Field(description="Zeitstempel für Nachrichtenerstellung") - finished_at: Optional[str] = Field(None, description="Zeitstempel für Nachrichtenabschluss") - sequence_no: int = Field(description="Sequenznummer für Sortierung") + """Message object in the workflow""" + id: str = Field(description="Unique ID of the message") + workflow_id: str = Field(description="Reference to the parent workflow") + parent_message_id: Optional[str] = Field(None, description="Reference to the replied message") + started_at: str = Field(description="Timestamp for message creation") + finished_at: Optional[str] = Field(None, description="Timestamp for message completion") + sequence_no: int = Field(description="Sequence number for sorting") - status: str = Field(description="Status der Nachricht ('processing', 'completed')") - role: str = Field(description="Rolle des Absenders ('system', 'user', 'assistant')") + status: str = Field(description="Status of the message ('processing', 'completed')") + role: str = Field(description="Role of the sender ('system', 'user', 'assistant')") - data_stats: Optional[DataStats] = Field(None, description="Statistiken") - documents: Optional[List[Document]] = Field(None, description="Dokumente in dieser Nachricht (Referenzen zu Dateien in der Datenbank)") - content: Optional[str] = Field(None, description="Textinhalt der Nachricht") - agent_name: Optional[str] = Field(None, description="Name des verwendeten Agenten") + data_stats: Optional[DataStats] = Field(None, description="Statistics") + documents: Optional[List[Document]] = Field(None, description="Documents in this message (references to 
files in the database)") + content: Optional[str] = Field(None, description="Text content of the message") + agent_name: Optional[str] = Field(None, description="Name of the agent used") class Workflow(BaseModel): - """Workflow-Objekt für Multi-Agent-System""" - id: str = Field(description="Eindeutige ID des Workflows") - name: Optional[str] = Field(None, description="Name des Workflows") - mandate_id: int = Field(description="ID des Mandanten") - user_id: int = Field(description="ID des Benutzers") - status: str = Field(description="Status des Workflows ('running', 'failed', 'stopped')") - started_at: str = Field(description="Startzeitpunkt") - last_activity: str = Field(description="Zeitpunkt der letzten Aktivität") - last_message_id: str = Field(description="The last registered message") + """Workflow object for multi-agent system""" + id: str = Field(description="Unique ID of the workflow") + name: Optional[str] = Field(None, description="Name of the workflow") + mandate_id: int = Field(description="ID of the mandate") + user_id: int = Field(description="ID of the user") + status: str = Field(description="Status of the workflow ('running', 'failed', 'stopped')") + started_at: str = Field(description="Start timestamp") + last_activity: str = Field(description="Timestamp of the last activity") + message_ids: List[str] = Field(default=[], description="List of message IDs in this workflow") - data_stats: Optional[Dict[str, Any]] = Field(None, description="Gesamt-Statistiken") - messages: List[Message] = Field(default=[], description="Nachrichtenverlauf") - logs: List[Dict[str, Any]] = Field(default=[], description="Protokolleinträge") + data_stats: Optional[Dict[str, Any]] = Field(None, description="Total statistics") + messages: List[Message] = Field(default=[], description="Message history") + logs: List[Dict[str, Any]] = Field(default=[], description="Log entries") -# Anfragemodelle für die API - -class WorkflowCreateRequest(BaseModel): - """Anfrage zur 
Erstellung eines neuen Workflows""" - name: Optional[str] = Field(None, description="Name des Workflows") - prompt: str = Field(description="Zu verwendender Prompt") - files: List[int] = Field(default=[], description="Liste von FileItem ID") +# Request models for the API class UserInputRequest(BaseModel): - """Anfrage für Benutzereingabe an einen laufenden Workflow""" - prompt: str = Field(description="Nachricht des Benutzers") - listFileId: List[int] = Field(default=[], description="Liste zusätzlicher FileItem ID") \ No newline at end of file + """Request for user input to a running workflow""" + prompt: str = Field(description="Message from the user") + list_file_id: List[int] = Field(default=[], description="List of FileItem IDs") \ No newline at end of file diff --git a/notes/changelog.txt b/notes/changelog.txt index ae878ef6..3caac4e9 100644 --- a/notes/changelog.txt +++ b/notes/changelog.txt @@ -1,19 +1,32 @@ ....................... TASKS -please revise all chat_agents* modules: -- all comments, logs and outputs in english language -- all ai answers in the language of the user -- no language specific features like analysis of words. a prompt in japanese would not work with this! i need it generically. -- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section. -documentation agent: -- why to try to find out document type, when in the "label" of the files to deliver the extension is ALWAYS indludes (e.g. .docx, .csv, etc.). Please revise, this can be very much shortened and simplified +can you do following adaptions + +everywhere: +- to remove base64 checks ot tests. only to use base64_encoded attribute +- to use the enhanced attributes for document ("data" containing filedata in base64 format) and content ("data", "base64_encoded", "data_extracted") + +please tell me, where to adapt what in the code. I do not neew fully new code. 
+ + + + + + + + +german comments in logs and prompts to translate to english. where to adapt what? + +can you enhance all ai prompts to include, that the output is delivered in the language of the user? +An option to have a global variable for this, which is also trasferred with the task to the agents? + +streamline self.log_add --> to use in a standardized format and to reduce messages to relevant steps + +add connector to myoutlook + -webcrawler_agent: -- there is a try - except mapping problem in the code. please also fix this -- -also attached chat.py and chat_content_extraction (centralized), that you can see the scrutcure of passed parameters. ----------------------- OPEN @@ -41,6 +54,47 @@ frontend: no labels definition ----------------------- DONE +can you do following adaptions + +for document class: +- class Document to have a "data" attribute, where the file-data is stored in base64 format + +based on this: +- task object for agents to enhance with this attribute + +for content in contents in documents, when adding a file to a document object: +- to set "base64_encoded" if encoded. this should already be, to check + +when building task for the agents: +- ensure attribute "data" is integrated, containing filedata base64 encoded +- in each content to deliver "data" as it is, optional "base64_encoded" attribute depending on data format, to add attribute "data_extracted" and to store here the extracted data from ai call + +everywhere: +- to remove base64 checks ot tests. only to use base64_encoded attribute +- to use the enhanced attributes for document ("data" containing filedata in base64 format) and content ("data", "base64_encoded", "data_extracted") + +please tell me, where to adapt what in the code. I do not neew fully new code. + + + + +please revise all chat_agents* modules: +- all comments, logs and outputs in english language +- all ai answers in the language of the user +- no language specific features like analysis of words. 
a prompt in japanese would not work with this! i need it generically. +- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section. + +documentation agent: +- why to try to find out document type, when in the "label" of the files to deliver the extension is ALWAYS indludes (e.g. .docx, .csv, etc.). Please revise, this can be very much shortened and simplified + +webcrawler_agent: +- there is a try - except mapping problem in the code. please also fix this +- + +also attached chat.py and chat_content_extraction (centralized), that you can see the scrutcure of passed parameters. + + + alle expliziten prompt ersetzen. diff --git a/result.txt b/result.txt new file mode 100644 index 00000000..4a03458a --- /dev/null +++ b/result.txt @@ -0,0 +1 @@ +{'total_pixels': None, 'total_characters': None} \ No newline at end of file diff --git a/routes/workflows.py b/routes/workflows.py index 8d4ef5a2..e4ea42d1 100644 --- a/routes/workflows.py +++ b/routes/workflows.py @@ -70,7 +70,7 @@ async def list_workflows(current_user: Dict[str, Any] = Depends(get_current_acti @router.post("/{workflow_id}/user-input", response_model=Dict[str, Any]) async def submit_user_input( workflow_id: Optional[str] = Path(None, description="ID des Workflows (optional)"), - user_input: Dict[str, Any] = Body(...), + user_input: lucydom_model.UserInputRequest = Body(...), current_user: Dict[str, Any] = Depends(get_current_active_user) ): """ @@ -84,7 +84,11 @@ async def submit_user_input( try: # Workflow mit dem Chat-Manager fortsetzen oder neu starten - workflow = await context.interface_chat.chat_run(user_input, workflow_id) + user_input_dict = { + "prompt": user_input.prompt, + "list_file_id": user_input.list_file_id + } + workflow = await context.interface_chat.chat_run(user_input_dict, workflow_id) if not workflow: raise HTTPException( diff --git a/static/1_test_document.txt b/static/1_test_document.txt new file mode 100644 index 
00000000..8ddf7560 --- /dev/null +++ b/static/1_test_document.txt @@ -0,0 +1,10 @@ + + This is a test text file for the ChatManager workflow. + It contains some information for testing document processing. + + The ChatManager should be able to process this file + and extract relevant information from it. + + This file serves as an example for text-based documents that can be + used in a chat workflow. + \ No newline at end of file diff --git a/static/2_test_image.png b/static/2_test_image.png new file mode 100644 index 00000000..7296313b Binary files /dev/null and b/static/2_test_image.png differ diff --git a/static/3_generated_code.py b/static/3_generated_code.py new file mode 100644 index 00000000..640274da --- /dev/null +++ b/static/3_generated_code.py @@ -0,0 +1,52 @@ +# REQUIREMENTS: Pillow + +from PIL import Image + +def calculate_image_pixels(image_path): + try: + with Image.open(image_path) as img: + width, height = img.size + total_pixels = width * height + return total_pixels + except Exception as e: + print(f"Error calculating image pixels: {e}") + return None + +def calculate_text_characters(text_path): + try: + with open(text_path, 'r', encoding='utf-8') as file: + text = file.read() + total_characters = len(text) + return total_characters + except Exception as e: + print(f"Error calculating text characters: {e}") + return None + +def main(): + image_path = 'test_image' + text_path = 'test_document' + + # Calculate total pixels in the image + total_pixels = calculate_image_pixels(image_path) + + # Calculate total characters in the text document + total_characters = calculate_text_characters(text_path) + + # Prepare the result dictionary + result = { + 'total_pixels': total_pixels, + 'total_characters': total_characters + } + + # Write the result to a text file + try: + with open('result.txt', 'w') as result_file: + result_file.write(str(result)) + except Exception as e: + print(f"Error writing result to file: {e}") + + # Output the result + 
print(result) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/static/4_execution_error.txt b/static/4_execution_error.txt new file mode 100644 index 00000000..ac8ab586 --- /dev/null +++ b/static/4_execution_error.txt @@ -0,0 +1,6 @@ +Execution error: + +Traceback (most recent call last): + File "C:\Users\pmots\AppData\Local\Temp\code_exec_itmq0xhw\code_9cc3911d.py", line 3, in + from PIL import Image +ModuleNotFoundError: No module named 'PIL' diff --git a/test2.py b/test2.py new file mode 100644 index 00000000..c25c0e6a --- /dev/null +++ b/test2.py @@ -0,0 +1,50 @@ +from PIL import Image + +def calculate_image_pixels(image_path): + try: + with Image.open(image_path) as img: + width, height = img.size + total_pixels = width * height + return total_pixels + except Exception as e: + print(f"Error calculating image pixels: {e}") + return None + +def calculate_text_characters(text_path): + try: + with open(text_path, 'r', encoding='utf-8') as file: + text = file.read() + total_characters = len(text) + return total_characters + except Exception as e: + print(f"Error calculating text characters: {e}") + return None + +def main(): + image_path = 'test_image' + text_path = 'test_document' + + # Calculate total pixels in the image + total_pixels = calculate_image_pixels(image_path) + + # Calculate total characters in the text document + total_characters = calculate_text_characters(text_path) + + # Prepare the result dictionary + result = { + 'total_pixels': total_pixels, + 'total_characters': total_characters + } + + # Write the result to a text file + try: + with open('result.txt', 'w') as result_file: + result_file.write(str(result)) + except Exception as e: + print(f"Error writing result to file: {e}") + + # Output the result + print(result) + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test_workflow1.py b/test_workflow1.py index 7a43948a..67dc3eb3 100644 --- a/test_workflow1.py +++ b/test_workflow1.py @@ 
-1,6 +1,6 @@ """ -Test-Skript für den ChatManager-Workflow mit simulierten Datei-Uploads. -Demonstriert den vollständigen Workflow von Datei-Upload bis Chat-Ausführung. +Test script for ChatManager workflow with simulated file uploads. +Demonstrates the complete workflow from file upload to chat execution. """ import asyncio @@ -11,7 +11,7 @@ import sys from typing import Dict, Any, List, Tuple from datetime import datetime -# Logging konfigurieren +# Configure logging logging.basicConfig( level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', @@ -19,43 +19,43 @@ logging.basicConfig( ) logger = logging.getLogger("test_workflow") -# Pfad zum Projektverzeichnis hinzufügen +# Add project directory to path sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -# Module importieren +# Import modules from modules.lucydom_interface import get_lucydom_interface from modules.chat import get_chat_manager async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]: """ - Erstellt eine Textdatei und ein Bild für Tests und lädt sie in die Datenbank hoch. + Creates a text file and an image for testing and uploads them to the database. Args: - mandate_id: ID des Mandanten - user_id: ID des Benutzers + mandate_id: ID of the mandate + user_id: ID of the user Returns: - Tuple mit (text_file_id, image_file_id) + Tuple with (text_file_id, image_file_id) """ - logger.info("Erstelle Test-Dateien...") + logger.info("Creating test files...") lucy_interface = get_lucydom_interface(mandate_id, user_id) - # Textdatei erstellen + # Create text file text_content = """ - Dies ist eine Test-Textdatei für den ChatManager-Workflow. - Sie enthält einige Informationen zum Testen der Dokumentverarbeitung. + This is a test text file for the ChatManager workflow. + It contains some information for testing document processing. 
- Der ChatManager sollte in der Lage sein, diese Datei zu verarbeiten - und daraus relevante Informationen zu extrahieren. + The ChatManager should be able to process this file + and extract relevant information from it. - Diese Datei dient als Beispiel für Text-basierte Dokumente, die in einem - Chat-Workflow verwendet werden können. + This file serves as an example for text-based documents that can be + used in a chat workflow. """ text_file_bytes = text_content.encode('utf-8') text_file = lucy_interface.save_uploaded_file(text_file_bytes, "test_document.txt") text_file_id = text_file["id"] - logger.info(f"Textdatei erstellt mit ID: {text_file_id}") + logger.info(f"Text file created with ID: {text_file_id}") # Create a simple test image using PIL try: @@ -73,7 +73,7 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]: # Upload to database image_file = lucy_interface.save_uploaded_file(img_bytes, "test_image.png") image_file_id = image_file["id"] - logger.info(f"Bilddatei erstellt mit ID: {image_file_id}") + logger.info(f"Image file created with ID: {image_file_id}") except ImportError: # Fallback to the original method if PIL is not available @@ -87,7 +87,7 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]: image_file = lucy_interface.save_uploaded_file(png_data, "test_image.png") image_file_id = image_file["id"] - logger.info(f"Bilddatei erstellt mit ID: {image_file_id}") + logger.info(f"Image file created with ID: {image_file_id}") return text_file_id, image_file_id @@ -95,72 +95,73 @@ async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]: async def run_chat_workflow(mandate_id: int, user_id: int, file_ids: List[int]) -> Dict[str, Any]: """ - Führt einen Chat-Workflow mit gegebenen Datei-IDs aus. + Executes a chat workflow with given file IDs. 
Args: - mandate_id: ID des Mandanten - user_id: ID des Benutzers - file_ids: Liste der Datei-IDs + mandate_id: ID of the mandate + user_id: ID of the user + file_ids: List of file IDs Returns: - Das Workflow-Ergebnis + The workflow result """ - logger.info(f"Starte Chat-Workflow mit Dateien: {file_ids}") + logger.info(f"Starting chat workflow with files: {file_ids}") - # ChatManager initialisieren + # Initialize ChatManager chat_manager = get_chat_manager(mandate_id, user_id) - # Benutzeranfrage erstellen + # Create user request user_input = { - "message": "Analysiere bitte die hochgeladenen Dateien und erkläre mir deren Inhalt.", - "additional_fileids": file_ids + "prompt": "Bitte zähle mir zusammen wieviele Pixel das Bild hat und wieviele Zeichen der Text der Dokumente hat", + "list_file_id": file_ids } - # Chat-Workflow ausführen + # Execute chat workflow workflow_result = await chat_manager.chat_run(user_input) - logger.info(f"Workflow abgeschlossen mit ID: {workflow_result['id']}") + logger.info(f"Workflow completed with ID: {workflow_result['id']}") return workflow_result def analyze_workflow_result(workflow: Dict[str, Any]) -> None: """ - Analysiert und gibt Informationen über das Workflow-Ergebnis aus. + Analyzes and outputs information about the workflow result. 
Args: - workflow: Das Workflow-Ergebnis + workflow: The workflow result """ - logger.info("Analysiere Workflow-Ergebnis:") - logger.info(f"Workflow-ID: {workflow['id']}") + logger.info("Analyzing workflow result:") + logger.info(f"Workflow ID: {workflow['id']}") logger.info(f"Status: {workflow['status']}") - logger.info(f"Anzahl Nachrichten: {len(workflow.get('messages', []))}") + logger.info(f"Number of messages: {len(workflow.get('messages', []))}") for i, message in enumerate(workflow.get('messages', [])): - logger.info(f"Nachricht {i+1}:") - logger.info(f" Rolle: {message.get('role', 'unbekannt')}") + logger.info(f"Message {i+1}:") + logger.info(f" Role: {message.get('role', 'unknown')}") - # Nur die ersten 100 Zeichen des Inhalts anzeigen + # Show only the first 100 characters of content content = message.get('content', '') content_preview = content[:100] + '...' if len(content) > 100 else content - logger.info(f" Inhalt: {content_preview}") + logger.info(f" Content: {content_preview}") - # Dokumente in der Nachricht anzeigen + # Show documents in the message documents = message.get('documents', []) - logger.info(f" Dokumente: {len(documents)}") + logger.info(f" Documents: {len(documents)}") for j, doc in enumerate(documents): - doc_id = doc.get('id', 'keine ID') - file_id = doc.get('file_id', 'keine file_id') - logger.info(f" Dokument {j+1}: ID={doc_id}, File-ID={file_id}") + doc_id = doc.get('id', 'no ID') + file_id = doc.get('file_id', 'no file_id') + logger.info(f" Document {j+1}: ID={doc_id}, File-ID={file_id}") - # Informationen über Inhalte + # Information about contents contents = doc.get('contents', []) for k, content in enumerate(contents): - content_name = content.get('name', 'kein Name') - content_type = content.get('content_type', 'unbekannt') - logger.info(f" Inhalt {k+1}: {content_name} ({content_type})") + content_name = content.get('name', 'no name') + content_type = content.get('content_type', 'unknown') + logger.info(f" Content {k+1}: 
{content_name} ({content_type})") - # Log-Einträge anzeigen - logger.info(f"Logs: {len(workflow.get('logs', []))}") - for i, log in enumerate(workflow.get('logs', []))[:10]: # Begrenzung auf 10 Logs + logs = workflow.get('logs', []) + logger.info(f"Logs: {len(logs)}") + # Get only the first 10 logs + for i, log in enumerate(logs[:10]): # Apply the slice to logs, not enumerate log_type = log.get('type', 'info') log_message = log.get('message', '') log_message_preview = log_message[:100] + '...' if len(log_message) > 100 else log_message @@ -168,14 +169,14 @@ def analyze_workflow_result(workflow: Dict[str, Any]) -> None: async def cleanup_test_files(mandate_id: int, user_id: int, file_ids: List[int]) -> None: """ - Bereinigt die erstellten Testdateien. + Cleans up the created test files. Args: - mandate_id: ID des Mandanten - user_id: ID des Benutzers - file_ids: Liste der zu löschenden Datei-IDs + mandate_id: ID of the mandate + user_id: ID of the user + file_ids: List of file IDs to delete """ - logger.info("Beginne Bereinigung der Testdateien...") + logger.info("Starting cleanup of test files...") lucy_interface = get_lucydom_interface(mandate_id, user_id) @@ -183,47 +184,47 @@ async def cleanup_test_files(mandate_id: int, user_id: int, file_ids: List[int]) try: success = lucy_interface.delete_file(file_id) if success: - logger.info(f"Datei mit ID {file_id} erfolgreich gelöscht") + logger.info(f"File with ID {file_id} successfully deleted") else: - logger.warning(f"Fehler beim Löschen der Datei mit ID {file_id}") + logger.warning(f"Error deleting file with ID {file_id}") except Exception as e: - logger.error(f"Fehler beim Löschen der Datei mit ID {file_id}: {str(e)}") + logger.error(f"Error deleting file with ID {file_id}: {str(e)}") - logger.info("Bereinigung abgeschlossen") + logger.info("Cleanup completed") async def main(): """ - Hauptfunktion, die den gesamten Testprozess steuert. + Main function that controls the entire test process. 
""" - # Testparameter - MANDATE_ID = 1 # Test-Mandanten-ID - USER_ID = 1 # Test-Benutzer-ID - CLEANUP = True # Bereinigung nach dem Test + # Test parameters + MANDATE_ID = 1 # Test mandate ID + USER_ID = 1 # Test user ID + CLEANUP = True # Cleanup after test try: - logger.info("=== Test-Workflow für ChatManager gestartet ===") + logger.info("=== ChatManager test workflow started ===") - # Schritt 1: Testdateien erstellen + # Step 1: Create test files text_file_id, image_file_id = await create_test_files(MANDATE_ID, USER_ID) file_ids = [text_file_id, image_file_id] - # Schritt 2: Chat-Workflow ausführen + # Step 2: Execute chat workflow workflow_result = await run_chat_workflow(MANDATE_ID, USER_ID, file_ids) - # Schritt 3: Ergebnis analysieren + # Step 3: Analyze result analyze_workflow_result(workflow_result) - # Schritt 4: Optional bereinigen + # Step 4: Optional cleanup if CLEANUP: await cleanup_test_files(MANDATE_ID, USER_ID, file_ids) - logger.info("=== Test-Workflow erfolgreich abgeschlossen ===") + logger.info("=== Test workflow successfully completed ===") except Exception as e: - logger.error(f"Fehler im Test-Workflow: {str(e)}", exc_info=True) - logger.info("=== Test-Workflow mit Fehler beendet ===") + logger.error(f"Error in test workflow: {str(e)}", exc_info=True) + logger.info("=== Test workflow ended with error ===") if __name__ == "__main__": - # Event-Loop für asyncio erstellen und Hauptfunktion ausführen + # Create event loop for asyncio and execute main function loop = asyncio.get_event_loop() loop.run_until_complete(main()) \ No newline at end of file diff --git a/test_workflow2.py b/test_workflow2.py deleted file mode 100644 index 1a328404..00000000 --- a/test_workflow2.py +++ /dev/null @@ -1,373 +0,0 @@ -""" -Erweitertes Test-Skript für den ChatManager-Workflow mit simulierten Datei-Uploads. -Bietet zusätzliche Konfigurationsmöglichkeiten und detailliertere Tests. 
-""" - -import asyncio -import logging -import os -import sys -import argparse -import json -from typing import Dict, Any, List, Tuple, Optional -from datetime import datetime - -# Logging konfigurieren -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', - handlers=[logging.StreamHandler()] -) -logger = logging.getLogger("test_workflow") - -# Pfad zum Projektverzeichnis hinzufügen -sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) - -# Module importieren -from modules.lucydom_interface import get_lucydom_interface -from modules.chat import get_chat_manager - -class TestConfig: - """Konfigurationsklasse für Testparameter""" - def __init__(self): - self.mandate_id = 1 - self.user_id = 1 - self.cleanup = True - self.save_results = True - self.results_dir = "test_results" - self.test_message = "Analysiere bitte die hochgeladenen Dateien und erkläre mir deren Inhalt." - self.text_file_content = """ - Dies ist eine Test-Textdatei für den ChatManager-Workflow. - Sie enthält einige Informationen zum Testen der Dokumentverarbeitung. - - Der ChatManager sollte in der Lage sein, diese Datei zu verarbeiten - und daraus relevante Informationen zu extrahieren. - - Diese Datei dient als Beispiel für Text-basierte Dokumente, die in einem - Chat-Workflow verwendet werden können. 
- """ - -def parse_args() -> TestConfig: - """Parst Kommandozeilenargumente""" - parser = argparse.ArgumentParser(description="Test für ChatManager-Workflow") - parser.add_argument("--mandate-id", type=int, default=1, help="ID des Mandanten") - parser.add_argument("--user-id", type=int, default=1, help="ID des Benutzers") - parser.add_argument("--no-cleanup", action="store_true", help="Testdateien nicht löschen") - parser.add_argument("--no-save", action="store_true", help="Ergebnisse nicht speichern") - parser.add_argument("--results-dir", type=str, default="test_results", help="Verzeichnis für Ergebnisse") - parser.add_argument("--message", type=str, help="Benutzernachricht für den Test") - - args = parser.parse_args() - - config = TestConfig() - config.mandate_id = args.mandate_id - config.user_id = args.user_id - config.cleanup = not args.no_cleanup - config.save_results = not args.no_save - config.results_dir = args.results_dir - if args.message: - config.test_message = args.message - - return config - -async def create_test_files(config: TestConfig) -> Tuple[int, int]: - """ - Erstellt eine Textdatei und ein Bild für Tests und lädt sie in die Datenbank hoch. 
- - Args: - config: Testkonfiguration - - Returns: - Tuple mit (text_file_id, image_file_id) - """ - logger.info("Erstelle Test-Dateien...") - - lucy_interface = get_lucydom_interface(config.mandate_id, config.user_id) - - # Textdatei erstellen - text_content = config.text_file_content - text_file_bytes = text_content.encode('utf-8') - text_file = lucy_interface.save_uploaded_file(text_file_bytes, "test_document.txt") - text_file_id = text_file["id"] - logger.info(f"Textdatei erstellt mit ID: {text_file_id}") - - # Bilddatei erstellen (einfaches 1x1 PNG) - # Base64-kodiertes 1x1 PNG - png_data = bytes.fromhex( - "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4" - "89000000017352474200aece1ce90000000467414d410000b18f0bfc61050000" - "000970485973000016250000162501495224f00000001974455874536f667477" - "617265007777772e696e6b73636170652e6f72679bee3c1a0000000c49444154" - "08d763f8ffff3f0005fe02fec1cd59830000000049454e44ae426082" - ) - image_file = lucy_interface.save_uploaded_file(png_data, "test_image.png") - image_file_id = image_file["id"] - logger.info(f"Bilddatei erstellt mit ID: {image_file_id}") - - return text_file_id, image_file_id - -async def verify_uploaded_files(mandate_id: int, user_id: int, file_ids: List[int]) -> bool: - """ - Überprüft, ob die hochgeladenen Dateien korrekt in der Datenbank gespeichert wurden - - Args: - mandate_id: ID des Mandanten - user_id: ID des Benutzers - file_ids: Liste der Datei-IDs - - Returns: - True, wenn alle Dateien verfügbar sind - """ - logger.info("Überprüfe hochgeladene Dateien...") - - lucy_interface = get_lucydom_interface(mandate_id, user_id) - all_files_available = True - - for file_id in file_ids: - file = lucy_interface.get_file(file_id) - if file: - file_data = lucy_interface.get_file_data(file_id) - if file_data: - logger.info(f"Datei {file_id} ({file.get('name', 'Unbekannt')}, {file.get('mime_type', 'Unbekannt')}) ist verfügbar") - logger.info(f" Größe: {len(file_data)} Bytes") - else: - 
logger.error(f"Datei {file_id} hat keine Binärdaten") - all_files_available = False - else: - logger.error(f"Datei mit ID {file_id} nicht in der Datenbank gefunden") - all_files_available = False - - return all_files_available - -async def run_chat_workflow(config: TestConfig, file_ids: List[int]) -> Dict[str, Any]: - """ - Führt einen Chat-Workflow mit gegebenen Datei-IDs aus. - - Args: - config: Testkonfiguration - file_ids: Liste der Datei-IDs - - Returns: - Das Workflow-Ergebnis - """ - logger.info(f"Starte Chat-Workflow mit Dateien: {file_ids}") - - # ChatManager initialisieren - chat_manager = get_chat_manager(config.mandate_id, config.user_id) - - # Benutzeranfrage erstellen - user_input = { - "message": config.test_message, - "additional_fileids": file_ids - } - - # Start-Zeit erfassen - start_time = datetime.now() - - # Chat-Workflow ausführen - workflow_result = await chat_manager.chat_run(user_input) - - # Ende-Zeit und Dauer berechnen - end_time = datetime.now() - duration = (end_time - start_time).total_seconds() - - logger.info(f"Workflow abgeschlossen mit ID: {workflow_result['id']}") - logger.info(f"Dauer: {duration:.2f} Sekunden") - - return workflow_result - -def analyze_workflow_result(workflow: Dict[str, Any]) -> Dict[str, Any]: - """ - Analysiert das Workflow-Ergebnis und gibt Statistiken zurück. 
- - Args: - workflow: Das Workflow-Ergebnis - - Returns: - Dictionary mit Analyseergebnissen - """ - logger.info("Analysiere Workflow-Ergebnis:") - - # Basis-Informationen - analysis = { - "workflow_id": workflow.get("id"), - "status": workflow.get("status"), - "message_count": len(workflow.get("messages", [])), - "log_count": len(workflow.get("logs", [])), - "document_count": 0, - "roles": {}, - "document_types": {}, - "response_sizes": [] - } - - # Nachrichten analysieren - for message in workflow.get("messages", []): - # Rollen zählen - role = message.get("role", "unknown") - if role not in analysis["roles"]: - analysis["roles"][role] = 0 - analysis["roles"][role] += 1 - - # Content-Größe bei Antworten - if role == "assistant": - content = message.get("content", "") - analysis["response_sizes"].append(len(content)) - - # Dokumente zählen und analysieren - documents = message.get("documents", []) - analysis["document_count"] += len(documents) - - for doc in documents: - contents = doc.get("contents", []) - for content in contents: - content_type = content.get("content_type", "unknown") - if content_type not in analysis["document_types"]: - analysis["document_types"][content_type] = 0 - analysis["document_types"][content_type] += 1 - - # Ausgabe für Log - logger.info(f"Workflow-ID: {analysis['workflow_id']}") - logger.info(f"Status: {analysis['status']}") - logger.info(f"Anzahl Nachrichten: {analysis['message_count']}") - logger.info(f"Anzahl Dokumente: {analysis['document_count']}") - logger.info(f"Rollen-Verteilung: {analysis['roles']}") - logger.info(f"Dokumenttypen: {analysis['document_types']}") - - if analysis["response_sizes"]: - avg_size = sum(analysis["response_sizes"]) / len(analysis["response_sizes"]) - logger.info(f"Durchschnittliche Antwortgröße: {avg_size:.2f} Zeichen") - - # Detaillierte Nachrichteninformationen - for i, message in enumerate(workflow.get("messages", [])[:5]): # Begrenzung auf 5 Nachrichten - logger.info(f"Nachricht {i+1}:") - 
logger.info(f" Rolle: {message.get('role', 'unbekannt')}") - - # Nur die ersten 100 Zeichen des Inhalts anzeigen - content = message.get("content", "") - content_preview = content[:100] + "..." if len(content) > 100 else content - logger.info(f" Inhalt: {content_preview}") - - # Dokumente in der Nachricht anzeigen - documents = message.get("documents", []) - if documents: - logger.info(f" Dokumente: {len(documents)}") - for j, doc in enumerate(documents): - file_id = doc.get("file_id", "keine file_id") - logger.info(f" Dokument {j+1}: File-ID={file_id}") - - return analysis - -def save_test_results(config: TestConfig, workflow: Dict[str, Any], analysis: Dict[str, Any]) -> None: - """ - Speichert die Testergebnisse in einer Datei. - - Args: - config: Testkonfiguration - workflow: Das vollständige Workflow-Ergebnis - analysis: Die Analyseergebnisse - """ - if not config.save_results: - return - - # Ergebnisverzeichnis erstellen, falls es nicht existiert - os.makedirs(config.results_dir, exist_ok=True) - - # Zeitstempel für eindeutige Dateinamen - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - - # Speichere die Analyse - analysis_file = os.path.join(config.results_dir, f"analysis_{timestamp}.json") - with open(analysis_file, "w", encoding="utf-8") as f: - json.dump(analysis, f, indent=2, ensure_ascii=False) - logger.info(f"Analyse gespeichert in: {analysis_file}") - - # Speichere den vollständigen Workflow (ohne große Binärdaten) - workflow_copy = workflow.copy() - - # Entferne Binärdaten aus dem Export, um die Dateigröße zu reduzieren - for message in workflow_copy.get("messages", []): - if "documents" in message: - for doc in message.get("documents", []): - if "contents" in doc: - for content in doc.get("contents", []): - if "data" in content and isinstance(content["data"], bytes) and len(content["data"]) > 1000: - content["data"] = f"[{len(content['data'])} Bytes]" - - workflow_file = os.path.join(config.results_dir, f"workflow_{timestamp}.json") - with 
open(workflow_file, "w", encoding="utf-8") as f: - # Konvertiere Bytes zu Strings für JSON-Serialisierung - json.dump(workflow_copy, f, indent=2, ensure_ascii=False, default=lambda o: - o.decode("utf-8") if isinstance(o, bytes) else str(o)) - logger.info(f"Workflow gespeichert in: {workflow_file}") - -async def cleanup_test_files(config: TestConfig, file_ids: List[int]) -> None: - """ - Bereinigt die erstellten Testdateien. - - Args: - config: Testkonfiguration - file_ids: Liste der zu löschenden Datei-IDs - """ - if not config.cleanup: - logger.info("Bereinigung übersprungen (--no-cleanup)") - return - - logger.info("Beginne Bereinigung der Testdateien...") - - lucy_interface = get_lucydom_interface(config.mandate_id, config.user_id) - - for file_id in file_ids: - try: - success = lucy_interface.delete_file(file_id) - if success: - logger.info(f"Datei mit ID {file_id} erfolgreich gelöscht") - else: - logger.warning(f"Fehler beim Löschen der Datei mit ID {file_id}") - except Exception as e: - logger.error(f"Fehler beim Löschen der Datei mit ID {file_id}: {str(e)}") - - logger.info("Bereinigung abgeschlossen") - -async def main(): - """ - Hauptfunktion, die den gesamten Testprozess steuert. 
- """ - # Konfiguration laden - config = parse_args() - - try: - logger.info("=== Test-Workflow für ChatManager gestartet ===") - logger.info(f"Mandate-ID: {config.mandate_id}, User-ID: {config.user_id}") - - # Schritt 1: Testdateien erstellen - text_file_id, image_file_id = await create_test_files(config) - file_ids = [text_file_id, image_file_id] - - # Schritt 2: Hochgeladene Dateien überprüfen - files_ok = await verify_uploaded_files(config.mandate_id, config.user_id, file_ids) - if not files_ok: - logger.error("Fehler bei den hochgeladenen Dateien, Test wird abgebrochen") - return - - # Schritt 3: Chat-Workflow ausführen - workflow_result = await run_chat_workflow(config, file_ids) - - # Schritt 4: Ergebnis analysieren - analysis = analyze_workflow_result(workflow_result) - - # Schritt 5: Ergebnisse speichern - save_test_results(config, workflow_result, analysis) - - # Schritt 6: Bereinigen - await cleanup_test_files(config, file_ids) - - logger.info("=== Test-Workflow erfolgreich abgeschlossen ===") - - except Exception as e: - logger.error(f"Fehler im Test-Workflow: {str(e)}", exc_info=True) - logger.info("=== Test-Workflow mit Fehler beendet ===") - -if __name__ == "__main__": - # Event-Loop für asyncio erstellen und Hauptfunktion ausführen - loop = asyncio.get_event_loop() - loop.run_until_complete(main()) \ No newline at end of file