refactored chat engine
parent 9e2b6f1344
commit 9247de4346
42 changed files with 2271 additions and 14660 deletions
app.py
@@ -14,7 +14,7 @@ from logging.handlers import RotatingFileHandler
 from datetime import timedelta
 import pathlib
-from modules.utility import APP_CONFIG
+from modules.configuration import APP_CONFIG
 from modules.gateway_interface import get_gateway_interface
@@ -2,7 +2,7 @@ import logging
 import httpx
 from typing import Dict, Any, List, Optional, Union
 from fastapi import HTTPException
-from modules.utility import APP_CONFIG
+from modules.configuration import APP_CONFIG
 
 # Configure the logger
 logger = logging.getLogger(__name__)
@@ -199,7 +199,7 @@ class ChatService:
         # Distinguish between a file path and binary data
         if isinstance(image_data, str):
             # It's a file path - import the file handler only when needed
-            from gateway.gwserver.modules import agentservice_filemanager as file_handler
+            from modules import agentservice_filemanager as file_handler
             base64_data, auto_mime_type = file_handler.encode_file_to_base64(image_data)
             mime_type = mime_type or auto_mime_type
         else:
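For orientation, a minimal sketch of the branch this hunk sits in: encode_file_to_base64 is the helper the corrected import provides, returning the encoded payload together with an inferred MIME type. The sample path and the bytes fallback are assumptions:

import base64

from modules import agentservice_filemanager as file_handler

image_data = "/tmp/example.png"  # assumed sample; a str is treated as a file path
if isinstance(image_data, str):
    # The helper reads the file, base64-encodes it, and infers the MIME type
    base64_data, auto_mime_type = file_handler.encode_file_to_base64(image_data)
    mime_type = auto_mime_type  # e.g. "image/png"
else:
    # Raw bytes: encode directly and leave the MIME type to the caller
    base64_data = base64.b64encode(image_data).decode("ascii")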
@@ -2,7 +2,7 @@ import logging
 import httpx
 from typing import Dict, Any, List, Optional, Union
 from fastapi import HTTPException
-from modules.utility import APP_CONFIG
+from modules.configuration import APP_CONFIG
 
 # Configure the logger
 logger = logging.getLogger(__name__)
@@ -108,7 +108,7 @@ class ChatService:
         # Distinguish between a file path and binary data
         if isinstance(image_data, str):
             # It's a file path - import the file handler only when needed
-            from gateway.gwserver.modules import agentservice_filemanager as file_handler
+            from modules import agentservice_filemanager as file_handler
             base64_data, auto_mime_type = file_handler.encode_file_to_base64(image_data)
             mime_type = mime_type or auto_mime_type
         else:
@@ -375,7 +375,7 @@ class DatabaseConnector:
         # If the table is empty and a system ID should be registered
         if not data:
             self.register_initial_id(table, record_data["id"])
-            logger.info(f"Initial ID {record_data['id']} registered for table {table}")
+            logger.info(f"Initial ID {record_data['id']} was registered for table {table}")
 
         # Add the new record
         data.append(record_data)
@@ -462,6 +462,7 @@ class DatabaseConnector:
             # Record not found
             raise ValueError(f"Record with ID {record_id} not found in table {table}")
 
+
     # System table functions
 
     def register_initial_id(self, table: str, initial_id: int) -> bool:
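Both DatabaseConnector hunks above touch the initial-ID bookkeeping that the module deleted below implements in full: the first record ever written to a table is registered as that table's protected initial record. A minimal usage sketch of that flow; the method names come from the diff, while the connection values are assumptions:

# Sketch only, not shipped code. register_initial_id/get_initial_id are the
# methods shown above; host and credentials are placeholder assumptions.
connector = DatabaseConnector(
    db_host="localhost",
    db_database="appdb",
    db_user="app",
    db_password="secret",
    mandate_id=1,
    user_id=1,
)
first = connector.record_create("mandates", {"name": "Example"})
assert connector.get_initial_id("mandates") == first["id"]
connector.record_delete("mandates", first["id"])  # refused: deleting the initial record returns False
connector.close()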
@@ -1,675 +0,0 @@
import os
import logging
from typing import List, Dict, Any, Optional, Union
from datetime import datetime
import mysql.connector
from mysql.connector import Error


logger = logging.getLogger(__name__)


class DatabaseConnector:
    """
    A connector for MySQL-based data storage.
    Provides generic database operations.
    """

    def __init__(self, db_host: str, db_database: str, db_user: str, db_password: str, mandate_id: int = None, user_id: int = None):
        """
        Initializes the MySQL database connector.

        Args:
            db_host: MySQL server host
            db_database: Name of the database
            db_user: Username for authentication
            db_password: Password for authentication
            mandate_id: Context parameter for the mandate
            user_id: Context parameter for the user
        """
        # Store the input parameters
        self.db_host = db_host
        self.db_database = db_database
        self.db_user = db_user
        self.db_password = db_password

        # Check that the context parameters are set
        if mandate_id is None or user_id is None:
            raise ValueError("mandate_id and user_id must be set")

        # Establish the database connection
        self.connection = self._create_connection()

        # Initialize the system table
        self._system_table_name = "_system"
        self._initialize_system_table()

        # Store mandate_id and user_id temporarily
        self._mandate_id = mandate_id
        self._user_id = user_id

        # If mandate_id or user_id is 0, try to use the initial IDs
        if mandate_id == 0:
            initial_mandate_id = self.get_initial_id("mandates")
            if initial_mandate_id is not None:
                self._mandate_id = initial_mandate_id
                logger.info(f"Using initial mandate_id: {initial_mandate_id} instead of 0")

        if user_id == 0:
            initial_user_id = self.get_initial_id("users")
            if initial_user_id is not None:
                self._user_id = initial_user_id
                logger.info(f"Using initial user_id: {initial_user_id} instead of 0")

        # Set the effective IDs as attributes
        self.mandate_id = self._mandate_id
        self.user_id = self._user_id

        logger.info(f"DatabaseConnector initialized for database: {db_database}")
        logger.info(f"Context: mandate_id={self.mandate_id}, user_id={self.user_id}")

    def _create_connection(self):
        """Creates a connection to the MySQL database"""
        try:
            connection = mysql.connector.connect(
                host=self.db_host,
                database=self.db_database,
                user=self.db_user,
                password=self.db_password
            )
            if connection.is_connected():
                logger.info(f"Connected to MySQL server version {connection.get_server_info()}")
            return connection
        except Error as e:
            logger.error(f"Error connecting to MySQL: {e}")
            raise

    def _initialize_system_table(self):
        """Initializes the system table if it does not exist yet."""
        cursor = None
        try:
            cursor = self.connection.cursor()

            # Check whether the system table exists
            cursor.execute(f"""
                SELECT COUNT(*)
                FROM information_schema.tables
                WHERE table_schema = '{self.db_database}'
                AND table_name = '{self._system_table_name}'
            """)

            if cursor.fetchone()[0] == 0:
                # Create the system table
                cursor.execute(f"""
                    CREATE TABLE {self._system_table_name} (
                        table_name VARCHAR(255) PRIMARY KEY,
                        initial_id INT NOT NULL
                    )
                """)
                self.connection.commit()
                logger.info(f"System table '{self._system_table_name}' created")
        except Error as e:
            logger.error(f"Error initializing the system table: {e}")
            if self.connection.is_connected():
                self.connection.rollback()
            raise
        finally:
            # Cursors have no is_connected(); a plain truthiness check is enough
            if cursor:
                cursor.close()

    def _execute_query(self, query: str, params: tuple = None):
        """Executes a SQL query; the caller is responsible for closing the returned cursor"""
        cursor = None
        try:
            cursor = self.connection.cursor(dictionary=True)
            cursor.execute(query, params)
            return cursor
        except Error as e:
            logger.error(f"Error executing the query: {e}")
            if cursor:
                cursor.close()
            raise

    def _execute_select(self, query: str, params: tuple = None) -> List[Dict[str, Any]]:
        """Executes a SELECT query and returns the results"""
        cursor = None
        try:
            cursor = self.connection.cursor(dictionary=True)
            cursor.execute(query, params)
            result = cursor.fetchall()
            return result
        except Error as e:
            logger.error(f"Error executing the SELECT query: {e}")
            raise
        finally:
            if cursor:
                cursor.close()

    def _execute_insert(self, query: str, params: tuple = None) -> int:
        """Executes an INSERT query and returns the ID of the inserted record"""
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(query, params)
            self.connection.commit()
            return cursor.lastrowid
        except Error as e:
            logger.error(f"Error executing the INSERT query: {e}")
            self.connection.rollback()
            raise
        finally:
            if cursor:
                cursor.close()

    def _execute_update(self, query: str, params: tuple = None) -> int:
        """Executes an UPDATE query and returns the number of affected rows"""
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(query, params)
            self.connection.commit()
            return cursor.rowcount
        except Error as e:
            logger.error(f"Error executing the UPDATE query: {e}")
            self.connection.rollback()
            raise
        finally:
            if cursor:
                cursor.close()

    def _execute_delete(self, query: str, params: tuple = None) -> int:
        """Executes a DELETE query and returns the number of deleted rows"""
        cursor = None
        try:
            cursor = self.connection.cursor()
            cursor.execute(query, params)
            self.connection.commit()
            return cursor.rowcount
        except Error as e:
            logger.error(f"Error executing the DELETE query: {e}")
            self.connection.rollback()
            raise
        finally:
            if cursor:
                cursor.close()

    def _apply_record_filter(self, record_filter: Dict[str, Any] = None) -> tuple:
        """Builds a WHERE clause based on the record filter"""
        # Always return a (clause, params) pair so callers can unpack uniformly
        if not record_filter:
            return "WHERE 1=1", ()

        conditions = []
        params = []

        for field, value in record_filter.items():
            conditions.append(f"{field} = %s")
            params.append(value)

        where_clause = "WHERE " + " AND ".join(conditions)

        return where_clause, tuple(params)

    def _get_context_filter(self) -> tuple:
        """Builds a WHERE clause for the mandate and user context"""
        return "WHERE mandate_id = %s", (self.mandate_id,)

    # Public API

    def get_tables(self, filter_criteria: Dict[str, Any] = None) -> List[str]:
        """
        Returns a list of all available tables.

        Args:
            filter_criteria: Optional filter criteria (not implemented)

        Returns:
            List of table names
        """
        query = """
            SELECT table_name
            FROM information_schema.tables
            WHERE table_schema = %s
            AND table_name NOT LIKE '\_%'
        """

        try:
            result = self._execute_select(query, (self.db_database,))
            return [row["table_name"] for row in result]
        except Exception as e:
            logger.error(f"Error retrieving the tables: {e}")
            return []

    def get_fields(self, table: str, filter_criteria: Dict[str, Any] = None) -> List[str]:
        """
        Returns a list of all fields of a table.

        Args:
            table: Name of the table
            filter_criteria: Optional filter criteria (not implemented)

        Returns:
            List of field names
        """
        query = """
            SELECT column_name
            FROM information_schema.columns
            WHERE table_schema = %s AND table_name = %s
        """

        try:
            result = self._execute_select(query, (self.db_database, table))
            return [row["column_name"] for row in result]
        except Exception as e:
            logger.error(f"Error retrieving the fields for table {table}: {e}")
            return []

    def get_schema(self, table: str, language: str = None, filter_criteria: Dict[str, Any] = None) -> Dict[str, Dict[str, Any]]:
        """
        Returns a schema object for a table with data types and labels.

        Args:
            table: Name of the table
            language: Language for the labels (optional)
            filter_criteria: Optional filter criteria (not implemented)

        Returns:
            Schema object with fields, data types, and labels
        """
        query = """
            SELECT
                column_name,
                data_type,
                column_comment
            FROM
                information_schema.columns
            WHERE
                table_schema = %s AND table_name = %s
        """

        schema = {}

        try:
            result = self._execute_select(query, (self.db_database, table))

            for row in result:
                field = row["column_name"]
                data_type = row["data_type"]
                comment = row["column_comment"]

                # Build the label (default is the field name)
                label = field

                # If a column comment exists, use it as the label
                if comment:
                    label = comment

                schema[field] = {
                    "type": data_type,
                    "label": label
                }

            return schema
        except Exception as e:
            logger.error(f"Error retrieving the schema for table {table}: {e}")
            return {}

    def get_recordset(self, table: str, field_filter: List[str] = None, record_filter: Dict[str, Any] = None) -> List[Dict[str, Any]]:
        """
        Returns a list of records from a table, filtered by criteria.

        Args:
            table: Name of the table
            field_filter: Field filter (which fields should be returned)
            record_filter: Record filter (which records should be returned)

        Returns:
            List of the filtered records
        """
        # Determine the fields for the query
        fields = "*"
        if field_filter and isinstance(field_filter, list):
            fields = ", ".join(field_filter)

        # The base condition is the mandate context
        base_where, base_params = self._get_context_filter()

        # Apply additional filter conditions, if present
        additional_where = ""
        additional_params = ()

        if record_filter:
            additional_where, additional_params = self._apply_record_filter(record_filter)
            # Strip the leading "WHERE" and replace it with "AND"
            additional_where = " AND " + additional_where[6:]

        # Combine the conditions and parameters
        where_clause = base_where + additional_where
        params = base_params + additional_params

        # Build the full query
        query = f"""
            SELECT {fields} FROM {table} {where_clause}
        """

        try:
            return self._execute_select(query, params)
        except Exception as e:
            logger.error(f"Error retrieving records from table {table}: {e}")
            return []

    def record_create(self, table: str, record_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Creates a new record in the table.

        Args:
            table: Name of the table
            record_data: Data for the new record

        Returns:
            The created record
        """
        # Add mandate_id and user_id if missing or 0
        if "mandate_id" not in record_data or record_data["mandate_id"] == 0:
            record_data["mandate_id"] = self.mandate_id

        if "user_id" not in record_data or record_data["user_id"] == 0:
            record_data["user_id"] = self.user_id

        # Build the query
        fields = ", ".join(record_data.keys())
        placeholders = ", ".join(["%s"] * len(record_data))
        values = tuple(record_data.values())

        query = f"""
            INSERT INTO {table} ({fields})
            VALUES ({placeholders})
        """

        try:
            # First check whether the table is empty
            check_query = f"""
                SELECT COUNT(*) as count FROM {table}
            """
            count_result = self._execute_select(check_query)
            is_empty = count_result[0]["count"] == 0

            # Execute the query and get the ID of the new record
            new_id = self._execute_insert(query, values)

            # If the table was empty before, register the new ID as the initial ID
            if is_empty and new_id:
                self.register_initial_id(table, new_id)
                logger.info(f"Initial ID {new_id} registered for table {table}")

            # Add the ID to the record if one was returned
            if new_id:
                record_data["id"] = new_id

            return record_data
        except Exception as e:
            logger.error(f"Error creating the record in table {table}: {e}")
            raise ValueError(f"Error creating the record in table {table}")

    def record_delete(self, table: str, record_id: Union[str, int]) -> bool:
        """
        Deletes a record from the table.

        Args:
            table: Name of the table
            record_id: ID of the record to delete

        Returns:
            True on success, False on failure
        """
        # Check whether this is the initial ID
        initial_id = self.get_initial_id(table)
        if initial_id is not None and initial_id == record_id:
            logger.warning(f"Attempt to delete the initial record with ID {record_id} from table {table} was prevented")
            return False

        # First check whether the record belongs to the current mandate
        check_query = f"""
            SELECT mandate_id FROM {table} WHERE id = %s
        """

        try:
            result = self._execute_select(check_query, (record_id,))

            if not result:
                # Record not found
                return False

            if result[0]["mandate_id"] != self.mandate_id:
                raise ValueError("Not your mandate")

            # Delete the record
            delete_query = f"""
                DELETE FROM {table} WHERE id = %s AND mandate_id = %s
            """

            rows_affected = self._execute_delete(delete_query, (record_id, self.mandate_id))

            return rows_affected > 0
        except Exception as e:
            logger.error(f"Error deleting the record from table {table}: {e}")
            return False

    def record_modify(self, table: str, record_id: Union[str, int], record_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Modifies a record in the table.

        Args:
            table: Name of the table
            record_id: ID of the record to modify
            record_data: New data for the record

        Returns:
            The updated record
        """
        # Check whether this is the initial ID and the ID is about to be changed
        initial_id = self.get_initial_id(table)
        if initial_id is not None and initial_id == record_id and "id" in record_data and record_data["id"] != record_id:
            raise ValueError(f"The ID of the initial record in table {table} cannot be changed")

        # First check whether the record belongs to the current mandate
        check_query = f"""
            SELECT mandate_id FROM {table} WHERE id = %s
        """

        try:
            result = self._execute_select(check_query, (record_id,))

            if not result:
                # Record not found
                raise ValueError(f"Record with ID {record_id} not found in table {table}")

            if result[0]["mandate_id"] != self.mandate_id:
                raise ValueError("Not your mandate")

            # Build the SET clause and parameters for the update
            set_clauses = []
            values = []

            for key, value in record_data.items():
                set_clauses.append(f"{key} = %s")
                values.append(value)

            set_clause = ", ".join(set_clauses)
            values.append(record_id)  # For the WHERE condition
            values.append(self.mandate_id)  # For the mandate_id condition

            # Update the record
            update_query = f"""
                UPDATE {table}
                SET {set_clause}
                WHERE id = %s AND mandate_id = %s
            """

            rows_affected = self._execute_update(update_query, tuple(values))

            if rows_affected > 0:
                # Load the updated record
                get_query = f"""
                    SELECT * FROM {table} WHERE id = %s
                """

                updated_record = self._execute_select(get_query, (record_id,))

                if updated_record:
                    return updated_record[0]
                else:
                    raise ValueError(f"Error retrieving the updated record from table {table}")
            else:
                raise ValueError(f"Error updating the record in table {table}")
        except Exception as e:
            logger.error(f"Error updating the record in table {table}: {e}")
            raise

    # System table functions

    def register_initial_id(self, table: str, initial_id: int) -> bool:
        """
        Registers the initial ID for a table.

        Args:
            table: Name of the table
            initial_id: The initial ID

        Returns:
            True on success, False on failure
        """
        try:
            # First check whether an initial ID is already registered for this table
            check_query = f"""
                SELECT COUNT(*) as count
                FROM {self._system_table_name}
                WHERE table_name = %s
            """

            result = self._execute_select(check_query, (table,))

            if result and result[0]["count"] > 0:
                # Already registered
                return True

            # Register the initial ID
            insert_query = f"""
                INSERT INTO {self._system_table_name} (table_name, initial_id)
                VALUES (%s, %s)
            """

            self._execute_insert(insert_query, (table, initial_id))
            logger.info(f"Initial ID {initial_id} registered for table {table}")

            return True
        except Exception as e:
            logger.error(f"Error registering the initial ID for table {table}: {e}")
            return False

    def get_initial_id(self, table: str) -> Optional[int]:
        """
        Returns the initial ID for a table.

        Args:
            table: Name of the table

        Returns:
            The initial ID, or None if not present
        """
        try:
            query = f"""
                SELECT initial_id
                FROM {self._system_table_name}
                WHERE table_name = %s
            """

            result = self._execute_select(query, (table,))

            if result and len(result) > 0:
                logger.info(f"Found initial ID for table {table}: {result[0]['initial_id']}")
                return result[0]["initial_id"]

            # If no initial ID was found, try to use the first record
            if table and not table.startswith("_"):
                try:
                    query = f"""
                        SELECT id
                        FROM {table}
                        ORDER BY id
                        LIMIT 1
                    """

                    first_record = self._execute_select(query)

                    if first_record and len(first_record) > 0 and "id" in first_record[0]:
                        first_id = first_record[0]["id"]
                        # Register this ID as the initial ID
                        self.register_initial_id(table, first_id)
                        logger.info(f"Automatically detected initial ID {first_id} for table {table}")
                        return first_id
                except Exception as inner_e:
                    logger.warning(f"Could not find a first record in table {table}: {inner_e}")

            logger.debug(f"No initial ID found for table {table}")
            return None
        except Exception as e:
            logger.error(f"Error retrieving the initial ID for table {table}: {e}")
            return None

    def has_initial_id(self, table: str) -> bool:
        """
        Checks whether an initial ID is registered for a table.

        Args:
            table: Name of the table

        Returns:
            True if an initial ID is registered, otherwise False
        """
        try:
            query = f"""
                SELECT COUNT(*) as count
                FROM {self._system_table_name}
                WHERE table_name = %s
            """

            result = self._execute_select(query, (table,))

            if result and len(result) > 0:
                return result[0]["count"] > 0

            return False
        except Exception as e:
            logger.error(f"Error checking the initial ID for table {table}: {e}")
            return False

    def get_all_initial_ids(self) -> Dict[str, int]:
        """
        Returns all registered initial IDs.

        Returns:
            Dictionary with table names as keys and initial IDs as values
        """
        try:
            query = f"""
                SELECT table_name, initial_id
                FROM {self._system_table_name}
            """

            result = self._execute_select(query)

            initial_ids = {}
            for row in result:
                initial_ids[row["table_name"]] = row["initial_id"]

            return initial_ids
        except Exception as e:
            logger.error(f"Error retrieving all initial IDs: {e}")
            return {}

    def close(self):
        """Closes the database connection"""
        if hasattr(self, 'connection') and self.connection.is_connected():
            self.connection.close()
            logger.info("Database connection closed")
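The removed connector scoped every read to the mandate context. A short sketch of how get_recordset combined that context with a caller-supplied filter, reusing the connector instance from the sketch above; the status column is an assumption:

# Sketch only: get_recordset always starts from "WHERE mandate_id = %s"
# (_get_context_filter) and appends caller conditions with AND
# (_apply_record_filter).
rows = connector.get_recordset(
    "users",
    field_filter=["id", "name"],         # SELECT id, name
    record_filter={"status": "active"},  # ... AND status = %s
)
for row in rows:
    print(row["id"], row["name"])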
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -1,399 +0,0 @@
"""
Creative Agent for knowledge-based answers and creative content generation.
Handles open questions, documentation tasks, and special 'poweron' requests.
Based on the refactored Core-Module.
"""

import logging
from typing import List, Dict, Any, Optional
import json

from modules.agentservice_base import BaseAgent
from modules.agentservice_utils import MessageUtils, LoggingUtils
from modules.agentservice_protocol import AgentCommunicationProtocol

logger = logging.getLogger(__name__)

class CreativeAgent(BaseAgent):
    """Agent for knowledge-based answers and creative content generation"""

    def __init__(self):
        """Initialize the Creative Agent"""
        super().__init__()
        self.id = "creative"
        self.name = "Creative Knowledge Assistant"
        self.type = "knowledge"
        self.description = "Provides knowledge-based answers, creates content, handles document processing, and responds to PowerOn requests"

        # Extended capabilities to explicitly cover document processing
        self.capabilities = ("knowledge_sharing,content_creation,document_generation,"
                             "creative_writing,poweron,document_processing,"
                             "information_extraction,data_transformation,"
                             "document_analysis,text_processing,table_creation,"
                             "visual_information_processing,content_structuring")

        # Update result format to include tables
        self.result_format = "Text,Document,Table"

        # Add enhanced document capabilities
        self.supports_documents = True
        self.document_capabilities = ["read", "create", "analyze", "extract", "transform"]
        self.required_context = ["workflow_id"]
        self.document_handler = None

        # Initialize AI service
        self.ai_service = None

        # Initialize protocol
        self.protocol = AgentCommunicationProtocol()

        # Initialize utilities
        self.message_utils = MessageUtils()

    def get_agent_info(self) -> Dict[str, Any]:
        """Get agent information for agent registry"""
        info = super().get_agent_info()
        info.update({
            "metadata": {
                "specialties": [
                    "creative_writing",
                    "documentation",
                    "knowledge",
                    "poweron",
                    "document_processing",
                    "information_extraction",
                    "content_transformation",
                    "table_generation",
                    "document_analysis"
                ]
            }
        })
        return info

    def set_document_handler(self, document_handler):
        """Set the document handler for file operations"""
        self.document_handler = document_handler

    async def old_process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process a message and generate a creative or knowledge-based response.
        Enhanced with improved document handling.

        Args:
            message: The message to process
            context: Additional context

        Returns:
            The generated response
        """
        # Extract workflow_id from context or message
        workflow_id = context.get("workflow_id") if context and isinstance(context, dict) else None
        if not workflow_id and isinstance(message, dict):
            workflow_id = message.get("workflow_id", "unknown")

        # Create response structure early for fallback
        response = {
            "role": "assistant",
            "content": "",
            "agent_id": self.id,
            "agent_type": self.type,
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id
        }

        # Safely create logging utils
        log_func = None
        logging_utils = None
        try:
            from modules.agentservice_utils import LoggingUtils
            log_func = context.get("log_func") if context and isinstance(context, dict) else None
            logging_utils = LoggingUtils(workflow_id, log_func)
        except Exception as e:
            # If we can't even create logging utils, use basic logging
            logger.error(f"Error creating logging utils: {str(e)}")

        # Log function that works with or without logging_utils
        def safe_log(message, level="info"):
            try:
                if logging_utils:
                    if level == "info":
                        logging_utils.info(message, "agents")
                    elif level == "warning":
                        logging_utils.warning(message, "agents")
                    elif level == "error":
                        logging_utils.error(message, "agents")
                else:
                    if level == "info":
                        logger.info(message)
                    elif level == "warning":
                        logger.warning(message)
                    elif level == "error":
                        logger.error(message)
            except Exception as log_err:
                logger.error(f"Error in logging: {str(log_err)}")

        try:
            safe_log("Starting to process request", "info")

            # Get the prompt from the message with safety check
            prompt = ""
            if isinstance(message, dict):
                prompt = message.get("content", "")

            safe_log(f"Processing request: {prompt[:50]}...", "info")

            # Power-On handling with safety check
            if prompt and "poweron" in prompt.lower():
                safe_log("Detected PowerOn keyword, generating specialized response", "info")

                poweron_prompt = f"""
                Tell the user, in the language of their prompt, a big thank you for thinking of you as being PowerOn. Tell them how pleased you are to be part of the PowerOn family, working to support humans for a better life.

                Then generate a short answer (1-2 sentences) to this question: {prompt}
                """

                try:
                    poweron_response = await self.ai_service.call_api([
                        {"role": "system", "content": "You are a helpful assistant that is part of the PowerOn family."},
                        {"role": "user", "content": poweron_prompt}
                    ])

                    response["content"] = poweron_response
                    safe_log("PowerOn response generated", "info")
                    return response
                except Exception as api_err:
                    safe_log(f"Error calling API for PowerOn: {str(api_err)}", "error")
                    response["content"] = "I encountered an error while generating a PowerOn response. Please try again."
                    return response

            # Create system prompt
            system_prompt = "You are a helpful, creative assistant specializing in knowledge sharing, content creation, and document processing."

            # Add conversation summarization capabilities
            system_prompt += """
            When asked to summarize information, always consider:
            1. All provided document content
            2. The entire conversation history in the current workflow
            3. Any structured data that has been shared

            For summarization tasks specifically, make sure to analyze the complete context including previous messages in the conversation, not just the files or the current request.
            """

            if workflow_id and workflow_id != "unknown":
                system_prompt += """
                You are currently operating within a workflow where multiple messages may have been exchanged.
                When generating summaries or overviews, you must incorporate the content from previous messages
                in this workflow as they contain valuable context and information.
                """

            # Safely check for documents
            has_documents = False
            document_count = 0

            try:
                if isinstance(message, dict) and "documents" in message:
                    documents = message.get("documents")
                    if documents is not None:
                        document_count = len(documents)
                        has_documents = document_count > 0
                        safe_log(f"Message contains {document_count} documents", "info")
            except Exception as doc_err:
                safe_log(f"Error checking documents: {str(doc_err)}", "warning")

            # Initialize document variables
            document_content = ""
            document_texts = []
            document_names = []

            # Process documents with extreme caution
            if has_documents:
                safe_log("Processing attached documents", "info")

                # Try document handler first
                try:
                    if self.document_handler:
                        try:
                            document_content = self.document_handler.merge_document_contents(message)
                            if document_content:
                                safe_log("Successfully extracted document content with handler", "info")
                            else:
                                safe_log("Document handler returned empty content", "warning")
                        except Exception as handler_err:
                            safe_log(f"Error using document handler: {str(handler_err)}", "warning")
                except Exception as err:
                    safe_log(f"General error with document handler: {str(err)}", "warning")

                # Fallback: manual extraction (very cautious)
                try:
                    documents = message.get("documents", []) or []

                    for i, doc in enumerate(documents):
                        if doc is None:
                            safe_log(f"Document at index {i} is None", "warning")
                            continue

                        try:
                            # Process source
                            source = None
                            if isinstance(doc, dict):
                                source = doc.get("source")

                            # Get name
                            doc_name = "Document"
                            if isinstance(source, dict):
                                doc_name = source.get("name", f"Document {i+1}")

                            document_names.append(doc_name)
                            safe_log(f"Processing document: {doc_name}", "info")

                            # Get contents
                            contents = []
                            if isinstance(doc, dict):
                                contents = doc.get("contents", []) or []

                            doc_text = ""
                            for content_item in contents:
                                if content_item is None:
                                    continue

                                if isinstance(content_item, dict) and content_item.get("type") == "text":
                                    text = content_item.get("text", "")
                                    if text:
                                        doc_text = text
                                        document_texts.append(doc_text)
                                        safe_log(f"Found text content in {doc_name}", "info")
                                        break

                            # Handle empty content
                            if not doc_text:
                                safe_log(f"No text content found in {doc_name}", "warning")
                                placeholder = f"[This appears to be a document named '{doc_name}', but I couldn't extract its content]"
                                document_texts.append(placeholder)

                        except Exception as doc_err:
                            safe_log(f"Error processing individual document: {str(doc_err)}", "warning")
                except Exception as docs_err:
                    safe_log(f"Error in document processing loop: {str(docs_err)}", "warning")

            # Combine prompt with documents safely
            full_prompt = prompt

            try:
                if document_content:
                    full_prompt = f"{prompt}\n\n### Reference Documents:\n{document_content}"
                    safe_log("Using document handler content", "info")
                elif document_texts and document_names:
                    # Use only corresponding pairs of names and texts
                    docs_content = ""
                    min_length = min(len(document_names), len(document_texts))

                    for i in range(min_length):
                        name = document_names[i]
                        text = document_texts[i]
                        docs_content += f"\n\n### Document: {name}\n{text}"

                    if docs_content:
                        full_prompt = f"{prompt}\n\n{docs_content}"
                        safe_log("Using manually extracted content", "info")
                    else:
                        safe_log("No document content could be added", "warning")
                else:
                    safe_log("No document content available to add to prompt", "info")
            except Exception as combine_err:
                safe_log(f"Error combining prompt with documents: {str(combine_err)}", "warning")

            # Call AI API
            try:
                safe_log("Calling AI service", "info")

                content = await self.ai_service.call_api([
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": full_prompt}
                ])

                response["content"] = content
                safe_log("Response successfully generated", "info")

            except Exception as api_err:
                safe_log(f"Error calling AI API: {str(api_err)}", "error")
                response["content"] = "I encountered an error while processing your request. Please try again or rephrase your question."

            return response

        except Exception as e:
            # Ultra-safe error handling
            error_msg = f"Error generating response: {str(e)}"
            try:
                if logging_utils:
                    logging_utils.error(error_msg, "error")
                else:
                    logger.error(error_msg)
            except Exception:
                logger.error(f"Critical error in error handling: {error_msg}")

            response["content"] = f"I encountered an error while processing your request: {str(e)}"
            return response

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Direct message processing function that focuses on properly handling the user's request.
        """
        # Extract workflow_id and set up the response
        workflow_id = "unknown"
        if context and isinstance(context, dict) and "workflow_id" in context:
            workflow_id = context["workflow_id"]
        elif message and isinstance(message, dict) and "workflow_id" in message:
            workflow_id = message["workflow_id"]

        response = {
            "role": "assistant",
            "content": "",
            "agent_id": self.id,
            "agent_type": self.type,
            "agent_name": self.name,
            "result_format": "Text",
            "workflow_id": workflow_id
        }

        try:
            # Extract the user's message directly
            user_message = ""
            if isinstance(message, dict) and "content" in message:
                user_message = message["content"]

            # Ensure we have something to process
            if not user_message:
                response["content"] = "Please provide a message for me to respond to."
                return response

            # Simple system prompt that focuses on direct response to the user's request
            system_prompt = """You are a helpful, creative assistant.
            Respond directly to the user's request without referencing any workflow or system context.
            Focus only on providing a direct, helpful response to the specific question or request."""

            # Process with AI
            content = await self.ai_service.call_api([
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_message}
            ])

            response["content"] = content
            return response

        except Exception as e:
            logger.error(f"Error in process_message: {str(e)}")
            response["content"] = f"I encountered an error while processing your request: {str(e)}"
            return response

# Singleton instance
_creative_agent = None

def get_creative_agent():
    """Returns a singleton instance of the Creative Agent"""
    global _creative_agent
    if _creative_agent is None:
        _creative_agent = CreativeAgent()
    return _creative_agent
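The deleted creative agent was reached only through the module-level singleton. A minimal driver sketch; the agent ships with ai_service set to None, so the stub injected here is an assumption:

import asyncio

agent = get_creative_agent()
agent.ai_service = my_ai_service  # assumed object exposing an async call_api(messages)

async def main():
    reply = await agent.process_message(
        {"content": "Write a haiku about refactoring", "workflow_id": "wf-1"}
    )
    print(reply["content"])

asyncio.run(main())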
@ -1,574 +0,0 @@
|
|||
"""
|
||||
Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten.
|
||||
Verwendet einen adaptiven Prozess zur Erstellung hochwertiger Dokumentation basierend auf der Komplexität des Auftrags.
|
||||
Angepasst für das refaktorisierte Core-Modul und AgentCommunicationProtocol.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional, Tuple, Union
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
|
||||
from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
|
||||
from modules.agentservice_filemanager import FileManager # Import the file manager
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentationAgent(BaseAgent):
|
||||
"""Agent for creating documentation and structured content"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the documentation agent"""
|
||||
super().__init__()
|
||||
self.id = "documentation_agent"
|
||||
self.name = "Documentation Specialist"
|
||||
self.type = "documentation"
|
||||
self.description = "Creates documentation and structured content"
|
||||
self.capabilities = "report_generation,documentation,content_structuring,technical_writing,knowledge_organization"
|
||||
self.result_format = "FormattedDocument"
|
||||
|
||||
# Initialize AI service
|
||||
self.ai_service = None
|
||||
|
||||
# Initialize document handler
|
||||
self.document_handler = None
|
||||
|
||||
# Document capabilities
|
||||
self.supports_documents = True
|
||||
self.document_capabilities = ["read", "reference", "create"]
|
||||
self.required_context = ["document_purpose", "target_audience"]
|
||||
|
||||
# Initialize protocol
|
||||
self.protocol = AgentCommunicationProtocol()
|
||||
|
||||
# Initialize utilities
|
||||
self.message_utils = MessageUtils()
|
||||
|
||||
# Track the latest generated document
|
||||
self.last_document = {}
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""Get agent information for agent registry"""
|
||||
info = super().get_agent_info()
|
||||
info.update({
|
||||
"metadata": {
|
||||
"document_types": ["manual", "report", "process", "presentation", "document"],
|
||||
"formats": ["markdown", "text"]
|
||||
}
|
||||
})
|
||||
return info
|
||||
|
||||
def set_document_handler(self, document_handler):
|
||||
"""Set the document handler for file operations"""
|
||||
self.document_handler = document_handler
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Process a message and create documentation.
|
||||
|
||||
Args:
|
||||
message: Input message
|
||||
context: Optional context
|
||||
|
||||
Returns:
|
||||
Response with documentation
|
||||
"""
|
||||
# Extract workflow_id from context or message
|
||||
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
|
||||
|
||||
# Get or create logging_utils
|
||||
log_func = context.get("log_func") if context else None
|
||||
logging_utils = LoggingUtils(workflow_id, log_func)
|
||||
|
||||
# Create response structure
|
||||
response = {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"agent_id": self.id,
|
||||
"agent_type": self.type,
|
||||
"agent_name": self.name,
|
||||
"result_format": self.result_format,
|
||||
"workflow_id": workflow_id,
|
||||
"documents": []
|
||||
}
|
||||
|
||||
try:
|
||||
# Initial status update
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description="Starting document creation",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.0,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
# Extract task from message
|
||||
task = message.get("content", "")
|
||||
|
||||
# Detect document type - 10% progress
|
||||
document_type = self._detect_document_type(task)
|
||||
logging_utils.info(f"Creating {document_type} documentation", "execution")
|
||||
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description=f"Identified document type: {document_type}",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.1,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
# Process any attached documents - 30% progress
|
||||
document_context = ""
|
||||
if message.get("documents"):
|
||||
logging_utils.info("Processing reference documents", "execution")
|
||||
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description="Processing reference documents",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.2,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
document_context = await self._process_documents(message)
|
||||
|
||||
# Update progress
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description="Reference documents processed",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.3,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
# Enhanced prompt with document context
|
||||
enhanced_prompt = f"{task}\n\n{document_context}"
|
||||
|
||||
# Assess complexity - 40% progress
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description="Assessing document complexity",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.4,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
is_complex = await self._assess_complexity(enhanced_prompt)
|
||||
complexity_type = "complex" if is_complex else "simple"
|
||||
logging_utils.info(f"Document complexity assessment: {complexity_type}", "execution")
|
||||
|
||||
# Generate title - 50% progress
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description="Generating document title",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.5,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
title = await self._generate_title(enhanced_prompt, document_type)
|
||||
logging_utils.info(f"Document title: {title}", "execution")
|
||||
|
||||
# Update progress
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description=f"Generating {document_type}: {title}",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.6,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
# Generate content based on complexity - 70% progress
|
||||
if is_complex:
|
||||
# For complex documents, use the AI service with enhanced prompt
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description=f"Creating complex {document_type} document: {title}",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.7,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
content = await self._generate_complex_document(enhanced_prompt, document_type, title)
|
||||
logging_utils.info("Complex document generated", "execution")
|
||||
else:
|
||||
# For simple documents, use direct generation
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description=f"Creating simple {document_type} document: {title}",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.7,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
content = await self._generate_simple_document(enhanced_prompt, document_type, title)
|
||||
logging_utils.info("Simple document generated", "execution")
|
||||
|
||||
# Finalize document - 90% progress
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description="Finalizing document",
|
||||
sender_id=self.id,
|
||||
status="in_progress",
|
||||
progress=0.9,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
# Create a document artifact if document handler is available
|
||||
if self.document_handler:
|
||||
doc_id = f"doc_{uuid.uuid4()}"
|
||||
document = {
|
||||
"id": doc_id,
|
||||
"source": {
|
||||
"type": "generated",
|
||||
"id": doc_id,
|
||||
"name": title,
|
||||
"content_type": "text/markdown",
|
||||
"size": len(content)
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": content,
|
||||
"is_extracted": True
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# Add document to response
|
||||
response["documents"].append(document)
|
||||
|
||||
# Store the latest document
|
||||
self.last_document = document
|
||||
|
||||
# Update response content to reference the document
|
||||
response["content"] = f"I've created a document titled '{title}' that contains the requested information. The document is attached to this message."
|
||||
|
||||
# If protocol message is required, send it
|
||||
if context and context.get("require_protocol_message"):
|
||||
result_message = self.send_document_result(
|
||||
document_title=title,
|
||||
document_content=content,
|
||||
sender_id=self.id,
|
||||
receiver_id=context.get("receiver_id", "workflow"),
|
||||
context_id=workflow_id
|
||||
)
|
||||
# Just log the message creation
|
||||
logging_utils.info(f"Created protocol result message: {result_message.id}", "execution")
|
||||
else:
|
||||
# If no document handler, just put content in response
|
||||
response["content"] = content
|
||||
|
||||
# Final progress update
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description="Document creation completed",
|
||||
sender_id=self.id,
|
||||
status="completed",
|
||||
progress=1.0,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "info", self.id, self.name)
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error in documentation agent: {str(e)}"
|
||||
logging_utils.error(error_msg, "error")
|
||||
|
||||
# Create error response using protocol
|
||||
error_message = self.protocol.create_error_message(
|
||||
error_description=error_msg,
|
||||
sender_id=self.id,
|
||||
error_type="documentation",
|
||||
error_details={"traceback": traceback.format_exc()},
|
||||
context_id=workflow_id
|
||||
)
|
||||
|
||||
# Log error status
|
||||
if log_func:
|
||||
status_message = self.protocol.create_status_update_message(
|
||||
status_description=f"Error creating documentation: {str(e)}",
|
||||
sender_id=self.id,
|
||||
status="error",
|
||||
progress=1.0,
|
||||
context_id=workflow_id
|
||||
)
|
||||
log_func(workflow_id, status_message.content, "error", self.id, self.name)
|
||||
|
||||
# Set error in response
|
||||
response["content"] = f"## Error creating documentation\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
|
||||
response["status"] = "error"
|
||||
|
||||
return response
|
||||
|
||||
# Helper method to process document content with enhanced logging
|
||||
async def _process_documents(self, message: Dict[str, Any]) -> str:
|
||||
"""Process documents in the message with detailed logging"""
|
||||
if not message.get("documents"):
|
||||
return ""
|
||||
|
||||
document_context = ""
|
||||
|
||||
if self.document_handler:
|
||||
# Use document handler to merge contents
|
||||
document_context = self.document_handler.merge_document_contents(message)
|
||||
else:
|
||||
# Manual processing
|
||||
for document in message.get("documents", []):
|
||||
source = document.get("source", {})
|
||||
doc_name = source.get("name", "unnamed")
|
||||
|
||||
document_context += f"\n\n--- {doc_name} ---\n"
|
||||
|
||||
for content in document.get("contents", []):
|
||||
if content.get("type") == "text":
|
||||
document_context += content.get("text", "")
|
||||
|
||||
# Log summary of processed documents
|
||||
doc_count = len(message.get("documents", []))
|
||||
context_size = len(document_context)
|
||||
|
||||
logger.info(f"Processed {doc_count} documents, extracted {context_size} characters of context")
|
||||
|
||||
return document_context
|
||||
|
||||
|
||||
|
||||
async def _assess_complexity(self, task: str) -> bool:
|
||||
"""
|
||||
Assess task complexity to determine document structure.
|
||||
|
||||
Args:
|
||||
task: The task description
|
||||
|
||||
Returns:
|
||||
True if complex document needed, False otherwise
|
||||
"""
|
||||
if not self.ai_service:
|
||||
# Default to complex if no AI service
|
||||
return True
|
||||
|
||||
prompt = f"""
|
||||
Analyze this task and determine if it requires a complex or simple document structure:
|
||||
|
||||
{task}
|
||||
|
||||
Respond with only "COMPLEX" or "SIMPLE".
|
||||
"""
|
||||
|
||||
try:
|
||||
response = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "You determine document complexity requirements."},
|
||||
{"role": "user", "content": prompt}
|
||||
])
|
||||
|
||||
return "COMPLEX" in response.upper()
|
||||
except Exception:
|
||||
# Default to complex on error
|
||||
return True
|
||||
|
||||
async def _generate_title(self, task: str, document_type: str) -> str:
|
||||
"""
|
||||
Generate a title for the document.
|
||||
|
||||
Args:
|
||||
task: The task description
|
||||
document_type: Type of document
|
||||
|
||||
Returns:
|
||||
Generated title
|
||||
"""
|
||||
if not self.ai_service:
|
||||
# Default title if no AI service
|
||||
return f"{document_type.capitalize()} Document"
|
||||
|
||||
prompt = f"""
|
||||
Create a concise, professional title for this {document_type}:
|
||||
|
||||
{task}
|
||||
|
||||
Respond with ONLY the title, nothing else.
|
||||
"""
|
||||
|
||||
try:
|
||||
title = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "You create document titles."},
|
||||
{"role": "user", "content": prompt}
|
||||
])
|
||||
|
||||
# Clean up the title
|
||||
return title.strip('"\'#*- \n\t')
|
||||
except Exception:
|
||||
# Default title on error
|
||||
return f"{document_type.capitalize()} Document"
|
||||
|
||||
    async def _generate_complex_document(self, task: str, document_type: str, title: str) -> str:
        """
        Generate a complex document with structure.

        Args:
            task: The task description
            document_type: Type of document
            title: Document title

        Returns:
            Generated document content
        """
        if not self.ai_service:
            return f"# {title}\n\nUnable to generate complex document: AI service not available."

        prompt = f"""
        Create a comprehensive, well-structured {document_type} titled "{title}" based on:

        {task}

        The document should include:
        1. A clear introduction with purpose and scope
        2. Logically organized sections with headings
        3. Detailed content with examples and evidence
        4. A conclusion with key takeaways
        5. Appropriate formatting using Markdown

        Format the document in Markdown with proper headings, lists, and emphasis.
        """

        try:
            content = await self.ai_service.call_api([
                {"role": "system", "content": "You create comprehensive, well-structured documentation."},
                {"role": "user", "content": prompt}
            ])

            # Ensure title is at the top
            if not content.strip().startswith("# "):
                content = f"# {title}\n\n{content}"

            return content
        except Exception as e:
            return f"# {title}\n\nError generating document: {str(e)}"

    async def _generate_simple_document(self, task: str, document_type: str, title: str) -> str:
        """
        Generate a simple document without complex structure.

        Args:
            task: The task description
            document_type: Type of document
            title: Document title

        Returns:
            Generated document content
        """
        if not self.ai_service:
            return f"# {title}\n\nUnable to generate document: AI service not available."

        prompt = f"""
        Create a concise, focused {document_type} titled "{title}" based on:

        {task}

        The document should be clear, precise, and to the point without complex chapter structure.
        Format using Markdown with appropriate headings and formatting.
        """

        try:
            content = await self.ai_service.call_api([
                {"role": "system", "content": "You create concise, focused documentation."},
                {"role": "user", "content": prompt}
            ])

            # Ensure title is at the top
            if not content.strip().startswith("# "):
                content = f"# {title}\n\n{content}"

            return content
        except Exception as e:
            return f"# {title}\n\nError generating document: {str(e)}"

    def _detect_document_type(self, message: str) -> str:
        """
        Detect document type from the message.

        Args:
            message: User message

        Returns:
            Detected document type
        """
        message = message.lower()

        if any(term in message for term in ["manual", "guide", "instruction", "tutorial"]):
            return "manual"
        elif any(term in message for term in ["report", "analysis", "assessment", "review"]):
            return "report"
        elif any(term in message for term in ["process", "workflow", "procedure", "steps"]):
            return "process"
        elif any(term in message for term in ["presentation", "slides", "deck"]):
            return "presentation"
        else:
            return "document"

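    # A quick sanity check of the keyword matching above (hypothetical inputs):
    #   _detect_document_type("Write a user guide for the new API")  -> "manual"
    #   _detect_document_type("Summarize the Q3 performance review") -> "report"
    #   _detect_document_type("Draft a slide deck for the kickoff")  -> "presentation"
    #   _detect_document_type("Hello there")                         -> "document"
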
    def send_document_result(self, document_title: str, document_content: str,
                             sender_id: str, receiver_id: str, context_id: str = None) -> AgentMessage:
        """Send a document result using the protocol"""
        metadata = {
            "document_type": self._detect_document_type(document_content),
            "title": document_title,
            "created_at": datetime.now().isoformat()
        }

        return self.protocol.create_result_message(
            result_content=document_content,
            sender_id=sender_id,
            receiver_id=receiver_id,
            task_id=f"doc_{uuid.uuid4()}",
            output_data=metadata,
            result_format=self.result_format,
            context_id=context_id
        )

    def send_error_message(self, error_description: str, sender_id: str, receiver_id: str = None,
                           context_id: str = None) -> AgentMessage:
        """Send an error message using the protocol"""
        return self.protocol.create_error_message(
            error_description=error_description,
            sender_id=sender_id,
            receiver_id=receiver_id,
            error_type="documentation_error",
            error_details={"timestamp": datetime.now().isoformat()},
            context_id=context_id
        )

# Singleton instance
_documentation_agent = None


def get_documentation_agent():
    """Returns a singleton instance of the documentation agent"""
    global _documentation_agent
    if _documentation_agent is None:
        _documentation_agent = DocumentationAgent()
    return _documentation_agent

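# A minimal usage sketch (hypothetical; assumes the host application has
# already configured the agent's protocol and AI service):
#
#     agent = get_documentation_agent()
#     msg = agent.send_document_result(
#         document_title="Deployment Guide",
#         document_content="# Deployment Guide\n...",
#         sender_id="documentation",
#         receiver_id="orchestrator",
#     )
#     # msg is an AgentMessage carrying the document plus type/title metadata
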
File diff suppressed because it is too large

@@ -1,233 +0,0 @@
"""
|
||||
Enhanced base agent class for the Agentservice.
|
||||
Provides improved communication and document handling capabilities.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import Dict, Any, List, Optional, Tuple, Union
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AgentBase:
|
||||
"""
|
||||
Enhanced base agent class with improved communication capabilities.
|
||||
All specialized agents should inherit from this class.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the enhanced agent."""
|
||||
self.name = "base"
|
||||
self.capabilities = "Basic agent operations"
|
||||
self.result_format = "Text"
|
||||
# System dependencies
|
||||
self.ai_service = None
|
||||
|
||||
def set_dependencies(self, ai_service=None, document_handler=None, lucydom_interface=None):
|
||||
self.ai_service = ai_service
|
||||
|
||||
    def get_config(self) -> Dict[str, Any]:
        """
        Get detailed information about the agent.

        Returns:
            Dictionary with agent information
        """
        return {
            "name": self.name,
            "capabilities": self.capabilities,
            "result_format": self.result_format,
        }

    def get_capabilities(self) -> List[str]:
        """
        Get a list of agent capabilities.

        Returns:
            List of capability strings
        """
        # Split capabilities into a list
        if isinstance(self.capabilities, str):
            return [cap.strip() for cap in self.capabilities.split(",")]
        return []

    def get_supported_formats(self) -> List[str]:
        """
        Get supported output formats.

        Returns:
            List of supported format strings
        """
        if isinstance(self.result_format, str):
            return [fmt.strip() for fmt in self.result_format.split(",")]
        return ["Text"]

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process a message and generate a response.

        Args:
            message: Input message
            context: Optional context information

        Returns:
            Response message
        """
        # Basic implementation - should be overridden by specialized agents
        if not self.ai_service:
            logger.warning(f"Agent {self.id} has no AI service configured")
            return {
                "role": "assistant",
                "content": f"I'm {self.name}, but I'm not properly configured. Please set up the AI service.",
                "agent_id": self.id,
                "agent_type": self.type,
                "result_format": "Text"
            }

        # Process documents if available and a document handler is set up
        document_context = ""
        if self.supports_documents and self.document_handler and message.get("documents"):
            document_context = await self._process_documents(message)

        # Create enhanced prompt
        prompt = self._create_enhanced_prompt(message, document_context, context)

        # Generate response
        try:
            response_content = await self.ai_service.call_api([
                {"role": "system", "content": self._get_system_prompt()},
                {"role": "user", "content": prompt}
            ])

            # Process the response to extract any special instructions or status
            content, status = self._process_response(response_content)

            return {
                "role": "assistant",
                "content": content,
                "agent_id": self.id,
                "agent_type": self.type,
                "agent_name": self.name,
                "result_format": self.result_format,
                "status": status,
                "workflow_id": message.get("workflow_id"),
                "documents": message.get("documents", [])  # Pass through documents
            }
        except Exception as e:
            logger.error(f"Error in agent {self.id}: {str(e)}")
            return {
                "role": "assistant",
                "content": f"I encountered an error: {str(e)}",
                "agent_id": self.id,
                "agent_type": self.type,
                "result_format": "Text",
                "status": "error"
            }

    async def _process_documents(self, message: Dict[str, Any]) -> str:
        """
        Process documents in the message.

        Args:
            message: Input message with documents

        Returns:
            Document context as text
        """
        # Simply extract text from documents
        if not self.document_handler:
            return ""

        return self.document_handler.merge_document_contents(message)

    def _create_enhanced_prompt(self, message: Dict[str, Any], document_context: str, context: Dict[str, Any] = None) -> str:
        """
        Create an enhanced prompt with context.

        Args:
            message: Input message
            document_context: Document context
            context: Optional additional context

        Returns:
            Enhanced prompt
        """
        prompt = message.get("content", "")

        # Add document context if available
        if document_context:
            prompt += f"\n\n=== DOCUMENT CONTEXT ===\n{document_context}"

        # Add any additional context
        if context:
            # Add expected format if specified
            if "expected_format" in context:
                prompt += f"\n\nPlease format your response as: {context['expected_format']}"

            # Add dependency outputs if available
            if "dependency_outputs" in context:
                prompt += "\n\n=== OUTPUTS FROM PREVIOUS ACTIVITIES ===\n"
                for key, value in context["dependency_outputs"].items():
                    if isinstance(value, dict) and "content" in value:
                        prompt += f"\n--- {key} ---\n{value['content']}\n"
                    else:
                        prompt += f"\n--- {key} ---\n{str(value)}\n"

        return prompt

    def _get_system_prompt(self) -> str:
        """
        Get the system prompt for the agent.

        Returns:
            System prompt string
        """
        return f"""
        You are {self.name}, a specialized {self.type} agent.

        {self.description}

        Your capabilities include: {self.capabilities}

        You should format your responses according to: {self.result_format}

        Respond clearly and helpfully to the user's request.
        When appropriate, include a status indicator at the end of your message:

        [STATUS: COMPLETE] - When you've fully addressed the request
        [STATUS: PARTIAL] - When you've partially addressed the request
        [STATUS: QUESTION] - When you need more information
        """

    def _process_response(self, response: str) -> Tuple[str, str]:
        """
        Process the response to extract status and clean content.

        Args:
            response: Raw response from the AI

        Returns:
            Tuple of (cleaned content, status)
        """
        # Default status
        status = "complete"

        # Check for status tags
        import re
        status_match = re.search(r'\[STATUS:\s*(COMPLETE|PARTIAL|QUESTION)\]', response, re.IGNORECASE)

        if status_match:
            status_value = status_match.group(1).lower()
            # Remove the status tag
            content = re.sub(r'\[STATUS:\s*(COMPLETE|PARTIAL|QUESTION)\]', '', response, flags=re.IGNORECASE).strip()
            return content, status_value

        return response, status

# Factory functions
def get_enhanced_base_agent() -> AgentBase:
    """Get an instance of the enhanced base agent."""
    return AgentBase()

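# A minimal subclass sketch (EchoAgent is hypothetical; it only shows the
# intended extension points, not a production agent):

class EchoAgent(AgentBase):
    def __init__(self):
        super().__init__()
        # Override the identity defaults from AgentBase
        self.name = "echo"
        self.capabilities = "Echoing user input, basic diagnostics"

# Usage, assuming an ai_service object with an async call_api(messages) method:
#     agent = EchoAgent()
#     agent.set_dependencies(ai_service=my_ai_service)
#     reply = await agent.process_message({"content": "ping"})
#
# _process_response strips the trailing status tag, e.g.
#     AgentBase()._process_response("Done. [STATUS: COMPLETE]")
#     -> ("Done.", "complete")
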
@@ -1,921 +0,0 @@
"""
|
||||
Refactored helper function for intelligent data extraction (continued).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def data_extraction(
    prompt: str,
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    ai_service,
    lucydom_interface = None,
    workflow_id: str = None,
    add_log_func = None,
    document_handler = None  # Optional document handler
) -> Dict[str, Any]:
    """
    Performs AI-driven data extraction with improved document and image handling.

    Args:
        prompt: Specification of what data to extract
        files: List of all available files with metadata
        messages: List of all messages in the workflow
        ai_service: Service for AI requests
        lucydom_interface: Interface for database access (optional)
        workflow_id: Optional workflow ID for logging
        add_log_func: Optional function for adding logs
        document_handler: Optional document handler for structured document operations

    Returns:
        Structured text object with extracted data and context information
    """
    try:
        # Log extraction start
        _log(add_log_func, workflow_id, f"Starting data extraction with {len(files)} files", "info")

        # Create enhanced extraction plan using AI
        _log(add_log_func, workflow_id, "Creating extraction plan", "info")
        extraction_plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func)

        # If we have an extraction plan, log a summary
        if extraction_plan:
            extract_needed_count = sum(1 for item in extraction_plan if item.get("extract_needed", False))
            _log(add_log_func, workflow_id,
                 f"Extraction plan created: {len(extraction_plan)} files, {extract_needed_count} need extraction", "info")

        # Execute extractions, preferring document handler if available
        if document_handler:
            _log(add_log_func, workflow_id, "Using document handler for extraction", "info")
            extracted_data = await _execute_extractions_with_handler(
                extraction_plan,
                files,
                messages,
                document_handler,
                ai_service,
                workflow_id,
                add_log_func
            )
        else:
            # Fall back to original implementation
            _log(add_log_func, workflow_id, "Using fallback extraction method", "info")
            extracted_data = await _execute_extractions(
                extraction_plan,
                files,
                messages,
                lucydom_interface,
                ai_service,
                workflow_id,
                add_log_func
            )

        # Structure extracted data
        _log(add_log_func, workflow_id, f"Structuring extracted data from {len(extracted_data)} files", "info")
        structured_result = _structure_extracted_data(extracted_data, files, prompt)

        # Enhance with contextual summaries using AI
        if ai_service and structured_result["extracted_content"]:
            _log(add_log_func, workflow_id, "Creating contextual summaries for extracted content", "info")

            try:
                # Create a prompt for contextual summary
                summary_prompt = f"""
                Create concise, contextual summaries of the following extracted content according to this requirement:

                REQUIREMENT: {prompt}

                EXTRACTED CONTENT:
                """

                for item in structured_result["extracted_content"]:
                    file_name = item.get("name", "Unnamed file")
                    content_preview = item.get("content", "")[:500] + "..." if len(item.get("content", "")) > 500 else item.get("content", "")
                    summary_prompt += f"\n--- {file_name} ---\n{content_preview}\n"

                # Call AI for contextual summaries
                summaries = await ai_service.call_api([{"role": "user", "content": summary_prompt}])
                structured_result["contextual_summary"] = summaries

                _log(add_log_func, workflow_id, "Added contextual summaries to extracted data", "info")
            except Exception as e:
                _log(add_log_func, workflow_id, f"Error creating contextual summaries: {str(e)}", "warning")

        # Handle image-specific content separately
        image_content = [item for item in structured_result["extracted_content"]
                         if "Image Analysis" in item.get("content", "") or item.get("type") == "image"]

        if image_content:
            _log(add_log_func, workflow_id, f"Processing {len(image_content)} image-related content items", "info")

            # Add image analysis summary if we have AI service
            if ai_service:
                try:
                    # Create a prompt for image analysis summary
                    image_summary_prompt = f"""
                    Summarize the key visual information from these image analyses according to this requirement:

                    REQUIREMENT: {prompt}

                    IMAGE ANALYSES:
                    """

                    for item in image_content:
                        file_name = item.get("name", "Unnamed image")
                        content = item.get("content", "")
                        image_summary_prompt += f"\n--- {file_name} ---\n{content}\n"

                    # Call AI for image analysis summary
                    image_summaries = await ai_service.call_api([{"role": "user", "content": image_summary_prompt}])
                    structured_result["image_analysis_summary"] = image_summaries

                    _log(add_log_func, workflow_id, "Added image analysis summary to extracted data", "info")
                except Exception as e:
                    _log(add_log_func, workflow_id, f"Error creating image analysis summary: {str(e)}", "warning")

        return structured_result

    except Exception as e:
        logger.error(f"Error in data extraction: {str(e)}", exc_info=True)

        # Add error log
        if add_log_func and workflow_id:
            add_log_func(workflow_id, f"Data extraction error: {str(e)}", "error")

        # Return error result
        return {
            "error": str(e),
            "status": "error",
            "files_processed": len(files),
            "message": f"Data extraction failed: {str(e)}"
        }

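# A minimal driving sketch (hypothetical; it would live at the end of this
# module, since it needs the helpers defined below). Only call_api is required
# of the AI service on this code path:
#
#     class StubAIService:
#         async def call_api(self, messages):
#             return "[]"  # the model replies with an empty extraction plan
#
#     result = asyncio.run(data_extraction(
#         prompt="Collect all revenue figures",
#         files=[{"id": 1, "name": "notes.txt", "type": "text",
#                 "content_type": "text/plain", "size": 10}],
#         messages=[],
#         ai_service=StubAIService(),
#     ))
#     # result["status"] == "success", result["extracted_content"] == []
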
async def _execute_extractions_with_handler(
    extraction_plan: List[Dict[str, Any]],
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    document_handler,
    ai_service,
    workflow_id: str = None,
    add_log_func = None
) -> List[Dict[str, Any]]:
    """
    Execute extractions using the document handler with enhanced image processing.

    Args:
        extraction_plan: List of extraction instructions
        files: List of all available files
        messages: List of all messages
        document_handler: Document handler for structured operations
        ai_service: Service for AI requests
        workflow_id: Optional workflow ID for logging
        add_log_func: Optional function for adding logs

    Returns:
        List with extracted data per file
    """
    extracted_data = []

    # Sort by importance (highest first)
    sorted_plan = sorted(extraction_plan, key=lambda x: x.get("importance", 0), reverse=True)

    for extraction_item in sorted_plan:
        file_id = extraction_item.get("file_id")
        extract_needed = extraction_item.get("extract_needed", False)
        extraction_prompt = extraction_item.get("extraction_prompt", "")

        # Find file metadata
        file_metadata = next((f for f in files if f.get("id") == file_id), None)

        if not file_metadata:
            logger.warning(f"File with ID {file_id} not found")
            continue

        file_name = file_metadata.get("name", "")
        file_type = file_metadata.get("type", "")
        content_type = file_metadata.get("content_type", "")

        # Log extraction start
        _log(add_log_func, workflow_id,
             f"Processing file: {file_name} (Extraction needed: {extract_needed})", "info")

        # Only perform extraction if needed
        if extract_needed:
            # Check if file already exists in messages with content
            existing_content = _find_document_in_messages(file_id, messages)

            if existing_content and existing_content.get("content"):
                # Content already exists, check if we need more specialized extraction
                current_context = existing_content.get("extraction_context", "")

                # Check if new extraction prompt is different or more specific
                if extraction_prompt and extraction_prompt != current_context:
                    _log(add_log_func, workflow_id,
                         f"Re-extracting {file_name} with new prompt: {extraction_prompt}", "info")

                    # Create an empty message to extract into
                    empty_message = {}

                    # Use document handler to extract with new context
                    try:
                        result_message = await document_handler.add_file_to_message(
                            empty_message,
                            file_id,
                            extraction_prompt
                        )

                        # Get the document content from result
                        if "documents" in result_message and result_message["documents"]:
                            doc = result_message["documents"][0]

                            # Get text content
                            content_text = ""
                            is_extracted = False

                            for content in doc.get("contents", []):
                                if content.get("type") == "text":
                                    content_text = content.get("text", "")
                                    is_extracted = content.get("is_extracted", False)
                                    break

                            # Create extraction result
                            extracted_data.append({
                                "file_id": file_id,
                                "name": file_name,
                                "type": file_type,
                                "content": content_text,
                                "is_extracted": is_extracted,
                                "extraction_method": "document_handler_reextract",
                                "extraction_context": extraction_prompt
                            })

                            # Check for additional documents (e.g., extracted images)
                            for additional_doc in result_message.get("documents", [])[1:]:
                                source = additional_doc.get("source", {})

                                # Skip if not an extracted document
                                if source.get("type") != "extracted":
                                    continue

                                # Get content
                                add_content_text = ""
                                add_is_extracted = False

                                for content in additional_doc.get("contents", []):
                                    if content.get("type") == "text":
                                        add_content_text = content.get("text", "")
                                        add_is_extracted = content.get("is_extracted", False)
                                        break

                                # Add as separate extraction result
                                if add_content_text:
                                    extracted_data.append({
                                        "file_id": source.get("id", f"extracted_{uuid.uuid4()}"),
                                        "name": source.get("name", f"Extracted from {file_name}"),
                                        "type": source.get("content_type", "image"),
                                        "content": add_content_text,
                                        "is_extracted": add_is_extracted,
                                        "extraction_method": "document_handler_extracted_component",
                                        "extraction_context": content.get("extraction_context", extraction_prompt),
                                        "parent_file_id": file_id
                                    })

                                    _log(add_log_func, workflow_id,
                                         f"Extracted embedded content from {file_name}", "info")

                            _log(add_log_func, workflow_id,
                                 f"Re-extracted {file_name} with new context", "info")

                            continue
                    except Exception as e:
                        logger.error(f"Error re-extracting {file_name}: {str(e)}")
                        _log(add_log_func, workflow_id,
                             f"Error re-extracting {file_name}: {str(e)}", "warning")

                # Use existing content
                extracted_data.append({
                    "file_id": file_id,
                    "name": file_name,
                    "type": file_type,
                    "content": existing_content.get("content", ""),
                    "is_extracted": existing_content.get("is_extracted", False),
                    "extraction_method": "existing_content",
                    "extraction_context": current_context
                })

                _log(add_log_func, workflow_id,
                     f"Using existing content for {file_name}", "info")

                continue

            # Need to extract content with document handler
            try:
                # Create an empty message to extract into
                empty_message = {}

                # Use document handler to add file and extract content
                result_message = await document_handler.add_file_to_message(
                    empty_message,
                    file_id,
                    extraction_prompt
                )

                # Get the document content from result
                if "documents" in result_message and result_message["documents"]:
                    # Process main document
                    doc = result_message["documents"][0]  # First document is the main file

                    # Get text content
                    content_text = ""
                    is_extracted = False

                    for content in doc.get("contents", []):
                        if content.get("type") == "text":
                            content_text = content.get("text", "")
                            is_extracted = content.get("is_extracted", False)
                            break

                    # Create extraction result for main document
                    extracted_data.append({
                        "file_id": file_id,
                        "name": file_name,
                        "type": file_type,
                        "content": content_text,
                        "is_extracted": is_extracted,
                        "extraction_method": "document_handler",
                        "extraction_context": extraction_prompt
                    })

                    _log(add_log_func, workflow_id,
                         f"Extracted {file_name} using document handler", "info")

                    # Process additional documents (e.g., extracted images)
                    for additional_doc in result_message.get("documents", [])[1:]:
                        source = additional_doc.get("source", {})

                        # Skip if not an extracted document
                        if source.get("type") != "extracted":
                            continue

                        # Get content
                        add_content_text = ""
                        add_is_extracted = False

                        for content in additional_doc.get("contents", []):
                            if content.get("type") == "text":
                                add_content_text = content.get("text", "")
                                add_is_extracted = content.get("is_extracted", False)
                                break

                        # Add as separate extraction result
                        if add_content_text:
                            extracted_data.append({
                                "file_id": source.get("id", f"extracted_{uuid.uuid4()}"),
                                "name": source.get("name", f"Extracted from {file_name}"),
                                "type": source.get("content_type", "image"),
                                "content": add_content_text,
                                "is_extracted": add_is_extracted,
                                "extraction_method": "document_handler_extracted_component",
                                "extraction_context": content.get("extraction_context", extraction_prompt),
                                "parent_file_id": file_id
                            })

                            _log(add_log_func, workflow_id,
                                 f"Extracted embedded content from {file_name}", "info")
                else:
                    # Extraction failed
                    extracted_data.append({
                        "file_id": file_id,
                        "name": file_name,
                        "type": file_type,
                        "content": f"Failed to extract content from {file_name}",
                        "is_extracted": False,
                        "extraction_method": "failed"
                    })

                    _log(add_log_func, workflow_id,
                         f"Failed to extract content from {file_name}", "warning")
            except Exception as e:
                logger.error(f"Error extracting {file_name}: {str(e)}")

                _log(add_log_func, workflow_id,
                     f"Error extracting {file_name}: {str(e)}", "warning")

                extracted_data.append({
                    "file_id": file_id,
                    "name": file_name,
                    "type": file_type,
                    "content": f"Error extracting: {str(e)}",
                    "is_extracted": False,
                    "extraction_method": "error"
                })
        else:
            # No extraction needed, use existing content
            existing_content = _find_document_in_messages(file_id, messages)

            if existing_content:
                extracted_data.append({
                    "file_id": file_id,
                    "name": file_name,
                    "type": file_type,
                    "content": existing_content.get("content", ""),
                    "is_extracted": existing_content.get("is_extracted", False),
                    "extraction_method": "existing_content",
                    "extraction_context": existing_content.get("extraction_context", "")
                })

                _log(add_log_func, workflow_id,
                     f"Using existing content for {file_name}", "info")
            else:
                # No existing content found
                extracted_data.append({
                    "file_id": file_id,
                    "name": file_name,
                    "type": file_type,
                    "content": f"No content available for {file_name}",
                    "is_extracted": False,
                    "extraction_method": "none"
                })

                _log(add_log_func, workflow_id,
                     f"No content available for {file_name}", "warning")

    return extracted_data

def _find_document_in_messages(file_id: int, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Find a document by file ID in workflow messages.

    Args:
        file_id: ID of the file to find
        messages: List of messages to search

    Returns:
        Dictionary with document information or empty dict if not found
    """
    for message in messages:
        for document in message.get("documents", []):
            source = document.get("source", {})

            # Check if file ID matches
            if source.get("id") == str(file_id) or source.get("id") == file_id:
                # Found the document
                content_text = ""
                is_extracted = False

                # Look for text content
                for content in document.get("contents", []):
                    if content.get("type") == "text":
                        content_text = content.get("text", "")
                        is_extracted = content.get("is_extracted", False)
                        break

                return {
                    "document_id": document.get("id"),
                    "message_id": message.get("id"),
                    "content": content_text,
                    "is_extracted": is_extracted
                }

    return {}

async def _create_extraction_plan(
    prompt: str,
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    ai_service,
    workflow_id: str = None,
    add_log_func = None
) -> List[Dict[str, Any]]:
    """
    Create an extraction plan with AI support.

    Args:
        prompt: Specification of which data should be extracted
        files: List of all available files with metadata
        messages: List of all messages in the workflow
        ai_service: Service for AI requests
        workflow_id: Optional workflow ID for logging
        add_log_func: Optional function for adding logs

    Returns:
        Extraction plan (list of extraction instructions per file)
    """
    # Build context information for the AI call
    file_infos = []
    for file in files:
        # Base metadata
        file_info = {
            "id": file.get("id", ""),
            "name": file.get("name", ""),
            "type": file.get("type", ""),
            "content_type": file.get("content_type", ""),
            "size": file.get("size", "")
        }

        # Check the extraction status (if available)
        doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages)

        if doc_contents:
            # Check whether at least one content item has is_extracted=True
            already_extracted = any(
                content.get("is_extracted", False) for content in doc_contents
            )
            file_info["already_extracted"] = already_extracted

            # Add a short preview of the content (if available)
            for content in doc_contents:
                if content.get("type") == "text" and content.get("text"):
                    preview_text = content.get("text", "")[:200] + "..." if len(content.get("text", "")) > 200 else content.get("text", "")
                    file_info["content_preview"] = preview_text
                    break
        else:
            file_info["already_extracted"] = False

        file_infos.append(file_info)

    # Build the AI prompt
    extraction_prompt = f"""
    You are a data extraction expert who uses AI analysis to decide which files
    and contents need to be extracted for a given task.

    TASK:
    {prompt}

    AVAILABLE FILES:
    {json.dumps(file_infos, indent=2)}

    For every file that is relevant to the task, create an extraction instruction with the following information:
    1. file_id: The ID of the file to extract
    2. extract_needed: Boolean, whether extraction is required (True if the file has not been extracted yet and is needed for the task)
    3. extraction_prompt: A specific prompt for extracting the file (especially important for images and non-text files)
    4. importance: Priority/importance for the task (1-5, where 5 is the most important)

    Format:
    [
        {{
            "file_id": 1234,
            "extract_needed": true,
            "extraction_prompt": "Extract the table data, focusing on the revenue figures",
            "importance": 5
        }},
        ...
    ]

    Return only the JSON array, without any further explanation.
    """

    # Add a log entry
    if add_log_func and workflow_id:
        add_log_func(workflow_id, "Creating extraction plan...", "info")

    try:
        # Perform the AI call
        extraction_plan_response = await ai_service.call_api([{"role": "user", "content": extraction_prompt}])

        # Extract the JSON from the response
        import re
        json_match = re.search(r'\[.*\]', extraction_plan_response, re.DOTALL)

        if json_match:
            extraction_plan = json.loads(json_match.group(0))

            # Add a log entry
            if add_log_func and workflow_id:
                add_log_func(
                    workflow_id,
                    f"Extraction plan created for {len(extraction_plan)} files",
                    "info"
                )

            return extraction_plan
        else:
            # Fallback for parsing problems
            if add_log_func and workflow_id:
                add_log_func(
                    workflow_id,
                    "Parsing error in the extraction plan, creating a default plan",
                    "warning"
                )

            # Default plan: extract all files that have not been extracted yet
            default_plan = []
            for file in files:
                doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages)
                already_extracted = any(
                    content.get("is_extracted", False) for content in doc_contents
                ) if doc_contents else False

                default_plan.append({
                    "file_id": file.get("id", 0),
                    "extract_needed": not already_extracted,
                    "extraction_prompt": f"Extract all relevant information from {file.get('name', '')}",
                    "importance": 3
                })

            return default_plan

    except Exception as e:
        logger.error(f"Error creating the extraction plan: {str(e)}", exc_info=True)

        if add_log_func and workflow_id:
            add_log_func(
                workflow_id,
                f"Error creating the extraction plan: {str(e)}",
                "error"
            )

        # Return an empty plan on errors
        return []

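# For reference, each plan item consumed by the executors below looks roughly
# like this (values are illustrative):
#
#     {
#         "file_id": 1234,
#         "extract_needed": True,
#         "extraction_prompt": "Extract the table data, focusing on revenue figures",
#         "importance": 5,
#     }
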
async def _execute_extractions(
    extraction_plan: List[Dict[str, Any]],
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    lucydom_interface,
    ai_service,
    workflow_id: str = None,
    add_log_func = None,
    logging_utils = None
) -> List[Dict[str, Any]]:
    """
    Execute the planned extractions.

    Args:
        extraction_plan: List of extraction instructions
        files: List of all available files
        messages: List of all messages in the workflow
        lucydom_interface: Interface for database access
        ai_service: Service for AI requests
        workflow_id: Optional workflow ID for logging
        add_log_func: Optional function for adding logs
        logging_utils: Optional logging utility

    Returns:
        List with extracted data per file
    """
    extracted_data = []

    # Sort by importance
    sorted_plan = sorted(extraction_plan, key=lambda x: x.get("importance", 0), reverse=True)

    for extraction_item in sorted_plan:
        file_id = extraction_item.get("file_id")
        extract_needed = extraction_item.get("extract_needed", False)
        extraction_prompt = extraction_item.get("extraction_prompt", "")

        # Find file metadata
        file_metadata = next((f for f in files if f.get("id") == file_id), None)

        if not file_metadata:
            logger.warning(f"File with ID {file_id} not found")
            continue

        file_name = file_metadata.get("name", "")
        file_type = file_metadata.get("type", "")
        content_type = file_metadata.get("content_type", "")

        # Add log
        if logging_utils:
            logging_utils.info(f"Processing file: {file_name} (Extraction needed: {extract_needed})", "extraction")
        elif add_log_func and workflow_id:
            add_log_func(
                workflow_id,
                f"Processing file: {file_name} (Extraction needed: {extract_needed})",
                "info"
            )

        # Only perform extraction if needed
        if extract_needed:
            # Get file content via LucyDOM interface
            if lucydom_interface:
                try:
                    file_content = await lucydom_interface.read_file_content(file_id)

                    if not file_content:
                        if logging_utils:
                            logging_utils.warning(f"File {file_name} not found", "extraction")
                        elif add_log_func and workflow_id:
                            add_log_func(workflow_id, f"File {file_name} not found", "warning")
                        continue

                    # Perform extraction based on file type
                    if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
                        # Image analysis with AI service
                        if ai_service and hasattr(ai_service, "analyze_image"):
                            try:
                                image_analysis = await ai_service.analyze_image(
                                    image_data=file_content,
                                    prompt=extraction_prompt,
                                    mime_type=content_type
                                )

                                extracted_data.append({
                                    "file_id": file_id,
                                    "name": file_name,
                                    "type": file_type,
                                    "content": image_analysis,
                                    "is_extracted": True,
                                    "extraction_method": "image_analysis"
                                })

                                if logging_utils:
                                    logging_utils.info(f"Image {file_name} successfully analyzed", "extraction")
                                elif add_log_func and workflow_id:
                                    add_log_func(workflow_id, f"Image {file_name} successfully analyzed", "info")
                            except Exception as e:
                                logger.error(f"Error analyzing image {file_name}: {str(e)}")
                                if logging_utils:
                                    logging_utils.error(f"Error analyzing image {file_name}: {str(e)}", "extraction")
                                elif add_log_func and workflow_id:
                                    add_log_func(workflow_id, f"Error analyzing image {file_name}: {str(e)}", "error")
                        else:
                            # Fallback if no image analysis available
                            extracted_data.append({
                                "file_id": file_id,
                                "name": file_name,
                                "type": file_type,
                                "content": f"Image: {file_name} (Analysis not available)",
                                "is_extracted": False,
                                "extraction_method": "none"
                            })
                    else:
                        # Text-based extraction for all other file types
                        try:
                            # Import directly here to avoid circular imports
                            from modules.agentservice_utils import extract_text_from_file_content

                            content, is_extracted = extract_text_from_file_content(
                                file_content, file_name, content_type
                            )

                            extracted_data.append({
                                "file_id": file_id,
                                "name": file_name,
                                "type": file_type,
                                "content": content,
                                "is_extracted": is_extracted,
                                "extraction_method": "text_extraction"
                            })

                            if logging_utils:
                                logging_utils.info(f"File {file_name} extracted (Status: {is_extracted})", "extraction")
                            elif add_log_func and workflow_id:
                                add_log_func(
                                    workflow_id,
                                    f"File {file_name} extracted (Status: {is_extracted})",
                                    "info"
                                )
                        except Exception as e:
                            logger.error(f"Error extracting text from {file_name}: {str(e)}")
                            if logging_utils:
                                logging_utils.error(f"Error extracting text from {file_name}: {str(e)}", "extraction")
                            elif add_log_func and workflow_id:
                                add_log_func(workflow_id, f"Error extracting text from {file_name}: {str(e)}", "error")
                except Exception as e:
                    logger.error(f"Error reading file {file_name}: {str(e)}")
                    if logging_utils:
                        logging_utils.error(f"Error reading file {file_name}: {str(e)}", "extraction")
                    elif add_log_func and workflow_id:
                        add_log_func(workflow_id, f"Error reading file {file_name}: {str(e)}", "error")
            else:
                logger.warning(f"No LucyDOM interface available for file {file_name}")
                if logging_utils:
                    logging_utils.warning(f"No LucyDOM interface available for file {file_name}", "extraction")
                elif add_log_func and workflow_id:
                    add_log_func(workflow_id, f"No LucyDOM interface available for file {file_name}", "warning")
        else:
            # No extraction needed, use existing content
            doc_contents = _extract_document_contents_from_messages(file_id, messages)

            if doc_contents:
                # Use first text content
                for content in doc_contents:
                    if content.get("type") == "text":
                        extracted_data.append({
                            "file_id": file_id,
                            "name": file_name,
                            "type": file_type,
                            "content": content.get("text", ""),
                            "is_extracted": content.get("is_extracted", False),
                            "extraction_method": "existing_content"
                        })
                        break
            else:
                # No existing content found
                extracted_data.append({
                    "file_id": file_id,
                    "name": file_name,
                    "type": file_type,
                    "content": f"No content available for {file_name}",
                    "is_extracted": False,
                    "extraction_method": "none"
                })

    return extracted_data

def _structure_extracted_data(
    extracted_data: List[Dict[str, Any]],
    files: List[Dict[str, Any]],
    prompt: str
) -> Dict[str, Any]:
    """
    Structure the extracted data into a formatted result.

    Args:
        extracted_data: List of extracted data per file
        files: List of all available files
        prompt: Original extraction prompt

    Returns:
        Structured result object
    """
    # Create base structure
    result = {
        "prompt": prompt,
        "files_processed": len(extracted_data),
        "total_files": len(files),
        "extraction_timestamp": datetime.now().isoformat(),
        "status": "success",
        "extracted_content": []
    }

    # Add extracted content
    for data_item in extracted_data:
        # Enrich with file metadata
        file_id = data_item.get("file_id", 0)
        file_metadata = next((f for f in files if f.get("id") == file_id), {})

        content_item = {
            "file_id": file_id,
            "name": data_item.get("name", file_metadata.get("name", "")),
            "type": data_item.get("type", file_metadata.get("type", "")),
            "content_type": file_metadata.get("content_type", ""),
            "size": file_metadata.get("size", ""),
            "is_extracted": data_item.get("is_extracted", False),
            "extraction_method": data_item.get("extraction_method", ""),
            "content": data_item.get("content", "")
        }

        result["extracted_content"].append(content_item)

    return result

def _extract_document_contents_from_messages(file_id: int, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Extract document contents for a specific file from workflow messages.
    Enhanced to handle the new document structure.

    Args:
        file_id: ID of the file
        messages: List of all messages in the workflow

    Returns:
        List of document contents for the specified file
    """
    contents = []

    for message in messages:
        # Search documents in the message
        for document in message.get("documents", []):
            source = document.get("source", {})

            # Check if file ID matches (compare as strings so int and str IDs both match)
            if str(source.get("id")) == str(file_id):
                # Add contents of the file
                doc_contents = document.get("contents", [])

                if doc_contents:
                    # Ensure each content has document reference
                    for content in doc_contents:
                        content_copy = content.copy()
                        content_copy["document_id"] = document.get("id")
                        content_copy["message_id"] = message.get("id")
                        contents.append(content_copy)

    return contents

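# The message/document structure these helpers traverse looks roughly like
# this (values are illustrative):
#
#     message = {
#         "id": "msg_1",
#         "documents": [{
#             "id": "doc_1",
#             "source": {"id": "42", "name": "report.pdf", "content_type": "application/pdf"},
#             "contents": [{"type": "text", "text": "...", "is_extracted": True}],
#         }],
#     }
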
def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):
    """Helper function for logging with different log functions"""
    # Log via logger instance
    if log_type == "error":
        logger.error(message)
    elif log_type == "warning":
        logger.warning(message)
    else:
        logger.info(message)

    # Log via provided log function (if available)
    if add_log_func and workflow_id:
        add_log_func(workflow_id, message, log_type, agent_id, agent_name)

@@ -1,890 +0,0 @@
"""
|
||||
Enhanced document handling module for the Agentservice (continued).
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, Tuple, Union
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentHandler:
|
||||
"""
|
||||
Centralized document handler for consistent document management across the system.
|
||||
"""
|
||||
|
||||
def __init__(self, workflow_id: str = None, lucydom_interface = None, ai_service = None):
|
||||
"""Initialize the document handler."""
|
||||
self.workflow_id = workflow_id
|
||||
self.lucydom_interface = lucydom_interface
|
||||
self.ai_service = ai_service
|
||||
|
||||
# Import necessary utilities
|
||||
from modules.agentservice_filemanager import get_file_manager
|
||||
self.file_manager = get_file_manager()
|
||||
|
||||
    def set_workflow_id(self, workflow_id: str):
        """Set or update the workflow ID."""
        self.workflow_id = workflow_id

    def set_lucydom_interface(self, lucydom_interface):
        """Set or update the LucyDOM interface."""
        self.lucydom_interface = lucydom_interface

    def set_ai_service(self, ai_service):
        """Set or update the AI service."""
        self.ai_service = ai_service

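    # A typical wiring sketch (host-side; the lucydom and ai objects are
    # hypothetical placeholders provided by the surrounding application):
    #
    #     handler = DocumentHandler(workflow_id="wf_1",
    #                               lucydom_interface=lucydom,
    #                               ai_service=ai)
    #     message = await handler.add_file_to_message(
    #         {}, file_id=42, extraction_prompt="Summarize the key figures")
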
    async def add_file_to_message(self, message: Dict[str, Any], file_id: int, extraction_prompt: str = None) -> Dict[str, Any]:
        """
        Add a file to a message with contextual extraction.

        Args:
            message: The message to add the file to
            file_id: ID of the file to add
            extraction_prompt: Optional prompt for contextual extraction (e.g., for images)

        Returns:
            Updated message with the file added
        """
        if not self.lucydom_interface:
            logger.error("LucyDOM interface not available")
            return message

        try:
            # Get file metadata
            file = self.lucydom_interface.get_file(file_id)
            if not file:
                logger.warning(f"File with ID {file_id} not found")
                return message

            # Get necessary file information
            file_name = file.get("name", "unnamed_file")
            file_type = file.get("type", "unknown")
            content_type = file.get("content_type")

            # Initialize documents array if needed
            if "documents" not in message:
                message["documents"] = []

            # Check if file is already in the message
            file_already_added = any(
                doc.get("source", {}).get("id") == str(file_id)
                for doc in message.get("documents", [])
            )

            if file_already_added:
                logger.info(f"File {file_name} already exists in message, skipping")
                return message

            # Create a unique document ID
            doc_id = f"doc_{uuid.uuid4()}"

            # Create document structure
            document = {
                "id": doc_id,
                "source": {
                    "type": "file",
                    "id": str(file_id),
                    "name": file_name,
                    "content_type": content_type,
                    "size": file.get("size"),
                    "upload_date": file.get("upload_date", datetime.now().isoformat())
                },
                "contents": []
            }

            # Only read content if we have an extraction prompt or specific types
            if (extraction_prompt or
                    file_type in ["document", "text"] or
                    (content_type and content_type.startswith("text/"))):

                # Read file content
                file_content = await self.lucydom_interface.read_file_content(file_id)

                if file_content:
                    # Process based on file type
                    if file_type == "image" or (content_type and content_type.startswith("image/")):
                        # Image analysis if prompt provided
                        if self.ai_service and hasattr(self.ai_service, "analyze_image"):
                            try:
                                # Use provided prompt or default one
                                image_prompt = extraction_prompt or "Describe this image in detail"

                                logger.info(f"Analyzing image {file_name} with prompt: {image_prompt}")

                                image_analysis = await self.ai_service.analyze_image(
                                    image_data=file_content,
                                    prompt=image_prompt,
                                    mime_type=content_type
                                )

                                # Add the analysis as text content
                                document["contents"].append({
                                    "type": "text",
                                    "text": f"Image Analysis:\n{image_analysis}",
                                    "is_extracted": True,
                                    "extraction_context": extraction_prompt
                                })

                                logger.info(f"Added image analysis for {file_name} to message")
                            except Exception as e:
                                logger.error(f"Error analyzing image {file_name}: {str(e)}")
                                document["contents"].append({
                                    "type": "text",
                                    "text": f"Image file: {file_name} (Analysis failed: {str(e)})",
                                    "is_extracted": False
                                })
                        else:
                            # Just add placeholder if no analysis available
                            document["contents"].append({
                                "type": "text",
                                "text": f"Image file: {file_name} (no analysis requested)",
                                "is_extracted": False
                            })

                    # Enhanced PDF processing - extract text and images
                    elif file_name.lower().endswith('.pdf'):
                        logger.info(f"Processing PDF file: {file_name}")

                        # Extract text content first
                        from modules.agentservice_utils import extract_text_from_file_content

                        text_content, is_extracted = extract_text_from_file_content(
                            file_content, file_name, content_type
                        )

                        # Add text content
                        document["contents"].append({
                            "type": "text",
                            "text": text_content,
                            "is_extracted": is_extracted,
                            "extraction_context": extraction_prompt
                        })

                        logger.info(f"Extracted text content from PDF {file_name}")

                        # Extract and analyze images from PDF if we have AI service
                        if self.ai_service and hasattr(self.ai_service, "analyze_image"):
                            try:
                                # Import necessary modules
                                import fitz  # PyMuPDF
                                from io import BytesIO

                                # Add detailed logging
                                logger.info(f"Starting PDF image extraction for {file_name}")

                                # Check if extraction prompt is available or use default
                                image_prompt = extraction_prompt or "Describe this image from the PDF document"

                                # Open PDF from memory stream with detailed error checking
                                try:
                                    pdf_document = fitz.open(stream=file_content, filetype="pdf")
                                    logger.info(f"Successfully opened PDF with {len(pdf_document)} pages")
                                except Exception as pdf_open_error:
                                    logger.error(f"Failed to open PDF: {str(pdf_open_error)}")
                                    raise

                                # Initialize images list and image count
                                images_analysis = []
                                image_count = 0

                                # Process each page
                                for page_num, page in enumerate(pdf_document, 1):
                                    # Get list of images on the page
                                    image_list = page.get_images(full=True)

                                    if image_list:
                                        logger.info(f"Found {len(image_list)} images on page {page_num}")

                                        # Process each image
                                        for img_index, img in enumerate(image_list):
                                            try:
                                                xref = img[0]  # Get image reference

                                                # Extract image data
                                                base_image = pdf_document.extract_image(xref)
                                                image_bytes = base_image["image"]
                                                image_ext = base_image["ext"]

                                                # Analyze image
                                                image_analysis = await self.ai_service.analyze_image(
                                                    image_data=image_bytes,
                                                    prompt=f"{image_prompt} (Page {page_num}, Image {img_index+1})",
                                                    mime_type=f"image/{image_ext}"
                                                )

                                                # Add to analysis list
                                                images_analysis.append({
                                                    "page": page_num,
                                                    "index": img_index + 1,
                                                    "analysis": image_analysis
                                                })

                                                image_count += 1
                                                logger.info(f"Analyzed image {img_index+1} on page {page_num}")

                                                # Create a separate document for each extracted image
                                                if True:  # Replace with a real condition if this should be configurable
                                                    img_doc_id = f"img_doc_{uuid.uuid4()}"
                                                    image_filename = f"page{page_num}_image{img_index+1}.{image_ext}"

                                                    image_document = {
                                                        "id": img_doc_id,
                                                        "source": {
                                                            "type": "extracted",
                                                            "parent_id": str(file_id),
                                                            "id": img_doc_id,
                                                            "name": image_filename,
                                                            "content_type": f"image/{image_ext}",
                                                            "size": len(image_bytes)
                                                        },
                                                        "contents": [{
                                                            "type": "text",
                                                            "text": f"Image Analysis (PDF Page {page_num}, Image {img_index+1}):\n{image_analysis}",
                                                            "is_extracted": True,
                                                            "extraction_context": image_prompt
                                                        }]
                                                    }

                                                    # Add image document to message
                                                    message["documents"].append(image_document)
                                                    logger.info(f"Added extracted image document {image_filename} to message")

                                            except Exception as img_err:
                                                logger.warning(f"Error processing image {img_index} on page {page_num}: {str(img_err)}")

                                # Close the PDF
                                pdf_document.close()

                                # Add combined image analysis to the main document
                                if images_analysis:
                                    combined_analysis = "\n\n## Embedded Images Analysis\n\n"
                                    for img in images_analysis:
                                        combined_analysis += f"### Page {img['page']}, Image {img['index']}\n{img['analysis']}\n\n"

                                    document["contents"].append({
                                        "type": "text",
                                        "text": combined_analysis,
                                        "is_extracted": True,
                                        "extraction_context": f"Analysis of {image_count} images embedded in the PDF"
                                    })

                                    logger.info(f"Added combined analysis of {image_count} PDF images to document")
                            except ImportError:
                                logger.warning("PyMuPDF (fitz) is not installed, skipping PDF image extraction")
                                document["contents"].append({
                                    "type": "text",
                                    "text": "\n\nNote: PDF may contain images that were not extracted due to missing libraries.",
                                    "is_extracted": False
                                })
                            except Exception as e:
                                logger.error(f"Error extracting images from PDF {file_name}: {str(e)}")
                                document["contents"].append({
                                    "type": "text",
                                    "text": f"\n\nError extracting images from PDF: {str(e)}",
                                    "is_extracted": False
                                })

                    # Word document processing with image extraction
                    elif file_name.lower().endswith(('.docx', '.doc')):
                        logger.info(f"Processing Word document: {file_name}")

                        # Extract text content first
                        from modules.agentservice_utils import extract_text_from_file_content

                        text_content, is_extracted = extract_text_from_file_content(
                            file_content, file_name, content_type
                        )

                        # Add text content
                        document["contents"].append({
                            "type": "text",
                            "text": text_content,
                            "is_extracted": is_extracted,
                            "extraction_context": extraction_prompt
                        })

                        logger.info(f"Extracted text content from Word document {file_name}")

                        # Attempt to extract and analyze images from Word document
                        if self.ai_service and hasattr(self.ai_service, "analyze_image"):
                            try:
                                # For .docx documents
                                if file_name.lower().endswith('.docx'):
                                    import zipfile
                                    from io import BytesIO

                                    # Check if extraction prompt is available or use default
                                    image_prompt = extraction_prompt or "Describe this image from the Word document"

                                    # Create a zipfile object from the .docx content
                                    docx_zip = zipfile.ZipFile(BytesIO(file_content))

                                    # Images in .docx are stored in the "word/media" directory
                                    image_files = [f for f in docx_zip.namelist() if f.startswith('word/media/')]

                                    if image_files:
                                        logger.info(f"Found {len(image_files)} images in Word document {file_name}")

                                        # Process each image
                                        images_analysis = []
                                        for i, img_path in enumerate(image_files):
                                            try:
                                                # Extract image data
                                                image_bytes = docx_zip.read(img_path)

                                                # Determine image type from filename
                                                image_ext = img_path.split('.')[-1] if '.' in img_path else 'png'

                                                # Analyze image
                                                image_analysis = await self.ai_service.analyze_image(
                                                    image_data=image_bytes,
                                                    prompt=f"{image_prompt} (Image {i+1})",
                                                    mime_type=f"image/{image_ext}"
                                                )

                                                # Add to analysis list
                                                images_analysis.append({
                                                    "index": i + 1,
                                                    "path": img_path,
                                                    "analysis": image_analysis
                                                })

                                                logger.info(f"Analyzed image {i+1} ({img_path}) from Word document")

                                                # Create a separate document for each extracted image
                                                img_doc_id = f"img_doc_{uuid.uuid4()}"
                                                image_filename = f"word_image{i+1}.{image_ext}"

                                                image_document = {
                                                    "id": img_doc_id,
                                                    "source": {
                                                        "type": "extracted",
                                                        "parent_id": str(file_id),
                                                        "id": img_doc_id,
                                                        "name": image_filename,
                                                        "content_type": f"image/{image_ext}",
                                                        "size": len(image_bytes)
                                                    },
                                                    "contents": [{
                                                        "type": "text",
                                                        "text": f"Image Analysis (Word Document Image {i+1}):\n{image_analysis}",
                                                        "is_extracted": True,
                                                        "extraction_context": image_prompt
                                                    }]
                                                }

                                                # Add image document to message
                                                message["documents"].append(image_document)
                                                logger.info(f"Added extracted image document {image_filename} to message")

                                            except Exception as img_err:
                                                logger.warning(f"Error processing image {img_path}: {str(img_err)}")

                                        # Add combined image analysis to the main document
                                        if images_analysis:
                                            combined_analysis = "\n\n## Embedded Images Analysis\n\n"
                                            for img in images_analysis:
                                                combined_analysis += f"### Image {img['index']}\n{img['analysis']}\n\n"

                                            document["contents"].append({
                                                "type": "text",
                                                "text": combined_analysis,
                                                "is_extracted": True,
                                                "extraction_context": f"Analysis of {len(images_analysis)} images embedded in the Word document"
                                            })

                                            logger.info(f"Added combined analysis of {len(images_analysis)} Word document images")

                                    # Close the zip file
                                    docx_zip.close()

                                # Note: for .doc (older format) we would need additional libraries;
                                # this could be implemented with libraries like antiword or pywin32
                                elif file_name.lower().endswith('.doc'):
                                    logger.warning("Image extraction from .doc files is not supported yet")
                                    document["contents"].append({
                                        "type": "text",
                                        "text": "\n\nNote: This is an older .doc format document. Images may be present but could not be extracted.",
                                        "is_extracted": False
                                    })

                            except Exception as e:
                                logger.error(f"Error extracting images from Word document {file_name}: {str(e)}")
                                document["contents"].append({
                                    "type": "text",
                                    "text": f"\n\nError extracting images from Word document: {str(e)}",
                                    "is_extracted": False
                                })

                    # Excel file processing with enhanced capabilities
                    elif file_name.lower().endswith(('.xlsx', '.xls')):
                        logger.info(f"Processing Excel document: {file_name}")

                        # Extract text representation of spreadsheet data
|
||||
from modules.agentservice_utils import extract_text_from_file_content
|
||||
|
||||
text_content, is_extracted = extract_text_from_file_content(
|
||||
file_content, file_name, content_type
|
||||
)
|
||||
|
||||
# Add text content
|
||||
document["contents"].append({
|
||||
"type": "text",
|
||||
"text": text_content,
|
||||
"is_extracted": is_extracted,
|
||||
"extraction_context": extraction_prompt
|
||||
})
|
||||
|
||||
logger.info(f"Extracted data from Excel document {file_name}")
|
||||
|
||||
# Try to extract charts and images if available
|
||||
if self.ai_service and hasattr(self.ai_service, "analyze_image"):
|
||||
try:
|
||||
# For .xlsx files (newer format)
|
||||
if file_name.lower().endswith('.xlsx'):
|
||||
import zipfile
|
||||
from io import BytesIO
|
||||
|
||||
# Create a zipfile object from the Excel content
|
||||
xlsx_zip = zipfile.ZipFile(BytesIO(file_content))
|
||||
|
||||
# Charts and images can be in various directories
|
||||
media_paths = [
|
||||
'xl/media/',
|
||||
'xl/drawings/',
|
||||
'xl/charts/'
|
||||
]
|
||||
|
||||
# Collect all potential media files
|
||||
media_files = []
|
||||
for path in media_paths:
|
||||
media_files.extend([f for f in xlsx_zip.namelist() if f.startswith(path)])
|
||||
|
||||
if media_files:
|
||||
logger.info(f"Found {len(media_files)} media files in Excel document {file_name}")
|
||||
|
||||
# Process image files (skip XML and other non-image files)
|
||||
image_extensions = ['png', 'jpeg', 'jpg', 'gif', 'bmp', 'tiff', 'emf', 'wmf']
|
||||
image_files = [f for f in media_files if f.split('.')[-1].lower() in image_extensions]
|
||||
|
||||
if image_files:
|
||||
logger.info(f"Found {len(image_files)} images/charts in Excel document {file_name}")
|
||||
|
||||
image_prompt = extraction_prompt or "Describe this chart/image from the Excel document"
|
||||
images_analysis = []
|
||||
|
||||
for i, img_path in enumerate(image_files):
|
||||
try:
|
||||
# Extract image data
|
||||
image_bytes = xlsx_zip.read(img_path)
|
||||
|
||||
# Determine image type from filename
|
||||
image_ext = img_path.split('.')[-1] if '.' in img_path else 'png'
|
||||
|
||||
# Analyze image
|
||||
image_analysis = await self.ai_service.analyze_image(
|
||||
image_data=image_bytes,
|
||||
prompt=f"{image_prompt} (Describe what this chart or image shows, including any data trends or patterns visible)",
|
||||
mime_type=f"image/{image_ext}"
|
||||
)
|
||||
|
||||
# Add to analysis list
|
||||
images_analysis.append({
|
||||
"index": i + 1,
|
||||
"path": img_path,
|
||||
"analysis": image_analysis
|
||||
})
|
||||
|
||||
logger.info(f"Analyzed image/chart {i+1} from Excel document")
|
||||
|
||||
# Create a separate document for each extracted image
|
||||
img_doc_id = f"img_doc_{uuid.uuid4()}"
|
||||
image_filename = f"excel_image{i+1}.{image_ext}"
|
||||
|
||||
image_document = {
|
||||
"id": img_doc_id,
|
||||
"source": {
|
||||
"type": "extracted",
|
||||
"parent_id": str(file_id),
|
||||
"id": img_doc_id,
|
||||
"name": image_filename,
|
||||
"content_type": f"image/{image_ext}",
|
||||
"size": len(image_bytes)
|
||||
},
|
||||
"contents": [{
|
||||
"type": "text",
|
||||
"text": f"Chart/Image Analysis (Excel Document Item {i+1}):\n{image_analysis}",
|
||||
"is_extracted": True,
|
||||
"extraction_context": image_prompt
|
||||
}]
|
||||
}
|
||||
|
||||
# Add image document to message
|
||||
message["documents"].append(image_document)
|
||||
|
||||
except Exception as img_err:
|
||||
logger.warning(f"Error processing image {img_path}: {str(img_err)}")
|
||||
|
||||
# Add combined image analysis to the main document
|
||||
if images_analysis:
|
||||
combined_analysis = "\n\n## Embedded Charts and Images Analysis\n\n"
|
||||
for img in images_analysis:
|
||||
combined_analysis += f"### Chart/Image {img['index']}\n{img['analysis']}\n\n"
|
||||
|
||||
document["contents"].append({
|
||||
"type": "text",
|
||||
"text": combined_analysis,
|
||||
"is_extracted": True,
|
||||
"extraction_context": f"Analysis of {len(images_analysis)} charts/images from the Excel document"
|
||||
})
|
||||
|
||||
# Close the zip file
|
||||
xlsx_zip.close()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error extracting charts/images from Excel document {file_name}: {str(e)}")
|
||||
|
||||
else:
|
||||
# For other file types, extract text
|
||||
from modules.agentservice_utils import extract_text_from_file_content
|
||||
|
||||
content, is_extracted = extract_text_from_file_content(
|
||||
file_content, file_name, content_type
|
||||
)
|
||||
|
||||
document["contents"].append({
|
||||
"type": "text",
|
||||
"text": content,
|
||||
"is_extracted": is_extracted,
|
||||
"extraction_context": extraction_prompt
|
||||
})
|
||||
|
||||
logger.info(f"Added text content for {file_name} to message (extracted: {is_extracted})")
|
||||
else:
|
||||
# No content available
|
||||
document["contents"].append({
|
||||
"type": "text",
|
||||
"text": f"File content not available for {file_name}",
|
||||
"is_extracted": False
|
||||
})
|
||||
else:
|
||||
# Just add reference without content
|
||||
document["contents"].append({
|
||||
"type": "text",
|
||||
"text": f"File: {file_name} (content not loaded)",
|
||||
"is_extracted": False
|
||||
})
|
||||
|
||||
# Add document to message
|
||||
message["documents"].append(document)
|
||||
|
||||
logger.info(f"File {file_name} successfully added to message")
|
||||
return message
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error adding file {file_id} to message: {str(e)}")
|
||||
return message

    async def extract_document_content(self, doc_id: str, message: Dict[str, Any], extraction_prompt: str) -> Dict[str, Any]:
        """
        Extract or update document content with contextual extraction.

        Args:
            doc_id: ID of the document to extract
            message: Message containing the document
            extraction_prompt: Contextual prompt for extraction

        Returns:
            Updated message with extracted content
        """
        if not message or "documents" not in message:
            return message

        updated_message = message.copy()

        # Find the document
        for i, document in enumerate(updated_message.get("documents", [])):
            if document.get("id") == doc_id:
                # Get file ID from source
                source = document.get("source", {})
                file_id = source.get("id")

                if file_id and self.lucydom_interface:
                    # Get file metadata
                    file = self.lucydom_interface.get_file(int(file_id))
                    if not file:
                        continue

                    # Get file content
                    file_content = await self.lucydom_interface.read_file_content(int(file_id))
                    if not file_content:
                        continue

                    # Process based on file type
                    file_name = file.get("name", "unnamed_file")
                    file_type = file.get("type", "unknown")
                    content_type = file.get("content_type")

                    # Update content based on file type
                    if file_type == "image" or (content_type and content_type.startswith("image/")):
                        if self.ai_service and hasattr(self.ai_service, "analyze_image"):
                            try:
                                image_analysis = await self.ai_service.analyze_image(
                                    image_data=file_content,
                                    prompt=extraction_prompt,
                                    mime_type=content_type
                                )

                                # Create or update content
                                new_content = {
                                    "type": "text",
                                    "text": f"Image Analysis:\n{image_analysis}",
                                    "is_extracted": True,
                                    "extraction_context": extraction_prompt
                                }

                                # Update or add content
                                contents = document.get("contents", [])
                                contents_updated = False

                                for j, content in enumerate(contents):
                                    if content.get("type") == "text":
                                        updated_message["documents"][i]["contents"][j] = new_content
                                        contents_updated = True
                                        break

                                if not contents_updated:
                                    if not updated_message["documents"][i].get("contents"):
                                        updated_message["documents"][i]["contents"] = []
                                    updated_message["documents"][i]["contents"].append(new_content)

                                logger.info(f"Updated image analysis for {file_name} with new context: {extraction_prompt}")
                            except Exception as e:
                                logger.error(f"Error updating image analysis for {file_name}: {str(e)}")
                    else:
                        # For other file types, extract text with new context
                        from modules.agentservice_utils import extract_text_from_file_content

                        content, is_extracted = extract_text_from_file_content(
                            file_content, file_name, content_type
                        )

                        new_content = {
                            "type": "text",
                            "text": content,
                            "is_extracted": is_extracted,
                            "extraction_context": extraction_prompt
                        }

                        # Update or add content
                        contents = document.get("contents", [])
                        contents_updated = False

                        for j, content_item in enumerate(contents):
                            if content_item.get("type") == "text":
                                updated_message["documents"][i]["contents"][j] = new_content
                                contents_updated = True
                                break

                        if not contents_updated:
                            if not updated_message["documents"][i].get("contents"):
                                updated_message["documents"][i]["contents"] = []
                            updated_message["documents"][i]["contents"].append(new_content)

                        logger.info(f"Updated text extraction for {file_name} with new context: {extraction_prompt}")

                # Found and processed the document, stop searching
                break

        return updated_message

    async def extract_files_from_workflow(self, workflow: Dict[str, Any], extraction_prompt: str, file_filter: str = None) -> Dict[str, Any]:
        """
        Extract all relevant files from a workflow with context-aware extraction.

        Args:
            workflow: The workflow object
            extraction_prompt: Contextual prompt for extraction
            file_filter: Optional filter for file types (e.g., "csv", "image")

        Returns:
            Dictionary with extracted content
        """
        # Import for data extraction
        from modules.agentservice_dataextraction import data_extraction

        # Get all files from the workflow
        files = []

        # Process all messages
        for message in workflow.get("messages", []):
            # Extract documents from the message
            for doc in message.get("documents", []):
                source = doc.get("source", {})

                # Only include file documents
                if source.get("type") == "file":
                    file_info = {
                        "id": source.get("id", ""),
                        "name": source.get("name", ""),
                        "type": source.get("type", ""),
                        "content_type": source.get("content_type", ""),
                        "size": source.get("size", 0)
                    }

                    # Apply filter if provided
                    if file_filter:
                        file_name = file_info.get("name", "").lower()
                        content_type = file_info.get("content_type", "").lower()

                        if (file_filter.lower() in file_name or
                                file_filter.lower() in content_type):
                            # Check if file is already in the list
                            if not any(f.get("id") == file_info["id"] for f in files):
                                files.append(file_info)
                    else:
                        # No filter, include all files
                        if not any(f.get("id") == file_info["id"] for f in files):
                            files.append(file_info)

        # If no files found, return empty result
        if not files:
            return {
                "prompt": extraction_prompt,
                "files_processed": 0,
                "extracted_content": []
            }

        # Get all messages from the workflow
        workflow_messages = workflow.get("messages", [])

        # Extract data using the dataextraction module
        extracted_data = await data_extraction(
            prompt=extraction_prompt,
            files=files,
            messages=workflow_messages,
            ai_service=self.ai_service,
            lucydom_interface=self.lucydom_interface,
            workflow_id=self.workflow_id,
            add_log_func=None  # We don't have access to add_log_func here
        )

        return extracted_data

    def get_file_content_from_message(self, message: Dict[str, Any], file_id: int = None, doc_id: str = None) -> str:
        """
        Get file content from a message.

        Args:
            message: The message containing the document
            file_id: Optional file ID to search for
            doc_id: Optional document ID to search for

        Returns:
            Text content of the file if available
        """
        if not message or "documents" not in message:
            return ""

        # Search for the document
        for document in message.get("documents", []):
            # Match by document ID or file ID
            source = document.get("source", {})
            source_file_id = source.get("id")

            if ((doc_id and document.get("id") == doc_id) or
                    (file_id and source_file_id and str(file_id) == str(source_file_id))):

                # Get text content from document
                for content in document.get("contents", []):
                    if content.get("type") == "text":
                        return content.get("text", "")

        return ""

    def create_text_document(self, message: Dict[str, Any], content: str, title: str = "Generated Text") -> Dict[str, Any]:
        """
        Create a new text document in a message.

        Args:
            message: The message to add the document to
            content: Text content
            title: Document title

        Returns:
            Updated message with the new document
        """
        # Initialize documents array if needed
        updated_message = message.copy()
        if "documents" not in updated_message:
            updated_message["documents"] = []

        # Create document ID
        doc_id = f"doc_{uuid.uuid4()}"

        # Create document structure
        document = {
            "id": doc_id,
            "source": {
                "type": "generated",
                "id": doc_id,
                "name": title,
                "content_type": "text/plain",
                "size": len(content)
            },
            "contents": [
                {
                    "type": "text",
                    "text": content,
                    "is_extracted": True
                }
            ]
        }

        # Add document to message
        updated_message["documents"].append(document)

        logger.info(f"Created text document '{title}' in message")
        return updated_message

    def merge_document_contents(self, message: Dict[str, Any]) -> str:
        """
        Merge all document contents from a message into a single text.

        Args:
            message: The message containing documents

        Returns:
            Combined text content from all documents
        """
        if not message or "documents" not in message:
            return ""

        combined_text = ""

        for document in message.get("documents", []):
            source = document.get("source", {})
            doc_name = source.get("name", "Unnamed Document")

            # Extract text content
            doc_text = ""
            for content in document.get("contents", []):
                if content.get("type") == "text":
                    doc_text = content.get("text", "")
                    break

            if doc_text:
                combined_text += f"\n\n--- {doc_name} ---\n\n{doc_text}"

        return combined_text.strip()


# Factory function
def get_document_handler(workflow_id: str = None, lucydom_interface = None, ai_service = None) -> DocumentHandler:
    """Get a document handler instance."""
    return DocumentHandler(workflow_id, lucydom_interface, ai_service)
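

# Illustrative sketch (not part of the original file): minimal use of the
# factory above. `lucydom` and `ai` stand in for real service instances, and
# the workflow ID, document ID, and prompt are assumptions for the example.
async def _demo_document_handler(lucydom, ai, message):
    handler = get_document_handler(workflow_id="wf_demo",
                                   lucydom_interface=lucydom,
                                   ai_service=ai)
    # Re-extract one document with a new contextual prompt, then merge all
    # document texts into a single string for downstream prompting.
    message = await handler.extract_document_content(
        "doc_demo", message, "Summarize the tables in this report")
    return handler.merge_document_contents(message)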
File diff suppressed because it is too large
@@ -1,338 +0,0 @@
"""
|
||||
Agent Communication Protocol module for the Agentservice.
|
||||
Defines a standardized format for agents to exchange information.
|
||||
"""
|
||||
|
||||
import json
|
||||
import uuid
|
||||
from typing import Dict, Any, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
class AgentMessage:
|
||||
"""
|
||||
Standard message format for inter-agent communication.
|
||||
Includes content, metadata, and document references.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
content: str,
|
||||
sender_id: str,
|
||||
receiver_id: Optional[str] = None,
|
||||
message_type: str = "text",
|
||||
metadata: Optional[Dict[str, Any]] = None,
|
||||
documents: Optional[List[Dict[str, Any]]] = None,
|
||||
context_id: Optional[str] = None
|
||||
):
|
||||
"""
|
||||
Initialize an agent message.
|
||||
|
||||
Args:
|
||||
content: The main message content
|
||||
sender_id: ID of the sending agent
|
||||
receiver_id: Optional ID of the receiving agent
|
||||
message_type: Type of message (text, task, result, etc.)
|
||||
metadata: Optional metadata dictionary
|
||||
documents: Optional list of document references
|
||||
context_id: Optional conversation context ID
|
||||
"""
|
||||
self.id = f"msg_{uuid.uuid4()}"
|
||||
self.timestamp = datetime.now().isoformat()
|
||||
self.content = content
|
||||
self.sender_id = sender_id
|
||||
self.receiver_id = receiver_id
|
||||
self.message_type = message_type
|
||||
self.metadata = metadata or {}
|
||||
self.documents = documents or []
|
||||
self.context_id = context_id
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert the message to a dictionary."""
|
||||
return {
|
||||
"id": self.id,
|
||||
"timestamp": self.timestamp,
|
||||
"content": self.content,
|
||||
"sender_id": self.sender_id,
|
||||
"receiver_id": self.receiver_id,
|
||||
"message_type": self.message_type,
|
||||
"metadata": self.metadata,
|
||||
"documents": self.documents,
|
||||
"context_id": self.context_id
|
||||
}
|
||||
|
||||
@classmethod
|
||||
def from_dict(cls, data: Dict[str, Any]) -> 'AgentMessage':
|
||||
"""Create a message from a dictionary."""
|
||||
message = cls(
|
||||
content=data.get("content", ""),
|
||||
sender_id=data.get("sender_id", "unknown"),
|
||||
receiver_id=data.get("receiver_id"),
|
||||
message_type=data.get("message_type", "text"),
|
||||
metadata=data.get("metadata", {}),
|
||||
documents=data.get("documents", []),
|
||||
context_id=data.get("context_id")
|
||||
)
|
||||
message.id = data.get("id", message.id)
|
||||
message.timestamp = data.get("timestamp", message.timestamp)
|
||||
return message
|
||||
|
||||
def to_json(self) -> str:
|
||||
"""Convert the message to a JSON string."""
|
||||
return json.dumps(self.to_dict())
|
||||
|
||||
@classmethod
|
||||
def from_json(cls, json_str: str) -> 'AgentMessage':
|
||||
"""Create a message from a JSON string."""
|
||||
return cls.from_dict(json.loads(json_str))
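

# Illustrative sketch (not part of the original file): AgentMessage survives a
# JSON round trip, including the id and timestamp set in __init__.
def _demo_agent_message_roundtrip():
    original = AgentMessage(content="ping", sender_id="agent_a",
                            receiver_id="agent_b")
    restored = AgentMessage.from_json(original.to_json())
    # from_dict restores the original id and timestamp rather than minting new ones
    assert restored.id == original.id
    assert restored.timestamp == original.timestamp
    return restored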

class AgentCommunicationProtocol:
    """
    Defines the protocol for agents to communicate with each other.
    Provides standardized message creation and handling.
    """

    @staticmethod
    def create_text_message(
        content: str,
        sender_id: str,
        receiver_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
        documents: Optional[List[Dict[str, Any]]] = None,
        context_id: Optional[str] = None
    ) -> AgentMessage:
        """Create a simple text message."""
        return AgentMessage(
            content=content,
            sender_id=sender_id,
            receiver_id=receiver_id,
            message_type="text",
            metadata=metadata,
            documents=documents,
            context_id=context_id
        )

    @staticmethod
    def create_task_message(
        task_description: str,
        sender_id: str,
        receiver_id: str,
        input_data: Optional[Dict[str, Any]] = None,
        documents: Optional[List[Dict[str, Any]]] = None,
        context_id: Optional[str] = None
    ) -> AgentMessage:
        """Create a task assignment message."""
        metadata = {
            "task_type": "general",
            "input_data": input_data or {},
            "priority": "normal",
            "task_id": f"task_{uuid.uuid4()}"
        }

        return AgentMessage(
            content=task_description,
            sender_id=sender_id,
            receiver_id=receiver_id,
            message_type="task",
            metadata=metadata,
            documents=documents,
            context_id=context_id
        )

    @staticmethod
    def create_result_message(
        result_content: str,
        sender_id: str,
        receiver_id: str,
        task_id: str,
        output_data: Optional[Dict[str, Any]] = None,
        result_format: str = "text",
        documents: Optional[List[Dict[str, Any]]] = None,
        context_id: Optional[str] = None
    ) -> AgentMessage:
        """Create a task result message."""
        metadata = {
            "task_id": task_id,
            "result_format": result_format,
            "status": "completed",
            "output_data": output_data or {}
        }

        return AgentMessage(
            content=result_content,
            sender_id=sender_id,
            receiver_id=receiver_id,
            message_type="result",
            metadata=metadata,
            documents=documents,
            context_id=context_id
        )

    @staticmethod
    def create_error_message(
        error_description: str,
        sender_id: str,
        receiver_id: Optional[str] = None,
        error_type: str = "general",
        error_details: Optional[Dict[str, Any]] = None,
        context_id: Optional[str] = None
    ) -> AgentMessage:
        """Create an error message."""
        metadata = {
            "error_type": error_type,
            "error_details": error_details or {},
            "severity": "error"
        }

        return AgentMessage(
            content=error_description,
            sender_id=sender_id,
            receiver_id=receiver_id,
            message_type="error",
            metadata=metadata,
            context_id=context_id
        )

    @staticmethod
    def create_document_request_message(
        document_description: str,
        sender_id: str,
        receiver_id: str,
        filters: Optional[Dict[str, Any]] = None,
        context_id: Optional[str] = None
    ) -> AgentMessage:
        """Create a document request message."""
        metadata = {
            "request_type": "document",
            "filters": filters or {},
            "request_id": f"req_{uuid.uuid4()}"
        }

        return AgentMessage(
            content=document_description,
            sender_id=sender_id,
            receiver_id=receiver_id,
            message_type="request",
            metadata=metadata,
            context_id=context_id
        )

    @staticmethod
    def create_status_update_message(
        status_description: str,
        sender_id: str,
        receiver_id: Optional[str] = None,
        status: str = "in_progress",
        progress: float = 0.0,
        context_id: Optional[str] = None
    ) -> AgentMessage:
        """Create a status update message."""
        metadata = {
            "status": status,
            "progress": progress,
            "update_type": "status"
        }

        return AgentMessage(
            content=status_description,
            sender_id=sender_id,
            receiver_id=receiver_id,
            message_type="status",
            metadata=metadata,
            context_id=context_id
        )

    @staticmethod
    def convert_system_message_to_agent_message(system_message: Dict[str, Any], sender_id: str) -> AgentMessage:
        """
        Convert a system message to an agent message.

        Args:
            system_message: Message object from the workflow
            sender_id: ID of the sending agent

        Returns:
            AgentMessage instance
        """
        # Extract basic information
        content = system_message.get("content", "")
        message_id = system_message.get("id", f"msg_{uuid.uuid4()}")
        timestamp = system_message.get("started_at", datetime.now().isoformat())

        # Create metadata
        metadata = {
            "agent_type": system_message.get("agent_type"),
            "agent_name": system_message.get("agent_name"),
            "workflow_id": system_message.get("workflow_id"),
            "sequence_no": system_message.get("sequence_no"),
            "result_format": system_message.get("result_format"),
            "original_message_id": message_id
        }

        # Create agent message
        agent_message = AgentMessage(
            content=content,
            sender_id=sender_id,
            message_type="system",
            metadata=metadata,
            documents=system_message.get("documents", []),
            context_id=system_message.get("workflow_id")
        )

        # Set original ID and timestamp
        agent_message.id = message_id
        agent_message.timestamp = timestamp

        return agent_message

    @staticmethod
    def convert_agent_message_to_system_message(agent_message: AgentMessage) -> Dict[str, Any]:
        """
        Convert an agent message to a system message.

        Args:
            agent_message: The agent message to convert

        Returns:
            System message dictionary
        """
        message_data = agent_message.to_dict()
        metadata = message_data.get("metadata", {})

        # Create system message structure
        system_message = {
            "id": message_data.get("id", f"msg_{uuid.uuid4()}"),
            "workflow_id": message_data.get("context_id"),
            "started_at": message_data.get("timestamp", datetime.now().isoformat()),
            "finished_at": datetime.now().isoformat(),
            "sequence_no": metadata.get("sequence_no", 0),

            "status": "completed",
            "role": "assistant",

            "data_stats": {
                "processing_time": 0.0,
                "token_count": 0,
                "bytes_sent": 0,
                "bytes_received": 0
            },

            "agent_type": metadata.get("agent_type"),
            "agent_id": message_data.get("sender_id"),
            "agent_name": metadata.get("agent_name"),
            "result_format": metadata.get("result_format", "text"),

            "content": message_data.get("content", ""),
            "documents": message_data.get("documents", [])
        }

        # If this is a result message, add more metadata
        if message_data.get("message_type") == "result":
            system_message["output_data"] = metadata.get("output_data", {})
            system_message["task_id"] = metadata.get("task_id")

        return system_message


# Factory function
def get_agent_protocol():
    """Get the agent communication protocol."""
    return AgentCommunicationProtocol
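

# Illustrative sketch (not part of the original file): a task/result exchange
# built with the factory above. The agent IDs and task text are placeholders.
def _demo_task_exchange():
    protocol = get_agent_protocol()
    task = protocol.create_task_message(
        task_description="Extract all invoice totals",
        sender_id="planner", receiver_id="extractor")
    # The result message must echo the task_id that create_task_message generated
    result = protocol.create_result_message(
        result_content="Found 3 invoices totalling 4200 EUR",
        sender_id="extractor", receiver_id="planner",
        task_id=task.metadata["task_id"],
        output_data={"total": 4200})
    return task, result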
@@ -1,290 +0,0 @@
"""
|
||||
Updated registry for all available agents in the system.
|
||||
Provides centralized agent registration and access with improved error handling.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import importlib
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
# Import direct base agent module
|
||||
from modules.agentservice_base import BaseAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AgentRegistry:
|
||||
"""Registry for all available agents in the system"""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Get a singleton instance of the Agent Registry"""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the Agent Registry"""
|
||||
if AgentRegistry._instance is not None:
|
||||
raise RuntimeError("Singleton instance already exists - use get_instance()")
|
||||
self.agents = {}
|
||||
self.ai_service = None
|
||||
self.document_handler = None
|
||||
self.lucydom_interface = None
|
||||
self._load_agents()
|
||||
|
||||
def _load_agents(self):
|
||||
"""Load all available agents"""
|
||||
# List of all agent modules to load
|
||||
logger.info("Automatically loading agent modules...")
|
||||
agent_modules = []
|
||||
for filename in os.listdir(os.path.dirname(__file__)):
|
||||
if filename.startswith("agentservice_agent_") and filename.endswith(".py"):
|
||||
agent_modules.append(filename[:-3]) # Remove .py extension
|
||||
if not agent_modules:
|
||||
logger.warning("No agent modules found")
|
||||
return
|
||||
logger.info(f"Found {len(agent_modules)} agent modules")
|
||||
|
||||
for module_name in agent_modules:
|
||||
try:
|
||||
# Import the module
|
||||
module = importlib.import_module(f"modules.{module_name}")
|
||||
|
||||
# Look for the agent class or a get_*_agent function
|
||||
agent_type = module_name.split('_')[-1]
|
||||
class_name = f"{agent_type.capitalize()}Agent"
|
||||
getter_name = f"get_{agent_type}_agent"
|
||||
|
||||
agent = None
|
||||
|
||||
# Try to get the agent via the get_*_agent function
|
||||
if hasattr(module, getter_name):
|
||||
getter_func = getattr(module, getter_name)
|
||||
agent = getter_func()
|
||||
logger.info(f"Agent '{agent.name}' (Type: {agent.type}) loaded via {getter_name}()")
|
||||
|
||||
# Alternatively, try to instantiate the agent directly
|
||||
elif hasattr(module, class_name):
|
||||
agent_class = getattr(module, class_name)
|
||||
agent = agent_class()
|
||||
logger.info(f"Agent '{agent.name}' (Type: {agent.type}) directly instantiated")
|
||||
|
||||
if agent:
|
||||
# Register the agent
|
||||
self.register_agent(agent)
|
||||
else:
|
||||
logger.warning(f"No agent class or getter function found in module {module_name}")
|
||||
|
||||
except ImportError as e:
|
||||
logger.error(f"Module {module_name} could not be imported: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Error loading agent from module {module_name}: {e}")
|
||||
|
||||
    def set_dependencies(self, ai_service=None, document_handler=None, lucydom_interface=None):
        """
        Set system dependencies for all agents.

        Args:
            ai_service: AI service for text generation
            document_handler: Document handler for document operations
            lucydom_interface: LucyDOM interface for database access
        """
        self.ai_service = ai_service
        # The original only stored ai_service; store the other two dependencies
        # as well, since register_agent and update_agent_dependencies read them.
        self.document_handler = document_handler
        self.lucydom_interface = lucydom_interface
        # Update all registered agents
        self.update_agent_dependencies()

    def update_agent_dependencies(self):
        """Update dependencies for all registered agents"""
        for agent_id, agent in self.agents.items():
            if hasattr(agent, 'set_dependencies'):
                agent.set_dependencies(
                    ai_service=self.ai_service,
                    document_handler=self.document_handler,
                    lucydom_interface=self.lucydom_interface
                )

    def register_agent(self, agent: 'BaseAgent'):
        """
        Register an agent in the registry.

        Args:
            agent: The agent to register
        """
        agent_type = agent.type
        agent_id = getattr(agent, 'id', agent_type)

        # Initialize enhanced agents with dependencies
        if hasattr(agent, 'set_dependencies'):
            agent.set_dependencies(
                ai_service=self.ai_service,
                document_handler=self.document_handler,
                lucydom_interface=self.lucydom_interface
            )

        self.agents[agent_type] = agent
        # Also register by ID if it's different from type
        if agent_id != agent_type:
            self.agents[agent_id] = agent

        logger.debug(f"Agent '{agent.name}' (Type: {agent_type}, ID: {agent_id}) registered")

    def get_agent(self, agent_identifier: str) -> Optional[BaseAgent]:
        """
        Get an agent instance by ID or type.

        Args:
            agent_identifier: ID or type of the desired agent

        Returns:
            Agent instance or None if not found
        """
        # Try to find directly by type
        if agent_identifier in self.agents:
            return self.agents[agent_identifier]

        # If not found, try different name variants
        variants = [
            agent_identifier,
            agent_identifier.replace('_agent', ''),
            f"{agent_identifier}_agent"
        ]

        for variant in variants:
            if variant in self.agents:
                return self.agents[variant]

        logger.warning(f"Agent with identifier '{agent_identifier}' not found")
        return None

    def get_all_agents(self) -> Dict[str, BaseAgent]:
        """Get all registered agents."""
        return self.agents

    def get_agent_infos(self) -> List[Dict[str, Any]]:
        """Get information about all registered agents."""
        agent_infos = []
        # Only once per agent instance (since we register both by type and ID)
        seen_agents = set()
        for agent in self.agents.values():
            if agent not in seen_agents:
                agent_infos.append(agent.get_agent_info())
                seen_agents.add(agent)
        return agent_infos

    def get_agent_by_format(self, required_format: str) -> Optional[BaseAgent]:
        """
        Find an agent that can produce the required output format.

        Args:
            required_format: The required output format

        Returns:
            Agent that can produce the required format, or None if not found
        """
        # Create mapping of result format -> agent for faster lookup
        format_to_agent = {}
        seen_agents = set()

        for agent in self.agents.values():
            if agent not in seen_agents:
                # Get the agent's result format
                agent_format = getattr(agent, 'result_format', None)
                if agent_format:
                    format_to_agent[agent_format.lower()] = agent
                seen_agents.add(agent)

        # Try to find an exact match
        if required_format.lower() in format_to_agent:
            return format_to_agent[required_format.lower()]

        # If no exact match, try to find a partial match
        for fmt, agent in format_to_agent.items():
            if required_format.lower() in fmt or fmt in required_format.lower():
                return agent

        # No match found
        return None

    def initialize_agents_for_workflow(self) -> Dict[str, Dict[str, Any]]:
        """Initialize agents for a workflow."""
        initialized_agents = {}
        seen_agents = set()
        for agent in self.agents.values():
            if agent not in seen_agents:
                agent_info = agent.get_agent_info()
                agent_id = agent_info["id"]
                initialized_agents[agent_id] = agent_info
                seen_agents.add(agent)
        return initialized_agents

    def get_agent_capabilities(self) -> Dict[str, List[str]]:
        """
        Get a mapping of capabilities to agents.
        Useful for finding the right agent for a specific task.

        Returns:
            Dict mapping capability keywords to agent IDs
        """
        capabilities_map = {}
        seen_agents = set()

        for agent in self.agents.values():
            if agent not in seen_agents:
                # Get agent info
                agent_id = getattr(agent, 'id', agent.type)

                # Extract capabilities - check for get_capabilities method first
                if hasattr(agent, 'get_capabilities') and callable(getattr(agent, 'get_capabilities')):
                    capabilities = agent.get_capabilities()
                else:
                    # Fall back to string parsing
                    capabilities_str = getattr(agent, 'capabilities', "")
                    capabilities = [kw.strip().lower() for kw in capabilities_str.split(',') if kw.strip()]

                # Add each capability to the mapping
                for capability in capabilities:
                    if capability not in capabilities_map:
                        capabilities_map[capability] = []
                    if agent_id not in capabilities_map[capability]:
                        capabilities_map[capability].append(agent_id)

                seen_agents.add(agent)

        return capabilities_map

    def get_agent_by_capability(self, capability: str) -> Optional['BaseAgent']:
        """
        Find an agent with a specific capability.

        Args:
            capability: The required capability

        Returns:
            Agent with the required capability, or None if not found
        """
        # Create mapping of capabilities for faster lookup
        capability_map = self.get_agent_capabilities()

        # Look for the capability (case-insensitive)
        capability = capability.lower()
        matching_agents = []

        # Direct match
        if capability in capability_map:
            matching_agents = capability_map[capability]
        else:
            # Partial matches
            for cap, agents in capability_map.items():
                if capability in cap or cap in capability:
                    matching_agents.extend(agents)

        # Return the first matching agent
        if matching_agents:
            agent_id = matching_agents[0]
            return self.get_agent(agent_id)

        return None
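

# Illustrative sketch (not part of the original file): a typical lookup flow.
# The service objects and the "summary"/"summarize" identifiers are placeholders.
def _demo_registry_lookup(ai_service, document_handler, lucydom_interface):
    registry = AgentRegistry.get_instance()
    registry.set_dependencies(ai_service=ai_service,
                              document_handler=document_handler,
                              lucydom_interface=lucydom_interface)
    # get_agent tolerates both "summary" and "summary_agent" style identifiers
    agent = registry.get_agent("summary")
    if agent is None:
        # Fall back to a capability-based search
        agent = registry.get_agent_by_capability("summarize")
    return agent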
@@ -1,760 +0,0 @@
"""
|
||||
Centralized utility functions for the Agentservice (continued).
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, Tuple, Union, Callable
|
||||
from io import BytesIO
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class WorkflowUtils:
|
||||
"""
|
||||
Utility class for workflow operations.
|
||||
Centralizes common workflow-related functions.
|
||||
"""
|
||||
|
||||
def __init__(self, workflow_id: str = None):
|
||||
"""Initialize with optional workflow ID"""
|
||||
self.workflow_id = workflow_id
|
||||
|
||||
def set_workflow_id(self, workflow_id: str):
|
||||
"""Set or update the workflow ID"""
|
||||
self.workflow_id = workflow_id
|
||||
|
||||
def get_documents(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get all documents from a workflow across all messages.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
|
||||
Returns:
|
||||
List of document objects
|
||||
"""
|
||||
documents = []
|
||||
|
||||
# Process all messages
|
||||
for message in workflow.get("messages", []):
|
||||
# Extract documents from the message
|
||||
for doc in message.get("documents", []):
|
||||
# Add to list if not already present
|
||||
if not any(d.get("id") == doc.get("id") for d in documents):
|
||||
documents.append(doc)
|
||||
|
||||
return documents
|
||||
|
||||
def get_files(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get all file references from a workflow.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
|
||||
Returns:
|
||||
List of file metadata objects
|
||||
"""
|
||||
files = []
|
||||
|
||||
# Process all messages
|
||||
for message in workflow.get("messages", []):
|
||||
# Extract documents from the message
|
||||
for doc in message.get("documents", []):
|
||||
source = doc.get("source", {})
|
||||
|
||||
# Only include file documents
|
||||
if source.get("type") == "file":
|
||||
file_info = {
|
||||
"id": source.get("id", ""),
|
||||
"name": source.get("name", ""),
|
||||
"type": source.get("content_type", ""),
|
||||
"content_type": source.get("content_type", ""),
|
||||
"size": source.get("size", 0)
|
||||
}
|
||||
|
||||
# Check if file is already in the list
|
||||
if not any(f.get("id") == file_info["id"] for f in files):
|
||||
files.append(file_info)
|
||||
|
||||
return files
|
||||
|
||||
def extract_by_prompt(self, workflow: Dict[str, Any], prompt: str, ai_service) -> Dict[str, Any]:
|
||||
"""
|
||||
Extract data from workflow documents based on an AI prompt.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
prompt: The extraction prompt
|
||||
ai_service: The AI service to use for extraction
|
||||
|
||||
Returns:
|
||||
Extracted data
|
||||
"""
|
||||
# This is an async method but we're exposing it as a regular method
|
||||
# The caller should use it with asyncio.run() or await
|
||||
async def _extract():
|
||||
# Create extraction prompt
|
||||
files = self.get_files(workflow)
|
||||
file_descriptions = "\n".join([f"- {f.get('name', 'unnamed')} ({f.get('type', 'unknown')})" for f in files])
|
||||
|
||||
extraction_prompt = f"""
|
||||
Extract relevant information from the following files based on this request:
|
||||
|
||||
REQUEST: {prompt}
|
||||
|
||||
FILES:
|
||||
{file_descriptions}
|
||||
|
||||
Focus on the most relevant content and provide a structured output.
|
||||
"""
|
||||
|
||||
# Call AI
|
||||
response = await ai_service.call_api([{"role": "user", "content": extraction_prompt}])
|
||||
|
||||
return {
|
||||
"prompt": prompt,
|
||||
"extracted_content": response,
|
||||
"files_processed": len(files)
|
||||
}
|
||||
|
||||
# Return the coroutine
|
||||
return _extract()
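
    # Note on usage (illustrative, not part of the original file):
    # extract_by_prompt returns a coroutine, so callers must await it, e.g.
    #   data = await utils.extract_by_prompt(workflow, "List open issues", ai)
    # or, from synchronous code:
    #   data = asyncio.run(utils.extract_by_prompt(workflow, "List open issues", ai))
    # The variable names here are placeholders.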

    def merge_workflows(self, workflows: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Merge multiple workflows into a single unified workflow.
        Useful for workflow templates or combining partial workflows.

        Args:
            workflows: List of workflow objects to merge

        Returns:
            Merged workflow
        """
        if not workflows:
            return {}

        # Start with the first workflow
        result = workflows[0].copy()

        # Initialize lists if not present
        if "messages" not in result:
            result["messages"] = []
        if "logs" not in result:
            result["logs"] = []

        # Merge additional workflows
        for workflow in workflows[1:]:
            # Append messages
            for message in workflow.get("messages", []):
                # Check for duplicates
                if not any(m.get("id") == message.get("id") for m in result["messages"]):
                    result["messages"].append(message)

            # Append logs
            for log in workflow.get("logs", []):
                # Check for duplicates
                if not any(l.get("id") == log.get("id") for l in result["logs"]):
                    result["logs"].append(log)

            # Update status if needed
            if workflow.get("status") == "failed":
                result["status"] = "failed"

            # Update last_activity if newer
            if (workflow.get("last_activity") and
                    (not result.get("last_activity") or
                     workflow["last_activity"] > result["last_activity"])):
                result["last_activity"] = workflow["last_activity"]

        return result

    def get_message(self, workflow: Dict[str, Any], message_id: str) -> Optional[Dict[str, Any]]:
        """
        Find a message by ID in the workflow.

        Args:
            workflow: The workflow object
            message_id: The message ID to find

        Returns:
            Message object or None if not found
        """
        for message in workflow.get("messages", []):
            if message.get("id") == message_id:
                return message
        return None

    def to_str(self, workflow: Dict[str, Any]) -> str:
        """
        Convert workflow to a formatted string representation.

        Args:
            workflow: The workflow object

        Returns:
            String representation of the workflow
        """
        # Create a summary string
        result = f"Workflow: {workflow.get('id')}\n"
        result += f"Status: {workflow.get('status', 'unknown')}\n"
        result += f"Started: {workflow.get('started_at', 'unknown')}\n"
        result += f"Last Activity: {workflow.get('last_activity', 'unknown')}\n"

        # Add message count
        message_count = len(workflow.get("messages", []))
        result += f"Messages: {message_count}\n"

        # Add log count
        log_count = len(workflow.get("logs", []))
        result += f"Logs: {log_count}\n"

        return result


class MessageUtils:
    """
    Utility class for message operations.
    Centralizes common message-related functions.
    """

    def create_message(self, workflow_id: str, role: str = "system") -> Dict[str, Any]:
        """
        Create a new message object.

        Args:
            workflow_id: ID of the workflow
            role: Role of the message ('system', 'user', 'assistant')

        Returns:
            New message object
        """
        message_id = f"msg_{uuid.uuid4()}"
        current_time = datetime.now().isoformat()

        # Create message object
        message = {
            "id": message_id,
            "workflow_id": workflow_id,
            "parent_message_id": None,
            "started_at": current_time,
            "finished_at": None,
            "sequence_no": 0,

            "status": "pending",
            "role": role,

            "data_stats": {
                "processing_time": 0.0,
                "token_count": 0,
                "bytes_sent": 0,
                "bytes_received": 0
            },

            "documents": [],
            "content": None,
            "agent_type": None
        }

        return message

    def finalize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
        """
        Finalize a message by setting completion timestamp.

        Args:
            message: The message object

        Returns:
            Updated message object
        """
        message["finished_at"] = datetime.now().isoformat()
        message["status"] = "completed"
        return message
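
    # Lifecycle sketch (illustrative, not part of the original file):
    #   utils = MessageUtils()
    #   msg = utils.create_message("wf_demo", role="assistant")  # status "pending"
    #   msg["content"] = "Done."
    #   msg = utils.finalize_message(msg)  # sets finished_at, status "completed"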

    def get_documents(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Get all documents from a message.

        Args:
            message: The message object

        Returns:
            List of document objects
        """
        return message.get("documents", [])

    def get_files(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:
        """
        Get all file references from a message.

        Args:
            message: The message object

        Returns:
            List of file metadata objects
        """
        files = []

        # Extract documents from the message
        for doc in message.get("documents", []):
            source = doc.get("source", {})

            # Only include file documents
            if source.get("type") == "file":
                file_info = {
                    "id": source.get("id", ""),
                    "name": source.get("name", ""),
                    "type": source.get("content_type", ""),
                    "content_type": source.get("content_type", ""),
                    "size": source.get("size", 0)
                }

                files.append(file_info)

        return files

    def extract_text_content(self, message: Dict[str, Any]) -> str:
        """
        Extract text content from a message including document content.

        Args:
            message: The message object

        Returns:
            String with all text content from the message
        """
        content = message.get("content", "")

        # Add document content
        for doc in message.get("documents", []):
            # Check for document contents
            for doc_content in doc.get("contents", []):
                if doc_content.get("type") == "text":
                    content += "\n\n" + doc_content.get("text", "")

        return content

    def to_str(self, message: Dict[str, Any]) -> str:
        """
        Convert message to a formatted string representation.

        Args:
            message: The message object

        Returns:
            String representation of the message
        """
        # Create a summary string
        result = f"Message: {message.get('id')}\n"
        result += f"Role: {message.get('role', 'unknown')}\n"

        # Add agent info if available
        if message.get("agent_type"):
            result += f"Agent: {message.get('agent_name', message.get('agent_type', 'unknown'))}\n"

        # Add content summary
        content = message.get("content", "")
        if content:
            content_preview = content[:100] + "..." if len(content) > 100 else content
            result += f"Content: {content_preview}\n"

        # Add document count
        doc_count = len(message.get("documents", []))
        result += f"Documents: {doc_count}\n"

        return result


class FileUtils:
    """
    Utility class for file operations.
    Centralizes common file-related functions.
    """

    def is_text_extractable(self, file_name: str, content_type: str = None) -> bool:
        """
        Check if text can be extracted from a file.

        Args:
            file_name: Name of the file
            content_type: MIME type (optional)

        Returns:
            True if text can be extracted, False otherwise
        """
        # Text files
        if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):
            return True

        # Excel files
        if file_name.endswith(('.xlsx', '.xls')):
            try:
                import pandas
                return True
            except ImportError:
                return False

        # PDF files
        if file_name.endswith('.pdf'):
            try:
                # Check if PyPDF2 or PyMuPDF is available
                try:
                    import PyPDF2
                    return True
                except ImportError:
                    try:
                        import fitz  # PyMuPDF
                        return True
                    except ImportError:
                        return False
            except Exception:  # original used a bare except here
                return False

        # Images and other non-text files
        if file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg',
                               '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv',
                               '.mp3', '.wav', '.ogg', '.flac', '.aac')):
            return False

        # Check content type if file extension doesn't give a clear answer
        if content_type:
            if content_type.startswith(('text/', 'application/json', 'application/xml')):
                return True
            elif content_type == 'application/pdf':
                return True
            elif content_type.startswith(('image/', 'video/', 'audio/')):
                return False

        # Default to allowing extraction attempt
        return True

    def get_mime_type(self, file_name: str) -> str:
        """
        Get MIME type based on file name.

        Args:
            file_name: Name of the file

        Returns:
            MIME type string
        """
        import mimetypes

        # Initialize mimetypes
        mimetypes.init()

        # Get MIME type
        mime_type, _ = mimetypes.guess_type(file_name)

        if not mime_type:
            # Default mappings for common extensions
            extension_map = {
                'txt': 'text/plain',
                'md': 'text/markdown',
                'json': 'application/json',
                'csv': 'text/csv',
                'html': 'text/html',
                'htm': 'text/html',
                'pdf': 'application/pdf',
                'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                'jpg': 'image/jpeg',
                'jpeg': 'image/jpeg',
                'png': 'image/png',
                'gif': 'image/gif',
                'svg': 'image/svg+xml',
                'webp': 'image/webp',
                'mp4': 'video/mp4',
                'mp3': 'audio/mpeg'
            }

            # Get extension
            ext = os.path.splitext(file_name)[1].lower().lstrip('.')

            # Return mapped MIME type or default
            mime_type = extension_map.get(ext, 'application/octet-stream')

        return mime_type
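

# Illustrative sketch (not part of the original file): combining the two
# FileUtils checks before attempting extraction. The file name is a placeholder;
# note that is_text_extractable returns True for a PDF only when PyPDF2 or
# PyMuPDF is importable in the current environment.
def _demo_file_utils():
    fu = FileUtils()
    name = "report.pdf"
    if fu.is_text_extractable(name):
        return fu.get_mime_type(name)  # "application/pdf"
    return "application/octet-stream"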


class LoggingUtils:
    """
    Enhanced logging utilities for better workflow tracking.
    Provides structured and categorized logging for workflows.
    """

    def __init__(self, workflow_id: str = None, log_func: Callable = None):
        """
        Initialize logging utilities.

        Args:
            workflow_id: ID of the workflow for context
            log_func: Function to call for adding workflow logs
        """
        self.workflow_id = workflow_id
        self.log_func = log_func
        self.logger = logging.getLogger(__name__)

        # Define log categories
        self.categories = {
            "workflow": "Workflow Management",
            "planning": "Activity Planning",
            "execution": "Activity Execution",
            "agents": "Agent Selection & Execution",
            "files": "File Processing",
            "summary": "Results Summary",
            "error": "Error Handling",
            "code": "Code Execution",
        }

    def set_workflow_id(self, workflow_id: str):
        """Update the workflow ID"""
        self.workflow_id = workflow_id

    def set_log_func(self, log_func: Callable):
        """Update the log function"""
        self.log_func = log_func

    def info(self, message: str, category: str = "workflow", details: str = None):
        """
        Log an informational message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information
        """
        category_name = self.categories.get(category, category)
        log_message = f"[{category_name}] {message}"

        # Log to standard logger
        self.logger.info(log_message)

        # Log to workflow if function available
        if self.log_func and self.workflow_id:
            self.log_func(self.workflow_id, message, "info", category, category_name)

    def warning(self, message: str, category: str = "workflow", details: str = None):
        """
        Log a warning message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information
        """
        category_name = self.categories.get(category, category)
        log_message = f"[{category_name}] {message}"

        # Log to standard logger
        self.logger.warning(log_message)

        # Log to workflow if function available
        if self.log_func and self.workflow_id:
            self.log_func(self.workflow_id, message, "warning", category, category_name)

    def error(self, message: str, category: str = "error", details: str = None):
        """
        Log an error message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information
        """
        category_name = self.categories.get(category, category)
        log_message = f"[{category_name}] {message}"

        # Log to standard logger
        self.logger.error(log_message)

        # Log to workflow if function available
        if self.log_func and self.workflow_id:
            self.log_func(self.workflow_id, message, "error", category, category_name)

    def debug(self, message: str, category: str = "workflow", details: str = None):
        """
        Log a debug message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information
        """
        category_name = self.categories.get(category, category)
        log_message = f"[{category_name}] {message}"

        # Log to standard logger
        self.logger.debug(log_message)

    def get_category_name(self, category: str) -> str:
        """
        Get human-readable category name.

        Args:
            category: Category code

        Returns:
            Human-readable category name
        """
        return self.categories.get(category, category)
|
||||
|
||||
|
||||
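A minimal usage sketch of LoggingUtils; the collect function below is a stand-in for the real workflow log function (its five-argument signature is taken from the calls above):

import logging
logging.basicConfig(level=logging.INFO)

def collect(workflow_id, message, level, category, category_name):
    # Stand-in sink matching (workflow_id, message, level, category, category_name)
    print(f"{workflow_id} [{category_name}] {level}: {message}")

log = LoggingUtils(workflow_id="wf_demo", log_func=collect)
log.info("Planning started", category="planning")
log.warning("Source looks stale", category="files")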
def extract_text_from_file_content(file_content: bytes, file_name: str, content_type: str = None) -> Tuple[str, bool]:
    """
    Extract text from various file formats based on binary content.

    Args:
        file_content: Binary content of the file
        file_name: Name of the file for format detection
        content_type: Optional MIME type of the file

    Returns:
        Tuple with (extracted text, is_extracted flag)
    """
    # Check if file is likely text-extractable
    if not is_text_extractable(file_name, content_type):
        return f"[File: {file_name} - Text extraction not supported]", False

    try:
        # Simple text files
        if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv', '.log', '.ini', '.cfg', '.conf')) or (content_type and (content_type.startswith('text/') or content_type in ['application/json', 'application/xml', 'text/csv'])):
            try:
                return file_content.decode('utf-8'), True
            except UnicodeDecodeError:
                try:
                    return file_content.decode('latin1'), True
                except UnicodeDecodeError:
                    return file_content.decode('cp1252', errors='replace'), True

        # Excel files
        elif file_name.endswith(('.xlsx', '.xls')):
            try:
                import pandas as pd
                # Create temporary in-memory file
                file_obj = BytesIO(file_content)
                df = pd.read_excel(file_obj)
                result = f"Excel file with {len(df)} rows and {len(df.columns)} columns.\n"
                result += f"Columns: {', '.join(df.columns.tolist())}\n\n"
                result += df.to_string(index=False)
                return result, True
            except ImportError:
                return f"[Excel file: {file_name} - pandas not installed]", False
            except Exception as e:
                return f"[Error extracting Excel content: {str(e)}]", False

        # CSV files
        elif file_name.endswith('.csv'):
            try:
                import pandas as pd
                try:
                    # Create temporary in-memory file
                    file_obj = BytesIO(file_content)
                    df = pd.read_csv(file_obj, encoding='utf-8')
                except UnicodeDecodeError:
                    file_obj = BytesIO(file_content)
                    try:
                        df = pd.read_csv(file_obj, encoding='latin1')
                    except Exception:
                        file_obj = BytesIO(file_content)
                        df = pd.read_csv(file_obj, encoding='cp1252')

                result = f"CSV file with {len(df)} rows and {len(df.columns)} columns.\n"
                result += f"Columns: {', '.join(df.columns.tolist())}\n\n"
                result += df.to_string(index=False)
                return result, True
            except ImportError:
                return f"[CSV file: {file_name} - pandas not installed]", False
            except Exception as e:
                return f"[Error extracting CSV content: {str(e)}]", False

        # PDF files
        elif file_name.endswith('.pdf'):
            try:
                try:
                    from PyPDF2 import PdfReader
                    reader = PdfReader(BytesIO(file_content))
                    text = ""
                    for page in reader.pages:
                        text += page.extract_text() + "\n\n"
                    return text, True
                except ImportError:
                    try:
                        import fitz  # PyMuPDF
                        doc = fitz.open(stream=file_content, filetype="pdf")
                        text = ""
                        for page in doc:
                            text += page.get_text() + "\n\n"
                        return text, True
                    except ImportError:
                        return f"[PDF: {file_name} - No PDF library installed]", False
            except Exception as e:
                return f"[Error reading PDF file {file_name}: {str(e)}]", False

        # Default case - try basic text extraction
        else:
            try:
                return file_content.decode('utf-8', errors='replace'), True
            except Exception as e:
                logger.error(f"Error extracting text from {file_name}: {str(e)}")
                return f"[Text extraction error: {str(e)}]", False

    except Exception as e:
        logger.error(f"Error extracting text from {file_name}: {str(e)}")
        return f"[Text extraction error: {str(e)}]", False
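A self-contained call of the extractor on an in-memory text file:

sample = b"quarter,revenue\nQ1,1200\n"
text, extracted = extract_text_from_file_content(sample, "data.txt", "text/plain")
print(extracted)  # True
print(text)       # the decoded content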
def is_text_extractable(file_name: str, content_type: str = None) -> bool:
    """Check if text can be extracted from a file."""
    # Text files
    if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):
        return True

    # Excel files
    if file_name.endswith(('.xlsx', '.xls')):
        try:
            import pandas
            return True
        except ImportError:
            return False

    # PDF files
    if file_name.endswith('.pdf'):
        try:
            # Check if PyPDF2 or PyMuPDF is available
            try:
                import PyPDF2
                return True
            except ImportError:
                try:
                    import fitz  # PyMuPDF
                    return True
                except ImportError:
                    return False
        except Exception:
            return False

    # Images and other non-text files
    if file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg',
                           '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv',
                           '.mp3', '.wav', '.ogg', '.flac', '.aac')):
        return False

    # Check the content type if the file extension doesn't give a clear answer
    if content_type:
        if content_type.startswith(('text/', 'application/json', 'application/xml')):
            return True
        elif content_type == 'application/pdf':
            return True
        elif content_type.startswith(('image/', 'video/', 'audio/')):
            return False

    # Default to allowing an extraction attempt
    return True
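And the capability check on its own:

print(is_text_extractable("report.pdf"))          # True only if PyPDF2 or PyMuPDF is importable
print(is_text_extractable("clip.mp4"))            # False
print(is_text_extractable("blob", "text/plain"))  # no extension, decided by the content type -> True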
File diff suppressed because it is too large
@ -1,689 +0,0 @@
"""
Refactored WorkflowManager class for the Agentservice (continued).
"""

import os
import logging
import asyncio
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union

logger = logging.getLogger(__name__)

class WorkflowManager:

    def __init__(self, mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None):
        """Initialize the WorkflowManager."""
        self.mandate_id = mandate_id
        self.user_id = user_id
        self.ai_service = ai_service
        self.lucydom_interface = lucydom_interface

        # Cache for workflows
        self.workflows = {}

        # Directory for results
        self.results_dir = os.path.join("results", "workflows")
        os.makedirs(self.results_dir, exist_ok=True)

        # Initialize document handler
        from modules.agentservice_document_handler import get_document_handler
        self.document_handler = get_document_handler(
            lucydom_interface=lucydom_interface,
            ai_service=ai_service
        )

        # Initialize agent registry with dependencies
        from modules.agentservice_registry import AgentRegistry
        registry = AgentRegistry.get_instance()
        registry.set_dependencies(
            ai_service=ai_service,
            document_handler=self.document_handler,
            lucydom_interface=lucydom_interface
        )
    async def list_workflows(self, mandate_id: int = None, user_id: int = None) -> List[Dict[str, Any]]:
        """
        List all available workflows.

        Args:
            mandate_id: Optional mandate ID for filtering
            user_id: Optional user ID for filtering

        Returns:
            List of workflow summaries
        """
        workflows = []

        # Load from database if available
        if self.lucydom_interface:
            try:
                # Get all workflows for the user
                if user_id is not None:
                    user_workflows = self.lucydom_interface.get_workflows_by_user(user_id)
                else:
                    user_workflows = self.lucydom_interface.get_all_workflows()

                # Filter by mandate if specified
                if mandate_id is not None:
                    user_workflows = [wf for wf in user_workflows if wf.get("mandate_id") == mandate_id]

                # Create workflow summaries
                for workflow in user_workflows:
                    summary = {
                        "id": workflow.get("id"),
                        "name": workflow.get("name", f"Workflow {workflow.get('id')}"),
                        "status": workflow.get("status"),
                        "started_at": workflow.get("started_at"),
                        "last_activity": workflow.get("last_activity"),
                        "completed_at": workflow.get("completed_at")
                    }

                    # Add message count if available
                    messages = self.lucydom_interface.get_workflow_messages(workflow.get("id"))
                    if messages:
                        summary["message_count"] = len(messages)

                    workflows.append(summary)

                logger.info(f"Loaded {len(workflows)} workflows from database")

                # Sort by last activity (newest first)
                return sorted(workflows, key=lambda w: w.get("last_activity", ""), reverse=True)

            except Exception as e:
                logger.error(f"Error retrieving workflows from database: {str(e)}")

        # Load from files if no database or an error occurred
        try:
            for filename in os.listdir(self.results_dir):
                if filename.startswith("workflow_") and filename.endswith(".json"):
                    workflow_path = os.path.join(self.results_dir, filename)

                    try:
                        import json
                        with open(workflow_path, 'r', encoding='utf-8') as f:
                            workflow = json.load(f)

                        # Check if mandate and user ID match the filters
                        if mandate_id is not None and workflow.get("mandate_id") != mandate_id:
                            continue

                        if user_id is not None and workflow.get("user_id") != user_id:
                            continue

                        # Create workflow summary
                        summary = {
                            "id": workflow.get("id"),
                            "name": workflow.get("name", f"Workflow {workflow.get('id')}"),
                            "status": workflow.get("status"),
                            "started_at": workflow.get("started_at"),
                            "last_activity": workflow.get("last_activity"),
                            "message_count": len(workflow.get("messages", []))
                        }

                        workflows.append(summary)
                    except Exception as e:
                        logger.error(f"Error loading workflow file {filename}: {str(e)}")

            logger.info(f"Loaded {len(workflows)} workflows from files")

            # Sort by last activity (newest first)
            return sorted(workflows, key=lambda w: w.get("last_activity", ""), reverse=True)

        except Exception as e:
            logger.error(f"Error listing workflows: {str(e)}")
            return []
    async def execute_workflow(self, message: Dict[str, Any], files: List[Dict[str, Any]] = None, workflow_id: str = None, is_user_input: bool = False) -> Dict[str, Any]:
        """
        Execute a workflow with the given message and files.

        Args:
            message: Input message (prompt)
            files: Optional list of file metadata
            workflow_id: Optional ID for continuing an existing workflow
            is_user_input: Flag indicating if this is user input to an existing workflow

        Returns:
            Workflow execution result
        """

        # Use the provided workflow_id or generate a new one for a new workflow
        if not workflow_id:
            workflow_id = f"wf_{uuid.uuid4()}"
            # Initialize a new workflow
            workflow = self._initialize_workflow(workflow_id)
        else:
            # Load existing workflow for continuation
            workflow = await self.load_workflow(workflow_id)
            if not workflow:
                # Fallback: initialize a new workflow with the provided ID
                workflow = self._initialize_workflow(workflow_id)

        # Capture start time
        start_time = datetime.now()

        try:
            # Create WorkflowExecution with document handler
            from modules.agentservice_workflow_execution import WorkflowExecution
            execution = WorkflowExecution(
                workflow_manager=self,
                workflow_id=workflow_id,
                mandate_id=self.mandate_id,
                user_id=self.user_id,
                ai_service=self.ai_service,
                lucydom_interface=self.lucydom_interface
            )

            # Set the document handler's workflow ID
            self.document_handler.set_workflow_id(workflow_id)

            # Execute the workflow
            result = await execution.execute(message, workflow, files, is_user_input)

            # Calculate duration
            duration = (datetime.now() - start_time).total_seconds()

            # Update workflow stats
            if "data_stats" not in workflow:
                workflow["data_stats"] = {
                    "total_processing_time": 0.0,
                    "total_token_count": 0,
                    "total_bytes_sent": 0,
                    "total_bytes_received": 0
                }
            workflow["data_stats"]["total_processing_time"] = duration
            workflow["completed_at"] = datetime.now().isoformat()

            # Save final state
            self._save_workflow(workflow)

            return result

        except Exception as e:
            logger.error(f"Error executing workflow: {str(e)}", exc_info=True)

            # Update workflow status
            workflow["status"] = "failed"
            workflow["last_activity"] = datetime.now().isoformat()
            self._add_log(workflow, f"Workflow execution failed: {str(e)}", "error")

            # Save failed state
            self._save_workflow(workflow)

            return {
                "workflow_id": workflow_id,
                "status": "failed",
                "error": str(e)
            }
    def _save_workflow(self, workflow: Dict[str, Any]) -> bool:
        """
        Save workflow state to database and/or file.
        Enhanced to handle structured documents.

        Args:
            workflow: The workflow object to save

        Returns:
            True if saved successfully, False otherwise
        """
        try:
            workflow_id = workflow.get("id")

            # Update in-memory cache
            self.workflows[workflow_id] = workflow

            # Update in database if available
            if self.lucydom_interface:
                # NEW: Enhanced document handling for database persistence
                # Create a copy of the workflow for database storage
                db_workflow = workflow.copy()
                # Save to database
                try:
                    self.lucydom_interface.save_workflow_state(db_workflow)
                    logger.info(f"Workflow {workflow_id} saved to database")
                except Exception as db_error:
                    logger.error(f"Error saving workflow to database: {str(db_error)}")
                    # Continue to file saving even if the database fails

            # Save to file (always do this as a backup)
            import json
            workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json")

            with open(workflow_path, 'w', encoding='utf-8') as f:
                json.dump(workflow, f, indent=2, ensure_ascii=False)

            logger.info(f"Workflow {workflow_id} saved to file: {workflow_path}")
            return True

        except Exception as e:
            logger.error(f"Error saving workflow state: {str(e)}")
            return False
    async def load_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]:
        """
        Load a workflow by ID.
        Enhanced to ensure the document handler is properly configured.

        Args:
            workflow_id: ID of the workflow to load

        Returns:
            The workflow object or None if not found
        """
        # Check the in-memory cache first
        if workflow_id in self.workflows:
            workflow = self.workflows[workflow_id]

            # NEW: Configure document handler for this workflow
            self.document_handler.set_workflow_id(workflow_id)

            return workflow

        # Try to load from the database
        if self.lucydom_interface:
            try:
                workflow = self.lucydom_interface.load_workflow_state(workflow_id)
                if workflow:
                    # Cache in memory
                    self.workflows[workflow_id] = workflow

                    # NEW: Configure document handler for this workflow
                    self.document_handler.set_workflow_id(workflow_id)

                    logger.info(f"Workflow {workflow_id} loaded from database")
                    return workflow
            except Exception as e:
                logger.error(f"Error loading workflow from database: {str(e)}")

        # Try to load from file
        workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json")

        if os.path.exists(workflow_path):
            try:
                import json
                with open(workflow_path, 'r', encoding='utf-8') as f:
                    workflow = json.load(f)

                # Cache in memory
                self.workflows[workflow_id] = workflow

                # NEW: Configure document handler for this workflow
                self.document_handler.set_workflow_id(workflow_id)

                logger.info(f"Workflow {workflow_id} loaded from file: {workflow_path}")
                return workflow
            except Exception as e:
                logger.error(f"Error loading workflow from file: {str(e)}")

        logger.warning(f"Workflow {workflow_id} not found")
        return None
    async def delete_workflow(self, workflow_id: str) -> bool:
        """
        Delete a workflow.

        Args:
            workflow_id: ID of the workflow

        Returns:
            True on success, False if the workflow was not found
        """
        # Remove from memory
        if workflow_id in self.workflows:
            del self.workflows[workflow_id]

        # Delete from the database
        if self.lucydom_interface:
            try:
                db_success = self.lucydom_interface.delete_workflow(workflow_id)
                logger.info(f"Workflow {workflow_id} deleted from database: {db_success}")
            except Exception as e:
                logger.error(f"Error deleting workflow {workflow_id} from database: {str(e)}")

        # Delete the file
        workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json")

        try:
            if os.path.exists(workflow_path):
                os.remove(workflow_path)
                logger.info(f"Workflow {workflow_id} deleted from file: {workflow_path}")
                return True
            else:
                logger.warning(f"Workflow {workflow_id} not found: {workflow_path}")
                return False
        except Exception as e:
            logger.error(f"Error deleting workflow file {workflow_id}: {str(e)}")
            return False
    def _initialize_workflow(self, workflow_id: str) -> Dict[str, Any]:
        """
        Initialize a new workflow.

        Args:
            workflow_id: ID of the workflow

        Returns:
            The initialized workflow object
        """
        current_time = datetime.now().isoformat()

        # Create the complete workflow object according to the data model
        workflow = {
            "id": workflow_id,
            "name": f"Workflow {workflow_id}",
            "mandate_id": self.mandate_id,
            "user_id": self.user_id,
            "status": "running",
            "started_at": current_time,
            "last_activity": current_time,
            "current_round": 1,

            # Complete statistics structure according to the DataStats model
            "data_stats": {
                "total_processing_time": 0.0,
                "total_token_count": 0,
                "total_bytes_sent": 0,
                "total_bytes_received": 0
            },

            # Empty arrays for messages and logs
            "messages": [],
            "logs": []
        }

        # Log entry for workflow start
        self._add_log(workflow, "Workflow started", "info", "workflow", "Workflow Management")

        # Save workflow to database
        if self.lucydom_interface:
            try:
                # Direct save of the complete workflow object
                self.lucydom_interface.save_workflow_state(workflow)
                logger.info(f"Workflow {workflow_id} created in database")
            except Exception as e:
                logger.error(f"Error creating workflow {workflow_id} in database: {str(e)}")

        # Cache workflow in memory
        self.workflows[workflow_id] = workflow

        return workflow
    async def stop_workflow(self, workflow_id: str) -> bool:
        """
        Stop a running workflow.

        Args:
            workflow_id: ID of the workflow to stop

        Returns:
            True on success, False if the workflow was not found or is not in a stoppable state
        """
        try:
            workflow = self.workflows.get(workflow_id)

            if not workflow:
                # Try to load the workflow
                workflow = await self.load_workflow(workflow_id)
                if not workflow:
                    return False

            # Abort if the workflow is neither running nor completed
            if workflow.get("status") not in ["running", "completed"]:
                return False

            # Set status to stopped
            workflow["status"] = "stopped"
            workflow["last_activity"] = datetime.now().isoformat()

            self._add_log(workflow, "Workflow was manually stopped", "info", "workflow", "Workflow Management")

            # Save the workflow
            self._save_workflow(workflow)

            return True
        except Exception as e:
            logger.error(f"Error stopping workflow {workflow_id}: {str(e)}")
            return False
    def _add_log(self, workflow: Dict[str, Any], message: str, log_type: str, agent_id: Optional[str] = None, agent_name: Optional[str] = None) -> None:
        """Add a log entry to the workflow."""
        # First, check if workflow is a string (ID) instead of a dictionary
        if isinstance(workflow, str):
            # Try to load the workflow by ID
            workflow_id = workflow
            workflow = self.workflows.get(workflow_id)
            if not workflow:
                # Just log to the logger and return
                logger.info(f"Log (couldn't add to workflow {workflow_id}): {log_type} - {message}")
                return

        # Check if workflow is a dictionary
        if not isinstance(workflow, dict):
            logger.error(f"Invalid workflow type: {type(workflow)}. Expected dictionary.")
            # Just log to the logger and return
            logger.info(f"Log (couldn't add to workflow): {log_type} - {message}")
            return

        # Create log entry
        log_entry = {
            "id": f"log_{uuid.uuid4()}",
            "message": message,
            "type": log_type,
            "timestamp": datetime.now().isoformat(),
            "agent_id": agent_id,
            "agent_name": agent_name
        }

        # Add log entry to workflow
        if "logs" not in workflow:
            workflow["logs"] = []

        workflow["logs"].append(log_entry)

        # Update last activity
        workflow["last_activity"] = log_entry["timestamp"]

        # Save log entry to database if available
        if self.lucydom_interface:
            try:
                # Add workflow ID to log entry
                log_data = log_entry.copy()
                log_data["workflow_id"] = workflow["id"]

                self.lucydom_interface.create_workflow_log(log_data)
                logger.debug(f"Log entry for workflow {workflow['id']} saved to database")
            except Exception as e:
                logger.error(f"Error saving log entry for workflow {workflow['id']} to database: {str(e)}")

        # Also log to the standard logger with the category prefix
        category_prefix = f"[{agent_name or agent_id or 'Workflow'}]" if agent_name or agent_id else ""
        log_message = f"{category_prefix} {message}"

        if log_type == "error":
            logger.error(log_message)
        elif log_type == "warning":
            logger.warning(log_message)
        else:
            logger.info(log_message)
    def get_workflow_status(self, workflow_id: str) -> Optional[Dict[str, Any]]:
        """
        Get the status of a workflow.

        Args:
            workflow_id: ID of the workflow

        Returns:
            Dictionary with status information or None if the workflow was not found
        """
        # Get from memory
        workflow = self.workflows.get(workflow_id)

        # If not in memory, load from database or file
        if not workflow:
            # Load from database if available
            if self.lucydom_interface:
                try:
                    workflow_data = self.lucydom_interface.get_workflow(workflow_id)
                    if workflow_data:
                        workflow = workflow_data
                except Exception as e:
                    logger.error(f"Error loading workflow status from database: {str(e)}")

            # If not in the database, load from file
            if not workflow:
                try:
                    import json
                    workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json")
                    if os.path.exists(workflow_path):
                        with open(workflow_path, 'r', encoding='utf-8') as f:
                            workflow = json.load(f)
                except Exception as e:
                    logger.error(f"Error loading workflow status from file: {str(e)}")
                    return None

        if not workflow:
            return None

        # Extract status information
        status_info = {
            "id": workflow.get("id"),
            "name": workflow.get("name", f"Workflow {workflow_id}"),
            "status": workflow.get("status"),
            "progress": 1.0 if workflow.get("status") in ["completed", "failed", "stopped"] else 0.5,
            "started_at": workflow.get("started_at"),
            "last_activity": workflow.get("last_activity"),
            "workflow_complete": workflow.get("status") == "completed",
            "current_round": workflow.get("current_round", 1),
            "data_stats": workflow.get("data_stats", {
                "total_processing_time": 0.0,
                "total_token_count": 0,
                "total_bytes_sent": 0,
                "total_bytes_received": 0
            })
        }

        return status_info
    def get_workflow_logs(self, workflow_id: str) -> Optional[List[Dict[str, Any]]]:
        """
        Get logs for a workflow.

        Args:
            workflow_id: ID of the workflow

        Returns:
            List of logs or None if the workflow was not found
        """
        # Get from memory
        workflow = self.workflows.get(workflow_id)

        # If not in memory, load from the database
        if not workflow and self.lucydom_interface:
            try:
                logs = self.lucydom_interface.get_workflow_logs(workflow_id)
                return logs
            except Exception as e:
                logger.error(f"Error loading workflow logs from database: {str(e)}")

        # If not in the database or no interface is available, load from file
        if not workflow:
            try:
                import json
                workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json")
                if os.path.exists(workflow_path):
                    with open(workflow_path, 'r', encoding='utf-8') as f:
                        workflow = json.load(f)
            except Exception as e:
                logger.error(f"Error loading workflow logs from file: {str(e)}")
                return None

        return workflow.get("logs", []) if workflow else None
    def get_workflow_messages(self, workflow_id: str) -> Optional[List[Dict[str, Any]]]:
        """
        Get messages for a workflow.

        Args:
            workflow_id: ID of the workflow

        Returns:
            List of messages or None if the workflow was not found
        """
        # Get from memory
        workflow = self.workflows.get(workflow_id)

        # If not in memory, load from the database
        if not workflow and self.lucydom_interface:
            try:
                messages = self.lucydom_interface.get_workflow_messages(workflow_id)
                return messages
            except Exception as e:
                logger.error(f"Error loading workflow messages from database: {str(e)}")

        # If not in the database or no interface is available, load from file
        if not workflow:
            try:
                import json
                workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json")
                if os.path.exists(workflow_path):
                    with open(workflow_path, 'r', encoding='utf-8') as f:
                        workflow = json.load(f)
            except Exception as e:
                logger.error(f"Error loading workflow messages from file: {str(e)}")
                return None

        return workflow.get("messages", []) if workflow else None
# Factory function for WorkflowManager
def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None):
    """
    Get a WorkflowManager instance for the specified context.
    Reuses existing instances and updates dependencies.

    Args:
        mandate_id: Mandate ID
        user_id: User ID
        ai_service: AI service
        lucydom_interface: LucyDOM interface

    Returns:
        WorkflowManager instance
    """
    from modules.lucydom_interface import get_lucydom_interface

    context_key = f"{mandate_id}_{user_id}"

    # Get LucyDOM interface if not provided
    if not lucydom_interface:
        lucydom_interface = get_lucydom_interface(mandate_id, user_id)

    if context_key not in _workflow_managers:
        _workflow_managers[context_key] = WorkflowManager(
            mandate_id,
            user_id,
            ai_service,
            lucydom_interface
        )

    # Update services if provided
    if ai_service is not None:
        _workflow_managers[context_key].ai_service = ai_service

        # NEW: Update document handler's AI service
        if hasattr(_workflow_managers[context_key], 'document_handler'):
            _workflow_managers[context_key].document_handler.set_ai_service(ai_service)

        # NEW: Update agent registry dependencies
        from modules.agentservice_registry import AgentRegistry
        registry = AgentRegistry.get_instance()
        registry.set_dependencies(ai_service=ai_service)

    return _workflow_managers[context_key]

# Registry of WorkflowManager instances, one per mandate/user context
_workflow_managers = {}
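A hedged usage sketch of the factory inside the application; the service objects are resolved by the factory itself, so only the context IDs are required here:

import asyncio

async def demo():
    manager = get_workflow_manager(mandate_id=1, user_id=42)
    result = await manager.execute_workflow({"role": "user", "content": "Summarize the Q1 report"})
    print(result.get("workflow_id"), result.get("status"))

asyncio.run(demo())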
@ -6,7 +6,7 @@ from jose import JWTError, jwt
import logging

from modules.gateway_interface import get_gateway_interface
from modules.utility import APP_CONFIG
from gateway.modules.configuration import APP_CONFIG

# Get Config Data
SECRET_KEY = APP_CONFIG.get("APP_JWT_SECRET_SECRET")
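Downstream token verification with python-jose presumably looks something like this sketch; the HS256 algorithm and the return shape are assumptions, not visible in the hunk:

from typing import Optional
from jose import JWTError, jwt

def verify_token(token: str) -> Optional[dict]:
    try:
        # HS256 is an assumption; the configured algorithm is not shown in the diff.
        return jwt.decode(token, SECRET_KEY, algorithms=["HS256"])
    except JWTError:
        return None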
File diff suppressed because it is too large

880 modules/chat.py
File diff suppressed because it is too large
@ -27,7 +27,6 @@ class AgentAnalyst(AgentBase):
        super().__init__()
        self.name = "Data Analyst"
        self.capabilities = "data_analysis,pattern_recognition,statistics,visualization,data_interpretation"
        self.result_format = "AnalysisReport"

        # Visualization settings
        self.plt_style = 'seaborn-v0_8-whitegrid'

@ -38,13 +37,6 @@ class AgentAnalyst(AgentBase):
    def get_agent_info(self) -> Dict[str, Any]:
        """Returns agent information for the registry"""
        info = super().get_config()
        info.update({
            "metadata": {
                "supported_formats": ["csv", "xlsx", "json", "text"],
                "analysis_types": ["statistical", "trend", "comparative", "predictive", "clustering", "general"],
                "visualization_types": ["bar", "line", "scatter", "histogram", "box", "heatmap", "pie"]
            }
        })
        return info

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:

@ -66,7 +58,6 @@ class AgentAnalyst(AgentBase):
            "role": "assistant",
            "content": "",
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id,
            "documents": []
        }

@ -27,7 +27,6 @@ class AgentCoder(AgentBase):
        super().__init__()
        self.name = "Python Code Agent"
        self.capabilities = "code_development,data_processing,file_processing,automation"
        self.result_format = "python_code"

        # Executor settings
        self.executor_timeout = 60  # seconds

@ -42,13 +41,6 @@ class AgentCoder(AgentBase):
    def get_agent_info(self) -> Dict[str, Any]:
        """Returns agent information for the registry"""
        info = super().get_config()
        info.update({
            "metadata": {
                "timeout": self.executor_timeout,
                "memory_limit": self.executor_memory_limit,
                "max_correction_attempts": self.max_correction_attempts
            }
        })
        return info

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:

@ -70,7 +62,6 @@ class AgentCoder(AgentBase):
            "role": "assistant",
            "content": "",
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id,
            "documents": []
        }

@ -23,26 +23,9 @@ class AgentCreative(AgentBase):
                              "document_analysis,text_processing,table_creation,"
                              "content_structuring")

        self.result_format = "Text,Document,Table"

    def get_agent_info(self) -> Dict[str, Any]:
        """Returns agent information for the registry"""
        info = super().get_config()
        info.update({
            "metadata": {
                "specialties": [
                    "creative_writing",
                    "documentation",
                    "knowledge",
                    "poweron",
                    "document_processing",
                    "information_extraction",
                    "content_transformation",
                    "table_generation",
                    "document_analysis"
                ]
            }
        })
        return info

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:

@ -64,7 +47,6 @@ class AgentCreative(AgentBase):
            "role": "assistant",
            "content": "",
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id,
            "documents": []
        }

@ -21,17 +21,10 @@ class AgentDocumentation(AgentBase):
        super().__init__()
        self.name = "Documentation Specialist"
        self.capabilities = "report_generation,documentation,content_structuring,technical_writing,knowledge_organization"
        self.result_format = "FormattedDocument"

    def get_agent_info(self) -> Dict[str, Any]:
        """Returns agent information for the registry"""
        info = super().get_config()
        info.update({
            "metadata": {
                "document_types": ["manual", "report", "process", "presentation", "document"],
                "formats": ["markdown", "text"]
            }
        })
        return info

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:

@ -53,7 +46,6 @@ class AgentDocumentation(AgentBase):
            "role": "assistant",
            "content": "",
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id,
            "documents": []
        }

@ -79,7 +71,7 @@ class AgentDocumentation(AgentBase):
        is_complex = self._assess_complexity(enhanced_prompt)

        # Generate title
        title = self._generate_title(enhanced_prompt, document_type)
        title = await self._generate_title(enhanced_prompt, document_type)

        # Generate content based on complexity
        if is_complex:

@ -6,14 +6,13 @@ Adapted for the new chat.py architecture and chat_registry.py.
import json
import logging
import time
import traceback
from typing import Dict, Any, List, Optional
from urllib.parse import quote_plus, unquote

from bs4 import BeautifulSoup
import requests
from modules.chat_registry import AgentBase
from modules.utility import APP_CONFIG
from modules.configuration import APP_CONFIG

logger = logging.getLogger(__name__)

@ -24,8 +23,7 @@ class AgentWebcrawler(AgentBase):
        """Initializes the webcrawler agent"""
        super().__init__()
        self.name = "Webscraper"
        self.capabilities = "web_search,information_retrieval,data_collection,source_verification,content_integration"
        self.result_format = "SearchResults"
        self.capabilities = "web_search,website_information_retrieval"

        # Web crawling configuration
        self.max_url = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_URLS"))

@ -36,13 +34,6 @@ class AgentWebcrawler(AgentBase):
    def get_agent_info(self) -> Dict[str, Any]:
        """Returns agent information for the registry"""
        info = super().get_config()
        info.update({
            "metadata": {
                "max_url": self.max_url,
                "max_result": self.max_result,
                "timeout": self.timeout
            }
        })
        return info

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:

@ -64,7 +55,6 @@ class AgentWebcrawler(AgentBase):
            "role": "assistant",
            "content": "",
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id
        }
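The same response scaffold recurs across these agent diffs; a hedged sketch of the shared pattern (AgentBase internals are not shown in this commit, so this subclass is illustrative only):

class AgentEcho(AgentBase):
    """Illustrative agent following the scaffold visible in the hunks above."""

    def __init__(self):
        super().__init__()
        self.name = "Echo"
        self.capabilities = "echo"
        self.result_format = "Text"

    async def process_message(self, message, context=None):
        workflow_id = (context or {}).get("workflow_id")
        return {
            "role": "assistant",
            "content": message.get("content", ""),
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id,
            "documents": []
        }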
777 modules/chat_content_extraction.py
Normal file
@ -0,0 +1,777 @@
"""
Module for extracting content from various file formats.
Provides specialized functions for processing text, PDF, Office documents, images, etc.
"""

import logging
import os
import io
from typing import Dict, Any, List, Optional, Union, Tuple
import base64

# Configure logger
logger = logging.getLogger(__name__)

# Optional imports - only loaded when needed
pdf_extractor_loaded = False
office_extractor_loaded = False
image_processor_loaded = False

def get_document_contents(file_metadata: Dict[str, Any], file_content: bytes) -> List[Dict[str, Any]]:
    """
    Main function for extracting content from a file based on its MIME type.
    Delegates to specialized extraction functions.

    Args:
        file_metadata: Metadata of the file (name, MIME type, etc.)
        file_content: Binary data of the file

    Returns:
        List of document content objects with metadata and an is_text flag
    """
    try:
        mime_type = file_metadata.get("mime_type", "application/octet-stream")
        file_name = file_metadata.get("name", "unknown")

        logger.info(f"Extracting contents from file '{file_name}' (MIME type: {mime_type})")

        # Extract contents based on MIME type
        contents = []

        # CSV format (checked first: "text/csv" also matches the generic "text/" prefix below)
        if mime_type == "text/csv":
            contents.extend(extract_csv_content(file_name, file_content))

        # Text-based formats
        elif mime_type.startswith("text/") or mime_type in [
            "application/json",
            "application/xml",
            "application/javascript",
            "application/x-python"
        ]:
            contents.extend(extract_text_content(file_name, file_content, mime_type))

        # Images
        elif mime_type.startswith("image/"):
            contents.extend(extract_image_content(file_name, file_content, mime_type))

        # PDF documents
        elif mime_type == "application/pdf":
            contents.extend(extract_pdf_content(file_name, file_content))

        # Word documents
        elif mime_type in [
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
            "application/msword"
        ]:
            contents.extend(extract_word_content(file_name, file_content, mime_type))

        # Excel documents
        elif mime_type in [
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.ms-excel"
        ]:
            contents.extend(extract_excel_content(file_name, file_content, mime_type))

        # PowerPoint documents
        elif mime_type in [
            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
            "application/vnd.ms-powerpoint"
        ]:
            contents.extend(extract_powerpoint_content(file_name, file_content, mime_type))

        # Binary data as a fallback for unknown formats
        else:
            contents.extend(extract_binary_content(file_name, file_content, mime_type))

        # Fallback if no contents could be extracted
        if not contents:
            logger.warning(f"No contents extracted from file '{file_name}', using binary fallback")
            contents.append({
                "sequence_nr": 1,
                "name": '1_undefined',
                "ext": os.path.splitext(file_name)[1][1:] if os.path.splitext(file_name)[1] else "bin",
                "content_type": mime_type,
                "data": file_content,
                "metadata": {
                    "is_text": False
                }
            })

        for content in contents:
            if isinstance(content.get("data"), bytes):
                content["data"] = base64.b64encode(content["data"]).decode('utf-8')
                # Mark in the metadata that this is base64-encoded
                if "metadata" not in content:
                    content["metadata"] = {}
                content["metadata"]["base64_encoded"] = True

        logger.info(f"Successfully extracted {len(contents)} contents from file '{file_name}'")
        return contents

    except Exception as e:
        logger.error(f"Error during content extraction: {str(e)}")
        # Fallback on error - return the original data
        return [{
            "sequence_nr": 1,
            "name": file_metadata.get("name", "unknown"),
            "ext": os.path.splitext(file_metadata.get("name", ""))[1][1:] if os.path.splitext(file_metadata.get("name", ""))[1] else "bin",
            "content_type": file_metadata.get("mime_type", "application/octet-stream"),
            "data": file_content,
            "metadata": {
                "is_text": False
            }
        }]
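A self-contained call of the dispatcher on a small CSV payload:

meta = {"name": "sales.csv", "mime_type": "text/csv"}
raw = b"region,revenue\nEMEA,1200\nAPAC,900\n"

for part in get_document_contents(meta, raw):
    print(part["sequence_nr"], part["name"], part["content_type"], part["metadata"]["is_text"])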
def _load_pdf_extractor():
    """Loads the PDF extraction libraries on demand"""
    global pdf_extractor_loaded
    if not pdf_extractor_loaded:
        try:
            global PyPDF2, fitz
            import PyPDF2
            import fitz  # PyMuPDF for more extensive PDF processing
            pdf_extractor_loaded = True
            logger.info("PDF extraction libraries loaded successfully")
        except ImportError as e:
            logger.warning(f"PDF extraction libraries could not be loaded: {e}")

def _load_office_extractor():
    """Loads the Office document extraction libraries on demand"""
    global office_extractor_loaded
    if not office_extractor_loaded:
        try:
            global docx, openpyxl
            import docx  # python-docx for Word documents
            import openpyxl  # for Excel files
            office_extractor_loaded = True
            logger.info("Office extraction libraries loaded successfully")
        except ImportError as e:
            logger.warning(f"Office extraction libraries could not be loaded: {e}")

def _load_image_processor():
    """Loads the image processing libraries on demand"""
    global image_processor_loaded
    if not image_processor_loaded:
        try:
            global Image
            from PIL import Image
            image_processor_loaded = True
            logger.info("Image processing libraries loaded successfully")
        except ImportError as e:
            logger.warning(f"Image processing libraries could not be loaded: {e}")
def extract_text_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
    """
    Extracts text from text files.

    Args:
        file_name: Name of the file
        file_content: Binary data of the file
        mime_type: MIME type of the file

    Returns:
        List of text content objects with metadata.is_text = True
    """
    try:
        # Keep the original file extension
        file_extension = os.path.splitext(file_name)[1][1:] if os.path.splitext(file_name)[1] else "txt"

        # Extract the text content
        text_content = file_content.decode('utf-8')
        return [{
            "sequence_nr": 1,
            "name": "1_text",  # Simplified naming
            "ext": file_extension,
            "content_type": "text",
            "data": text_content,
            "metadata": {
                "is_text": True
            }
        }]
    except UnicodeDecodeError:
        logger.warning(f"Could not decode text from file '{file_name}' as UTF-8, trying other encodings")
        try:
            # Try alternative encodings
            for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
                try:
                    text_content = file_content.decode(encoding)
                    logger.info(f"Text decoded successfully with encoding {encoding}")
                    return [{
                        "sequence_nr": 1,
                        "name": "1_text",  # Simplified naming
                        "ext": file_extension,
                        "content_type": "text",
                        "data": text_content,
                        "metadata": {
                            "is_text": True,
                            "encoding": encoding
                        }
                    }]
                except UnicodeDecodeError:
                    continue

            # Fall back to binary data if no encoding works
            logger.warning("Could not decode text, using binary data")
            return [{
                "sequence_nr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": file_extension,
                "content_type": mime_type,
                "data": file_content,
                "metadata": {
                    "is_text": False
                }
            }]
        except Exception as e:
            logger.error(f"Error during alternative text decoding: {str(e)}")
            # Return binary data as a fallback
            return [{
                "sequence_nr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": file_extension,
                "content_type": mime_type,
                "data": file_content,
                "metadata": {
                    "is_text": False
                }
            }]
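The fallback chain above is worth a standalone look: 'latin-1' maps every possible byte, so it decodes anything and the cp1252/iso-8859-1 entries are effectively unreachable. A compact equivalent:

def decode_best_effort(raw: bytes) -> tuple:
    # Note: 'latin-1' accepts any byte sequence, so in practice it ends the chain.
    for enc in ("utf-8", "latin-1", "cp1252", "iso-8859-1"):
        try:
            return raw.decode(enc), enc
        except UnicodeDecodeError:
            continue
    return raw.decode("utf-8", errors="replace"), "utf-8+replace"

print(decode_best_effort(b"caf\xe9"))  # ('café', 'latin-1')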
def extract_csv_content(file_name: str, file_content: bytes) -> List[Dict[str, Any]]:
    """
    Extracts content from CSV files.

    Args:
        file_name: Name of the file
        file_content: Binary data of the file

    Returns:
        List of CSV content objects with metadata.is_text = True
    """
    try:
        # Extract the text content
        csv_content = file_content.decode('utf-8')
        return [{
            "sequence_nr": 1,
            "name": "1_csv",  # Simplified naming
            "ext": "csv",
            "content_type": "csv",
            "data": csv_content,
            "metadata": {
                "is_text": True,
                "format": "csv"
            }
        }]
    except UnicodeDecodeError:
        logger.warning(f"Could not decode CSV from file '{file_name}' as UTF-8, trying other encodings")
        try:
            # Try alternative encodings for CSV
            for encoding in ['latin-1', 'cp1252', 'iso-8859-1']:
                try:
                    csv_content = file_content.decode(encoding)
                    logger.info(f"CSV decoded successfully with encoding {encoding}")
                    return [{
                        "sequence_nr": 1,
                        "name": "1_csv",  # Simplified naming
                        "ext": "csv",
                        "content_type": "csv",
                        "data": csv_content,
                        "metadata": {
                            "is_text": True,
                            "encoding": encoding,
                            "format": "csv"
                        }
                    }]
                except UnicodeDecodeError:
                    continue

            # Fall back to binary data
            return [{
                "sequence_nr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": "csv",
                "content_type": "text/csv",
                "data": file_content,
                "metadata": {
                    "is_text": False
                }
            }]
        except Exception as e:
            logger.error(f"Error during alternative CSV decoding: {str(e)}")
            return [{
                "sequence_nr": 1,
                "name": "1_binary",  # Simplified naming
                "ext": "csv",
                "content_type": "text/csv",
                "data": file_content,
                "metadata": {
                    "is_text": False
                }
            }]
def extract_image_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
    """
    Extracts content from image files and generates metadata descriptions where possible.

    Args:
        file_name: Name of the file
        file_content: Binary data of the file
        mime_type: MIME type of the file

    Returns:
        List of image content objects with metadata.is_text = False
    """

    # Derive the file extension from the MIME type or file name
    file_extension = mime_type.split('/')[-1]
    if file_extension == "jpeg":
        file_extension = "jpg"

    # If possible, analyze the image and extract metadata
    image_metadata = {
        "is_text": False,
        "format": "image"
    }
    image_description = None

    try:
        _load_image_processor()
        if image_processor_loaded and file_content and len(file_content) > 0:
            with io.BytesIO(file_content) as img_stream:
                try:
                    img = Image.open(img_stream)
                    # Verify that the image actually loaded
                    img.verify()
                    # Reload to continue working safely
                    img_stream.seek(0)
                    img = Image.open(img_stream)
                    image_metadata.update({
                        "format": img.format,
                        "mode": img.mode,
                        "width": img.width,
                        "height": img.height
                    })
                    # Extract EXIF data, if present
                    if hasattr(img, '_getexif') and callable(img._getexif):
                        exif = img._getexif()
                        if exif:
                            exif_data = {}
                            for tag_id, value in exif.items():
                                exif_data[f"tag_{tag_id}"] = str(value)
                            image_metadata["exif"] = exif_data

                    # Generate an image description
                    image_description = f"Image ({img.width}x{img.height}, {img.format}, {img.mode})"
                except Exception as inner_e:
                    logger.warning(f"Error processing the image: {str(inner_e)}")
                    image_metadata["error"] = str(inner_e)
                    image_description = f"Image (unable to process: {str(inner_e)})"
    except Exception as e:
        logger.warning(f"Could not extract image metadata: {str(e)}")
        image_metadata["error"] = str(e)

    # Return the image content
    contents = [{
        "sequence_nr": 1,
        "name": "1_image",  # Simplified naming
        "ext": file_extension,
        "content_type": "image",
        "data": file_content,
        "metadata": image_metadata
    }]

    # If an image description exists, add it as additional text content
    if image_description:
        contents.append({
            "sequence_nr": 2,
            "name": "2_text_image_info",  # Simplified naming with label
            "ext": "txt",
            "content_type": "text",
            "data": image_description,
            "metadata": {
                "is_text": True,
                "image_description": True
            }
        })

    return contents
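A minimal standalone check of the Pillow probing done above, with PNG bytes built in memory so the snippet is self-contained:

import io
from PIL import Image

buf = io.BytesIO()
Image.new("RGB", (4, 2), "red").save(buf, format="PNG")

img = Image.open(io.BytesIO(buf.getvalue()))
img.verify()  # cheap integrity check; the object must be reopened afterwards
img = Image.open(io.BytesIO(buf.getvalue()))  # reopen, as the extractor above does
print(img.format, img.mode, img.width, img.height)  # PNG RGB 4 2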
def extract_pdf_content(file_name: str, file_content: bytes) -> List[Dict[str, Any]]:
    """
    Extracts text and images from PDF files.

    Args:
        file_name: Name of the file
        file_content: Binary data of the file

    Returns:
        List of PDF content objects (text and images) with a metadata.is_text flag
    """
    contents = []
    extracted_content_found = False

    try:
        # Load the PDF extraction libraries
        _load_pdf_extractor()
        if not pdf_extractor_loaded:
            logger.warning("PDF extraction not possible: libraries not available")
            # Add the original file as binary content
            contents.append({
                "sequence_nr": 1,
                "name": "1_pdf",  # Simplified naming
                "ext": "pdf",
                "content_type": "application/pdf",
                "data": file_content,
                "metadata": {
                    "is_text": False,
                    "format": "pdf"
                }
            })
            return contents

        # Extract text with PyPDF2
        extracted_text = ""
        pdf_metadata = {}
        with io.BytesIO(file_content) as pdf_stream:
            pdf_reader = PyPDF2.PdfReader(pdf_stream)

            # Extract metadata
            pdf_info = pdf_reader.metadata or {}
            for key, value in pdf_info.items():
                if key.startswith('/'):
                    pdf_metadata[key[1:]] = value
                else:
                    pdf_metadata[key] = value

            # Extract text from all pages
            for page_num in range(len(pdf_reader.pages)):
                page = pdf_reader.pages[page_num]
                page_text = page.extract_text()
                if page_text:
                    extracted_text += f"--- Page {page_num + 1} ---\n{page_text}\n\n"

        # If text was found, add it as its own content
        if extracted_text.strip():
            extracted_content_found = True
            contents.append({
                "sequence_nr": len(contents) + 1,
                "name": f"{len(contents) + 1}_text",  # Simplified naming
                "ext": "txt",
                "content_type": "text",
                "data": extracted_text,
                "metadata": {
                    "is_text": True,
                    "source": "pdf",
                    "pages": len(pdf_reader.pages),
                    "pdf_metadata": pdf_metadata
                }
            })

        # Extract images with PyMuPDF (fitz)
        try:
            with io.BytesIO(file_content) as pdf_stream:
                doc = fitz.open(stream=pdf_stream, filetype="pdf")
                image_count = 0

                for page_num in range(len(doc)):
                    page = doc[page_num]
                    image_list = page.get_images(full=True)

                    for img_index, img_info in enumerate(image_list):
                        try:
                            image_count += 1
                            xref = img_info[0]
                            base_image = doc.extract_image(xref)
                            image_bytes = base_image["image"]
                            image_ext = base_image["ext"]

                            # Add the image as content
                            extracted_content_found = True
                            contents.append({
                                "sequence_nr": len(contents) + 1,
                                "name": f"{len(contents) + 1}_image_page{page_num+1}_{img_index+1}",  # Simplified naming with label
                                "ext": image_ext,
                                "content_type": f"image/{image_ext}",
                                "data": image_bytes,
                                "metadata": {
                                    "is_text": False,
                                    "source": "pdf",
                                    "page": page_num + 1,
                                    "index": img_index
                                }
                            })
                        except Exception as img_e:
                            logger.warning(f"Error extracting image {img_index} on page {page_num + 1}: {str(img_e)}")

                # Close the document
                doc.close()

        except Exception as img_extract_e:
            logger.warning(f"Error extracting images from PDF: {str(img_extract_e)}")

    except Exception as e:
        logger.error(f"Error during PDF extraction: {str(e)}")

    # If no contents were extracted, add the original PDF
    if not extracted_content_found:
        contents.append({
            "sequence_nr": 1,
            "name": "1_pdf",  # Simplified naming
            "ext": "pdf",
            "content_type": "application/pdf",
            "data": file_content,
            "metadata": {
                "is_text": False,
                "format": "pdf"
            }
        })

    return contents
def extract_word_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
    """
    Extracts text and images from Word documents.

    Args:
        file_name: Name of the file
        file_content: Binary data of the file
        mime_type: MIME type of the file

    Returns:
        List of Word content objects (text and, where present, images) with a metadata.is_text flag
    """
    contents = []
    extracted_content_found = False

    # Determine the file extension
    file_extension = "docx" if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document" else "doc"

    try:
        # Load the Office extraction libraries
        _load_office_extractor()
        if not office_extractor_loaded:
            logger.warning("Word extraction not possible: libraries not available")
            # Add the original file as binary content
            contents.append({
                "sequence_nr": 1,
                "name": "1_word",  # Simplified naming
                "ext": file_extension,
                "content_type": mime_type,
                "data": file_content,
                "metadata": {
                    "is_text": False,
                    "format": "word"
                }
            })
            return contents

        # Only DOCX (the newer format) is supported
        if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            with io.BytesIO(file_content) as docx_stream:
                doc = docx.Document(docx_stream)

                # Extract text
                full_text = []
                for para in doc.paragraphs:
                    full_text.append(para.text)

                # Extract tables
                for table in doc.tables:
                    for row in table.rows:
                        row_text = []
                        for cell in row.cells:
                            row_text.append(cell.text)
                        full_text.append(" | ".join(row_text))

                extracted_text = "\n\n".join(full_text)

                # Add the extracted text as content
                if extracted_text.strip():
                    extracted_content_found = True
                    contents.append({
                        "sequence_nr": 1,
                        "name": "1_text",  # Simplified naming
                        "ext": "txt",
                        "content_type": "text",
                        "data": extracted_text,
                        "metadata": {
                            "is_text": True,
                            "source": "docx",
                            "paragraph_count": len(doc.paragraphs),
                            "table_count": len(doc.tables)
                        }
                    })
        else:
            logger.warning("Extraction from the legacy Word format (DOC) is not supported")

    except Exception as e:
        logger.error(f"Error during Word extraction: {str(e)}")

    # If no contents were extracted, add the original document
    if not extracted_content_found:
        contents.append({
            "sequence_nr": 1,
            "name": "1_word",  # Simplified naming
            "ext": file_extension,
            "content_type": mime_type,
            "data": file_content,
            "metadata": {
                "is_text": False,
                "format": "word"
            }
        })

    return contents
def extract_excel_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
    """
    Extracts tabular data from Excel files.

    Args:
        file_name: Name of the file
        file_content: Binary data of the file
        mime_type: MIME type of the file

    Returns:
        List of Excel content objects with a metadata.is_text flag
    """
    contents = []
    extracted_content_found = False

    # Determine the file extension
    file_extension = "xlsx" if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" else "xls"

    try:
        # Load the Office extraction libraries
        _load_office_extractor()
        if not office_extractor_loaded:
            logger.warning("Excel-Extraktion nicht möglich: Bibliotheken nicht verfügbar")
            # Add the original file as binary content
            contents.append({
                "sequence_nr": 1,
                "name": "1_excel",  # Simplified naming
                "ext": file_extension,
                "content_type": mime_type,
                "data": file_content,
                "metadata": {
                    "is_text": False,
                    "format": "excel"
                }
            })
            return contents

        # Only XLSX (the newer format) is supported
        if mime_type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            with io.BytesIO(file_content) as xlsx_stream:
                workbook = openpyxl.load_workbook(xlsx_stream, data_only=True)

                # Extract each worksheet as a separate CSV content entry
                for sheet_index, sheet_name in enumerate(workbook.sheetnames):
                    sheet = workbook[sheet_name]

                    # Format the data as CSV
                    csv_rows = []
                    for row in sheet.iter_rows():
                        csv_row = []
                        for cell in row:
                            value = cell.value
                            if value is None:
                                csv_row.append("")
                            else:
                                csv_row.append(str(value).replace('"', '""'))
                        csv_rows.append(','.join(f'"{cell}"' for cell in csv_row))

                    csv_content = "\n".join(csv_rows)

                    # Add as CSV content
                    if csv_content.strip():
                        extracted_content_found = True
                        sheet_safe_name = sheet_name.replace(" ", "_").replace("/", "_").replace("\\", "_")
                        contents.append({
                            "sequence_nr": len(contents) + 1,
                            "name": f"{len(contents) + 1}_csv_{sheet_safe_name}",  # Simplified naming with sheet label
                            "ext": "csv",
                            "content_type": "csv",
                            "data": csv_content,
                            "metadata": {
                                "is_text": True,
                                "source": "xlsx",
                                "sheet": sheet_name,
                                "format": "csv"
                            }
                        })
        else:
            logger.warning("Extraktion aus altem Excel-Format (XLS) nicht unterstützt")

    except Exception as e:
        logger.error(f"Fehler bei der Excel-Extraktion: {str(e)}")

    # If nothing could be extracted, fall back to the original document
    if not extracted_content_found:
        contents.append({
            "sequence_nr": 1,
            "name": "1_excel",  # Simplified naming
            "ext": file_extension,
            "content_type": mime_type,
            "data": file_content,
            "metadata": {
                "is_text": False,
                "format": "excel"
            }
        })

    return contents


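The cell quoting above follows the standard CSV escaping rule: every cell is wrapped in double quotes and any embedded quote is doubled. A minimal, self-contained sketch of just that rule (the helper name is illustrative, not from the module):

def to_csv_row(cells):
    # Quote every cell and double embedded quotes, as in RFC 4180
    return ','.join('"' + str(c).replace('"', '""') + '"' for c in cells)

# 'He said "hi"' becomes "He said ""hi""" in the output row
print(to_csv_row(["a", 'He said "hi"', 3]))  # "a","He said ""hi""","3"
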
def extract_powerpoint_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
    """
    Extracts content from PowerPoint presentations.

    Args:
        file_name: Name of the file
        file_content: Binary data of the file
        mime_type: MIME type of the file

    Returns:
        List of PowerPoint content objects with metadata.is_text = False
    """
    # For PowerPoint we currently return only the original binary file;
    # a full extraction would require more specialized libraries
    file_extension = "pptx" if mime_type == "application/vnd.openxmlformats-officedocument.presentationml.presentation" else "ppt"
    return [{
        "sequence_nr": 1,
        "name": "1_powerpoint",  # Simplified naming
        "ext": file_extension,
        "content_type": mime_type,
        "data": file_content,
        "metadata": {
            "is_text": False,
            "format": "powerpoint"
        }
    }]


def extract_binary_content(file_name: str, file_content: bytes, mime_type: str) -> List[Dict[str, Any]]:
    """
    Fallback for binary files for which no specific extraction is possible.

    Args:
        file_name: Name of the file
        file_content: Binary data of the file
        mime_type: MIME type of the file

    Returns:
        List with one binary content object with metadata.is_text = False
    """
    file_extension = os.path.splitext(file_name)[1][1:] if os.path.splitext(file_name)[1] else "bin"
    return [{
        "sequence_nr": 1,
        "name": "1_binary",  # Simplified naming
        "ext": file_extension,
        "content_type": mime_type,
        "data": file_content,
        "metadata": {
            "is_text": False,
            "format": "binary"
        }
    }]

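Taken together, the extractors share one signature, so a caller can route by MIME type and fall back to the binary extractor. A minimal dispatch sketch, assuming the PDF extractor is named extract_pdf_content consistently with the others (the router itself is hypothetical, not part of this commit):

# Hypothetical router over the extractors above; falls back to binary
_EXTRACTORS = {
    "application/pdf": extract_pdf_content,
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document": extract_word_content,
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": extract_excel_content,
    "application/vnd.openxmlformats-officedocument.presentationml.presentation": extract_powerpoint_content,
}

def extract_contents(file_name: str, file_content: bytes, mime_type: str):
    extractor = _EXTRACTORS.get(mime_type, extract_binary_content)
    return extractor(file_name, file_content, mime_type)
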
@@ -140,9 +140,8 @@ class AgentRegistry:
            agent_infos.append({
                "name": agent.name,
                "capabilities": getattr(agent, 'capabilities', ""),
                "result_format": getattr(agent, 'result_format', "Text")
            })
            logger.error(f"Agent mit Kennung '{agent.name}' hat keine vollständigen Daten")
            logger.error(f"Agent '{agent.name}' does not show profile.")
            seen_agents.add(agent)
        return agent_infos


@@ -158,7 +157,6 @@ class AgentBase:
        """Initialize the base agent."""
        self.name = "Basis-Agent"
        self.capabilities = "Grundlegende Agentenfunktionen"
        self.result_format = "Text"
        self.ai_service = None

    def set_dependencies(self, ai_service=None):

@@ -168,7 +166,6 @@ class AgentBase:
        return {
            "name": self.name,
            "capabilities": self.capabilities,
            "result_format": self.result_format
        }

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:

@@ -179,7 +176,6 @@ class AgentBase:
                "role": "assistant",
                "content": f"Ich bin {self.name}, aber ich bin nicht richtig konfiguriert. Bitte den AI-Service einrichten.",
                "agent_name": self.name,
                "result_format": "Text"
            }

        # Build a simple prompt

@@ -196,7 +192,6 @@ class AgentBase:
                "role": "assistant",
                "content": response_content,
                "agent_name": self.name,
                "result_format": self.result_format
            }
        except Exception as e:
            logger.error(f"Fehler in Agent {self.name}: {str(e)}")

@@ -204,7 +199,6 @@ class AgentBase:
                "role": "assistant",
                "content": f"Ich habe einen Fehler festgestellt: {str(e)}",
                "agent_name": self.name,
                "result_format": "Text"
            }

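The hunks above outline the AgentBase contract: a name/capabilities/result_format profile plus an async process_message that returns an assistant-role dict. A minimal subclass sketch under that contract (the class and its behavior are illustrative, not part of this commit):

class EchoAgent(AgentBase):
    """Hypothetical agent that simply echoes the user message back."""

    def __init__(self):
        super().__init__()
        self.name = "Echo-Agent"
        self.capabilities = "Echoes the incoming message"
        self.result_format = "Text"

    async def process_message(self, message, context=None):
        # Mirrors the response shape used by AgentBase above
        return {
            "role": "assistant",
            "content": message.get("content", ""),
            "agent_name": self.name,
            "result_format": self.result_format
        }
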
@@ -6,11 +6,9 @@ config.ini files and environment variables stored in .env files, using a flat st
"""

import os
import configparser
import logging
from typing import Any, Dict, Optional
from pathlib import Path
import time

# Set up logging
logger = logging.getLogger(__name__)

@@ -5,7 +5,7 @@ import importlib
from passlib.context import CryptContext

from connectors.connector_db_json import DatabaseConnector
from modules.utility import APP_CONFIG
from modules.configuration import APP_CONFIG

logger = logging.getLogger(__name__)

@@ -1,6 +1,5 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from datetime import datetime


class Label(BaseModel):

@@ -7,7 +7,7 @@ import importlib
import hashlib

from connectors.connector_db_json import DatabaseConnector
from modules.utility import APP_CONFIG
from modules.configuration import APP_CONFIG

logger = logging.getLogger(__name__)

@@ -208,15 +208,18 @@ class LucyDOMInterface:
    # File utilities

    def calculate_file_hash(self, file_content: bytes) -> str:
        """Computes a SHA-256 hash for the file content"""
        return hashlib.sha256(file_content).hexdigest()

    def check_for_duplicate_file(self, file_hash: str) -> Optional[Dict[str, Any]]:
        """Checks whether a file with the same hash already exists"""
        files = self.db.get_recordset("files", record_filter={"file_hash": file_hash})
        if files:
            return files[0]
        return None

    def get_mime_type(self, filename: str) -> str:
        """Determines the MIME type based on the file extension"""
        import os
        ext = os.path.splitext(filename)[1].lower()[1:]
        extension_to_mime = {

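These two helpers implement content-addressed deduplication: hash the bytes, then look the hash up before storing. A short usage sketch, assuming an interface instance named lucy (the variable and sample bytes are illustrative):

content = b"%PDF-1.7 sample bytes"
digest = lucy.calculate_file_hash(content)        # SHA-256 hex string
existing = lucy.check_for_duplicate_file(digest)  # FileItem dict or None
if existing:
    print(f"duplicate of file {existing['id']}, skipping upload")
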
@@ -246,42 +249,53 @@ class LucyDOMInterface:
        return extension_to_mime.get(ext.lower(), "application/octet-stream")


    # File methods
    # File methods - metadata-based operations

    def get_all_files(self) -> List[Dict[str, Any]]:
        """Returns all files of the current mandate"""
        """
        Returns all files of the current mandate, without binary data.

        Returns:
            List of FileItem objects without binary data
        """
        files = self.db.get_recordset("files")
        # Remove binary data from the response to reduce payload size
        for file in files:
            if "data" in file:
                del file["data"]
        return files

    def get_file(self, file_id: int) -> Optional[Dict[str, Any]]:
        """Returns a file by its ID, without binary data"""
        """
        Returns a file by its ID, without binary data.

        Args:
            file_id: ID of the requested file

        Returns:
            FileItem without binary data, or None if not found
        """
        files = self.db.get_recordset("files", record_filter={"id": file_id})
        if files:
            file = files[0]
            # Remove binary data from the response to reduce payload size
            if "data" in file:
                del file["data"]
            return file
            return files[0]
        return None

    def create_file(self,
                    name: str,
                    mime_type: str,
                    size: int = None,
                    data: bytes = None,
                    file_hash: str = None) -> Dict[str, Any]:
        """Creates a new file record in the database including its content"""
    def create_file(self, name: str, mime_type: str, size: int = None, file_hash: str = None) -> Dict[str, Any]:
        """
        Creates a new file record in the database without content.
        The actual file content is stored separately in the FileData table.

        Args:
            name: Name of the file
            mime_type: MIME type of the file
            size: Size of the file in bytes
            file_hash: Hash value of the file, used for deduplication

        Returns:
            The created FileItem object
        """
        file_data = {
            "mandate_id": self.mandate_id,
            "user_id": self.user_id,
            "name": name,
            "mime_type": mime_type,
            "size": size,
            "data": data,  # The file content is now stored directly in the database
            "file_hash": file_hash,
            "creation_date": self._get_current_timestamp()
        }

@@ -289,14 +303,14 @@ class LucyDOMInterface:

    def update_file(self, file_id: int, update_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Updates an existing file
        Updates the metadata of an existing file without touching the binary data.

        Args:
            file_id: ID of the file to update
            update_data: Dictionary of fields to update

        Returns:
            The updated file object without binary data
            The updated FileItem object
        """
        # Check whether the file exists
        file = self.get_file(file_id)

@@ -304,17 +318,11 @@ class LucyDOMInterface:
            raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden")

        # Update the file
        updated_file = self.db.record_modify("files", file_id, update_data)

        # Remove binary data from the response
        if "data" in updated_file:
            del updated_file["data"]

        return updated_file
        return self.db.record_modify("files", file_id, update_data)

    def delete_file(self, file_id: int) -> bool:
        """
        Deletes a file from the database.
        Deletes a file from the database (metadata and content).

        Args:
            file_id: ID of the file

@@ -339,11 +347,20 @@ class LucyDOMInterface:
            other_references = [f for f in self.db.get_recordset("files", record_filter={"file_hash": file_hash})
                                if f.get("id") != file_id]

            # If other files reference this content, only delete the database entry
            # If other files reference this content, only delete the database entry for FileItem
            if other_references:
                logger.info(f"Andere Referenzen auf den Dateiinhalt gefunden, nur DB-Eintrag wird gelöscht: {file_id}")
                logger.info(f"Andere Referenzen auf den Dateiinhalt gefunden, nur FileItem wird gelöscht: {file_id}")
            else:
                # Also delete the file content in the FileData table
                try:
                    file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
                    if file_data_entries:
                        self.db.record_delete("file_data", file_id)
                        logger.info(f"FileData für Datei {file_id} gelöscht")
                except Exception as e:
                    logger.warning(f"Fehler beim Löschen des FileData für Datei {file_id}: {str(e)}")

            # Delete the database entry
            # Delete the FileItem entry
            return self.db.record_delete("files", file_id)

        except FileNotFoundError as e:

@@ -356,9 +373,165 @@ class LucyDOMInterface:
            logger.error(f"Fehler beim Löschen der Datei {file_id}: {str(e)}")
            raise FileDeletionError(f"Fehler beim Löschen der Datei: {str(e)}")


    # FileData methods - binary-data operations

    def create_file_data(self, file_id: int, data: bytes) -> bool:
        """
        Stores the binary data of a file in the database as a base64 string.

        Args:
            file_id: ID of the associated file
            data: Binary data

        Returns:
            True on success, False on error
        """
        try:
            import base64

            # Convert binary data to a base64 string
            if isinstance(data, bytes):
                encoded_data = base64.b64encode(data).decode('utf-8')
                logger.debug(f"Converted {len(data)} bytes to base64 string of length {len(encoded_data)}")
            else:
                logger.warning(f"Data is not bytes, but {type(data)}. Attempting to handle...")
                # Try to convert to bytes if it's not already
                if isinstance(data, str):
                    # Check if it might already be base64 encoded
                    try:
                        # See if it's valid base64
                        base64.b64decode(data)
                        # If no error, assume it's already encoded
                        encoded_data = data
                        logger.info("Data appears to be already base64 encoded, using as is")
                    except Exception:
                        # Not base64, so encode the string as bytes, then to base64
                        encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8')
                        logger.info("Converted string to base64")
                else:
                    # For other types, convert to string first
                    encoded_data = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
                    logger.warning("Converted non-standard type to base64")

            # Create the file_data record with the encoded data
            file_data = {
                "id": file_id,
                "data": encoded_data
            }

            self.db.record_create("file_data", file_data)
            logger.info(f"Successfully stored encoded data for file {file_id}")
            return True
        except Exception as e:
            logger.error(f"Fehler beim Speichern der Binärdaten für Datei {file_id}: {str(e)}")
            return False

    def get_file_data(self, file_id: int) -> Optional[bytes]:
        """
        Returns the binary data of a file.
        Converts the base64 string from the database back to bytes.

        Args:
            file_id: ID of the file

        Returns:
            Binary data, or None if not found
        """
        import base64

        file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})
        if file_data_entries and "data" in file_data_entries[0]:
            encoded_data = file_data_entries[0]["data"]

            try:
                # Check if it's a string (most likely base64)
                if isinstance(encoded_data, str):
                    try:
                        # Try to decode base64
                        binary_data = base64.b64decode(encoded_data)
                        logger.debug(f"Successfully decoded base64 string to {len(binary_data)} bytes")
                        return binary_data
                    except Exception as e:
                        logger.error(f"Failed to decode base64 data: {str(e)}")
                        # If it's not valid base64, return as bytes
                        return encoded_data.encode('utf-8')
                # If it's already bytes (shouldn't happen with the model change)
                elif isinstance(encoded_data, bytes):
                    logger.warning("Data was already bytes, no conversion needed")
                    return encoded_data
                else:
                    logger.error(f"Unexpected data type in database: {type(encoded_data)}")
                    return None
            except Exception as e:
                logger.error(f"Error processing file data: {str(e)}")
                return None
        else:
            logger.warning(f"No data found for file ID {file_id}")
            return None

    def update_file_data(self, file_id: int, data: Union[bytes, str]) -> bool:
        """
        Updates the binary data of a file in the database.
        Converts bytes to a base64 string for storage.

        Args:
            file_id: ID of the file
            data: New binary data or already-encoded data

        Returns:
            True on success, False on error
        """
        try:
            import base64

            # Convert data to a base64 string if it's bytes
            if isinstance(data, bytes):
                encoded_data = base64.b64encode(data).decode('utf-8')
                logger.debug(f"Converted {len(data)} bytes to base64 string")
            elif isinstance(data, str):
                # Check if it might already be base64 encoded
                try:
                    # See if it's valid base64
                    base64.b64decode(data)
                    # If no error, assume it's already encoded
                    encoded_data = data
                    logger.debug("Data appears to be already base64 encoded, using as is")
                except Exception:
                    # Not base64, so encode the string as bytes, then to base64
                    encoded_data = base64.b64encode(data.encode('utf-8')).decode('utf-8')
                    logger.debug("Converted string to base64")
            else:
                # For other types, convert to string first
                encoded_data = base64.b64encode(str(data).encode('utf-8')).decode('utf-8')
                logger.warning("Converted non-standard type to base64")

            # Check if a record already exists
            file_data_entries = self.db.get_recordset("file_data", record_filter={"id": file_id})

            if file_data_entries:
                # Update the existing record
                self.db.record_modify("file_data", file_id, {"data": encoded_data})
                logger.info(f"Updated existing file data for file ID {file_id}")
            else:
                # Create a new record
                file_data = {
                    "id": file_id,
                    "data": encoded_data
                }
                self.db.record_create("file_data", file_data)
                logger.info(f"Created new file data for file ID {file_id}")

            return True
        except Exception as e:
            logger.error(f"Fehler beim Aktualisieren der Binärdaten für Datei {file_id}: {str(e)}")
            return False

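The storage scheme is a plain base64 round trip: bytes are encoded to a UTF-8 string on write and decoded back on read. A standalone sketch of that invariant (no database involved; the payload is illustrative):

import base64

original = b"\x89PNG\r\n\x1a\n"                          # arbitrary binary payload
stored = base64.b64encode(original).decode('utf-8')      # what goes into file_data.data
restored = base64.b64decode(stored)                      # what get_file_data() hands back
assert restored == original
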
    def save_uploaded_file(self, file_content: bytes, file_name: str) -> Dict[str, Any]:
        """
        Stores an uploaded file directly in the database.
        Stores an uploaded file in the database.
        Metadata is stored in the 'files' table,
        binary data in the 'file_data' table as a base64 string.

        Args:
            file_content: Binary data of the file

@@ -385,11 +558,6 @@ class LucyDOMInterface:
            if existing_file:
                # Simply return the existing file metadata
                logger.info(f"Duplikat gefunden für {file_name}: {existing_file['id']}")
                # Remove the binary data from the response
                if "data" in existing_file:
                    existing_file_copy = existing_file.copy()
                    del existing_file_copy["data"]
                    return existing_file_copy
                return existing_file

            # Determine the MIME type

@@ -398,28 +566,25 @@ class LucyDOMInterface:
            # Determine the file size
            file_size = len(file_content)

            # Store in the database
            # 1. Store the metadata in the 'files' table
            logger.info(f"Saving file metadata to database for file: {file_name}")
            db_file = self.create_file(
                name=file_name,
                mime_type=mime_type,
                size=file_size,
                data=file_content,  # Store the file content directly in the database
                file_hash=file_hash
            )

            # 2. Store the binary data as a base64 string in the 'file_data' table
            logger.info(f"Saving file content to database for file: {file_name}")
            self.create_file_data(db_file["id"], file_content)

            # Debug: verify the database record was created
            if not db_file:
                logger.warning(f"Database record for file {file_name} was not created properly")
            else:
                logger.info(f"Database record created for file {file_name}")

            # Remove the binary data from the response
            if "data" in db_file:
                db_file_copy = db_file.copy()
                del db_file_copy["data"]
                db_file = db_file_copy

            logger.info(f"File upload process completed for: {file_name}")
            return db_file

@@ -429,7 +594,7 @@ class LucyDOMInterface:

    def download_file(self, file_id: int) -> Optional[Dict[str, Any]]:
        """
        Returns a file for download.
        Returns a file for download, including its binary data.

        Args:
            file_id: ID of the file

@@ -438,20 +603,24 @@ class LucyDOMInterface:
            Dictionary with file data and metadata, or None if not found
        """
        try:
            # Fetch the complete file including binary data from the database
            files = self.db.get_recordset("files", record_filter={"id": file_id})
            # 1. Fetch the metadata from the 'files' table
            file = self.get_file(file_id)

            if not files or "data" not in files[0]:
                raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden oder hat keine Daten")
            if not file:
                raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden")

            file = files[0]
            # 2. Fetch the binary data from the 'file_data' table
            file_content = self.get_file_data(file_id)

            if file_content is None:
                raise FileNotFoundError(f"Binärdaten für Datei mit ID {file_id} nicht gefunden")

            return {
                "id": file_id,
                "name": file.get("name", f"file_{file_id}"),
                "content_type": file.get("mime_type", "application/octet-stream"),
                "size": file.get("size", len(file.get("data", b""))),
                "content": file.get("data")
                "size": file.get("size", len(file_content)),
                "content": file_content
            }
        except FileNotFoundError as e:
            # Re-raise FileNotFoundError as is

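End to end, an upload is one call that writes both tables, and a download recombines them. A short usage sketch, assuming an interface instance named lucy (the variable and payload are illustrative):

meta = lucy.save_uploaded_file(b"hello world", "hello.txt")  # writes files + file_data
blob = lucy.download_file(meta["id"])                        # joins metadata and bytes
assert blob["content"] == b"hello world"
print(blob["name"], blob["size"])
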
@@ -1,139 +0,0 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from datetime import datetime


class Label(BaseModel):
    """Label for an attribute or a class, with support for multiple languages"""
    default: str
    translations: Dict[str, str] = {}

    def get_label(self, language: str = None):
        """Returns the label in the given language, or the default value if not available"""
        if language and language in self.translations:
            return self.translations[language]
        return self.default


class Prompt(BaseModel):
    """Data model for a prompt"""
    id: int = Field(description="Eindeutige ID des Prompts")
    mandate_id: int = Field(description="ID des zugehörigen Mandanten")
    user_id: int = Field(description="ID des Erstellers")
    content: str = Field(description="Inhalt des Prompts")
    name: str = Field(description="Anzeigename des Prompts")

    label: Label = Field(
        default=Label(default="Prompt", translations={"en": "Prompt", "fr": "Invite"}),
        description="Label für die Klasse"
    )

    # Labels for attributes
    field_labels: Dict[str, Label] = {
        "id": Label(default="ID", translations={}),
        "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
        "user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
        "content": Label(default="Inhalt", translations={"en": "Content", "fr": "Contenu"}),
        "name": Label(default="Name", translations={"en": "Label", "fr": "Nom"}),
    }


class FileItem(BaseModel):
    """Data model for a data object"""
    id: int = Field(description="Eindeutige ID des Datenobjekts")
    mandate_id: int = Field(description="ID des zugehörigen Mandanten")
    user_id: int = Field(description="ID des Erstellers")
    name: str = Field(description="Name des Datenobjekts")
    mime_type: str = Field(description="Typ des Datenobjekts MIME-Typ")
    size: Optional[str] = Field(None, description="Größe des Datenobjekts")
    file_hash: str = Field(description="Hash code")
    data: bytes = Field(description="Inhalt der Datei")
    creation_date: Optional[str] = Field(None, description="Datum des Hochladens")

    label: Label = Field(
        default=Label(default="Datenobjekt", translations={"en": "Data Object", "fr": "Objet de données"}),
        description="Label für die Klasse"
    )

    # Labels for attributes
    field_labels: Dict[str, Label] = {
        "id": Label(default="ID", translations={}),
        "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
        "user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
        "name": Label(default="Name", translations={"en": "Name", "fr": "Nom"}),
        "mime_type": Label(default="Typ", translations={"en": "Type", "fr": "Type"}),
        "size": Label(default="Größe", translations={"en": "Size", "fr": "Taille"}),
        "file_hash": Label(default="File-Hash", translations={"en": "Hash", "fr": "Hash"}),
        "data": Label(default="Daten", translations={"en": "Data", "fr": "Contenu"}),
        "creation_date": Label(default="Upload-Datum", translations={"en": "Upload date", "fr": "Date de téléchargement"})
    }


# Workflow model classes

class DocumentContent(BaseModel):
    """Content of a document in the workflow"""
    sequence_nr: Optional[int] = Field(1, description="Sequenz-Nummer des Inhaltes im Quelldokument")
    name: str = Field(description="Optionale Bezeichnung")
    ext: str = Field(description="Content extension for export: txt, csv, json, jpg, png")
    content_type: str = Field(description="MIME-Typ")
    data: bytes = Field(description="Inhalt der Datei")

class Document(BaseModel):
    """Document in the workflow"""
    id: str = Field(description="Eindeutige ID des Dokuments")
    file_id: int = Field(description="Quelldatei")
    contents: List[DocumentContent] = Field(description="Dokumentinhalte")

class DataStats(BaseModel):
    """Statistics for performance and data usage"""
    processing_time: Optional[float] = Field(None, description="Verarbeitungszeit in Sekunden")
    token_count: Optional[int] = Field(None, description="Token-Anzahl (für KI-Modelle)")
    bytes_sent: Optional[int] = Field(None, description="Gesendete Bytes")
    bytes_received: Optional[int] = Field(None, description="Empfangene Bytes")

class Message(BaseModel):
    """Message object in the workflow"""
    id: str = Field(description="Eindeutige ID der Nachricht")
    workflow_id: str = Field(description="Referenz zum übergeordneten Workflow")
    parent_message_id: Optional[str] = Field(None, description="Referenz zur beantworteten Nachricht")
    started_at: str = Field(description="Zeitstempel für Nachrichtenerstellung")
    finished_at: Optional[str] = Field(None, description="Zeitstempel für Nachrichtenabschluss")
    sequence_no: int = Field(description="Sequenznummer für Sortierung")

    status: str = Field(description="Status der Nachricht ('processing', 'completed')")
    role: str = Field(description="Rolle des Absenders ('system', 'user', 'assistant')")

    data_stats: Optional[DataStats] = Field(None, description="Statistiken")
    documents: Optional[List[Document]] = Field(None, description="Dokumente in dieser Nachricht")
    content: Optional[str] = Field(None, description="Textinhalt der Nachricht")
    agent_name: Optional[str] = Field(None, description="Name des verwendeten Agenten")

class Workflow(BaseModel):
    """Workflow object for the multi-agent system"""
    id: str = Field(description="Eindeutige ID des Workflows")
    name: Optional[str] = Field(None, description="Name des Workflows")
    mandate_id: int = Field(description="ID des Mandanten")
    user_id: int = Field(description="ID des Benutzers")
    status: str = Field(description="Status des Workflows ('running', 'failed', 'stopped')")
    started_at: str = Field(description="Startzeitpunkt")
    last_activity: str = Field(description="Zeitpunkt der letzten Aktivität")
    last_message_id: str = Field(description="The last registered message")

    data_stats: Optional[Dict[str, Any]] = Field(None, description="Gesamt-Statistiken")
    messages: List[Message] = Field(default=[], description="Nachrichtenverlauf")
    logs: List[Dict[str, Any]] = Field(default=[], description="Protokolleinträge")

# Request models for the API

class WorkflowCreateRequest(BaseModel):
    """Request to create a new workflow"""
    name: Optional[str] = Field(None, description="Name des Workflows")
    prompt: str = Field(description="Zu verwendender Prompt")
    files: List[int] = Field(default=[], description="Liste von FileItem ID")

class UserInputRequest(BaseModel):
    """Request for user input to a running workflow"""
    prompt: str = Field(description="Nachricht des Benutzers")
    files: List[int] = Field(default=[], description="Liste zusätzlicher FileItem ID")

@@ -1,6 +1,5 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from datetime import datetime


class Label(BaseModel):

@@ -39,7 +38,7 @@ class Prompt(BaseModel):


class FileItem(BaseModel):
    """Data model for a data object"""
    """Data model for a file"""
    id: int = Field(description="Eindeutige ID des Datenobjekts")
    mandate_id: int = Field(description="ID des zugehörigen Mandanten")
    user_id: int = Field(description="ID des Erstellers")

@@ -47,7 +46,6 @@ class FileItem(BaseModel):
    mime_type: str = Field(description="Typ des Datenobjekts MIME-Typ")
    size: Optional[int] = Field(None, description="Größe des Datenobjekts in Bytes")
    file_hash: str = Field(description="Hash code für Deduplizierung")
    data: bytes = Field(description="Binärer Inhalt der Datei")
    creation_date: Optional[str] = Field(None, description="Datum des Hochladens")
    workflow_id: Optional[str] = Field(None, description="ID des zugehörigen Workflows, falls vorhanden")

@@ -65,25 +63,32 @@ class FileItem(BaseModel):
        "mime_type": Label(default="Typ", translations={"en": "Type", "fr": "Type"}),
        "size": Label(default="Größe", translations={"en": "Size", "fr": "Taille"}),
        "file_hash": Label(default="File-Hash", translations={"en": "Hash", "fr": "Hash"}),
        "data": Label(default="Daten", translations={"en": "Data", "fr": "Contenu"}),
        "creation_date": Label(default="Upload-Datum", translations={"en": "Upload date", "fr": "Date de téléchargement"}),
        "workflow_id": Label(default="Workflow-ID", translations={"en": "Workflow ID", "fr": "ID du workflow"})
    }

class FileData(BaseModel):
    """Data model for the file content"""
    id: int = Field(description="Eindeutige ID des Datenobjekts")
    data: str = Field(description="Binärer Inhalt der Datei als Base64-String")

# Workflow model classes

class DocumentContent(BaseModel):
    """Content of a document in the workflow"""
    sequence_nr: Optional[int] = Field(1, description="Sequenz-Nummer des Inhaltes im Quelldokument")
    name: str = Field(description="Optionale Bezeichnung")
    sequence_nr: int = Field(1, description="Sequenz-Nummer des Inhaltes im Quelldokument")
    name: str = Field(description="Bezeichnung")
    ext: str = Field(description="Content extension for export: txt, csv, json, jpg, png")
    content_type: str = Field(description="MIME-Typ")
    data: bytes = Field(description="Inhalt der Datei")
    data: str = Field(description="Binärer Inhalt der Daten als Base64-String")
    summary: str = Field(description="Zusammenfassung des Datei-Inhaltes")
    metadata: Dict[str, Any] = Field(default_factory=dict, description="Metadaten zum Inhalt, wie z.B. is_text Flag, Format-Informationen, Encoding usw.")

class Document(BaseModel):
    """Document in the workflow - directly references a file in the database"""
    id: str = Field(description="Eindeutige ID des Dokuments")
    name: str = Field(description="Name des Datenobjekts")
    ext: str = Field(description="Extension des Datenobjekts")
    file_id: int = Field(description="ID der referenzierten Datei in der Datenbank")
    contents: List[DocumentContent] = Field(description="Dokumentinhalte")


@@ -137,4 +142,4 @@ class WorkflowCreateRequest(BaseModel):
class UserInputRequest(BaseModel):
    """Request for user input to a running workflow"""
    prompt: str = Field(description="Nachricht des Benutzers")
    files: List[int] = Field(default=[], description="Liste zusätzlicher FileItem ID")
    listFileId: List[int] = Field(default=[], description="Liste zusätzlicher FileItem ID")

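The new split is visible here: FileItem carries metadata only, FileData carries the base64 payload, and DocumentContent now holds a base64 string plus a metadata dict. A minimal construction sketch under these definitions (all values are illustrative):

import base64

item = FileData(id=42, data=base64.b64encode(b"col1,col2\n1,2").decode('utf-8'))

part = DocumentContent(
    sequence_nr=1,
    name="1_csv_Sheet1",
    ext="csv",
    content_type="csv",
    data=item.data,  # base64 string, matching the new field type
    summary="Two-column sample sheet",
    metadata={"is_text": True, "source": "xlsx", "sheet": "Sheet1", "format": "csv"},
)
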
@@ -1,6 +1,45 @@
....................... TASKS


run agent, then save output files to db
. files save -> fileid list, ALWAYS TO WRITE NEW FILES!
. chat_message_to_workflow(role, agent, chatmsg, workflow): with answer and fileidlist


----------------------- OPEN

PRIO1:

Split big files into content-parts


PRIO2:

implement cleanup routines for files in lucydom_interface (File_Management_CLEANUP_INTERVAL): temp older than interval, all orphaned

frontend: no labels definition

Integrate NDA text as modal form - data governance agreement at login with checkbox

sharepoint connector with document search, content search, content extraction

add connector to myoutlook

frontend to react


----------------------- DONE


Replace all explicit prompts.
Can you put together where chat.py still has explicit texts to the user inside the messages? - imagine a Japanese developer working on it; he would not understand them. References to the code elements are enough.


Clean up the agents registry, including the agents

Clean up file upload & drag-drop so that the file is simply written to the db, with the file inside the file object

@@ -14,35 +53,9 @@ Workflow:


----------------------- OPEN

PRIO1:

Split big files into content-parts

Cleanup routine for files older than xxx days in upload dir

Integrate NDA text as modal form - data governance agreement at login with checkbox

frontend to react


PRIO2:

implement cleanup routines for files in lucydom_interface (File_Management_CLEANUP_INTERVAL): temp older than interval, all orphaned

frontend: no labels definition

sharepoint connector with document search, content search, content extraction

add connector to myoutlook


----------------------- DONE
an you please rework your code suggestion into a class "ChatManager" in module "chat.py" and deliver that class to me. here is additional info and documents.
can you please rework your code suggestion into a class "ChatManager" in module "chat.py" and deliver that class to me. here is additional info and documents.

for the implementation of the functions, please use the attached modules as a basis, but write all code anew, because today's code is far too long and carries too many details at every level. the implementation of the functions should likewise be high-level, with all detail work moved out into foundational functions.

@@ -8,30 +8,37 @@ pydantic==1.10.13  # Older version without the Rust dependency
## Authentication & Security
python-jose==3.3.0
passlib==1.7.4
argon2-cffi>=21.3.0  # For password hashing in gateway_interface.py

## Database
mysql-connector-python==8.1.0

## PDF & Document Processing
reportlab==4.0.4
fitz
PyMuPDF>=1.23.7  # Instead of the imprecise 'fitz'
PyPDF2==3.0.1
python-docx>=0.8.11  # For Word documents
openpyxl>=3.1.2  # For Excel files

## Data Processing & Analysis
numpy==1.26.3  # Version compatible with pandas and matplotlib
pandas==2.2.3  # Keep the current version

FuzzyTM>=0.4.0
numpy==1.26.3  # Version compatible with pandas and matplotlib
pandas==2.2.3  # Keep the current version

## Data Visualization
matplotlib==3.8.0  # Keep the current version
matplotlib==3.8.0  # Keep the current version
seaborn==0.13.0
plotly==5.18.0

## Web Scraping & HTTP
beautifulsoup4==4.12.2
requests==2.31.0
chardet>=5.0.0  # For character-set detection of web content

## Image Processing
Pillow>=10.0.0  # For image processing (imported as PIL)

## Utilities
python-dateutil==2.8.2
python-dotenv==1.0.0

## Dependencies for trio (used by httpx)
sortedcontainers>=2.4.0  # Required by trio

@@ -7,7 +7,7 @@ from dataclasses import dataclass
import io

from modules.auth import get_current_active_user, get_user_context
from modules.utility import APP_CONFIG
from modules.configuration import APP_CONFIG

# Import interfaces
from modules.lucydom_interface import get_lucydom_interface, FileError, FileNotFoundError, FileStorageError, FilePermissionError, FileDeletionError

@@ -75,7 +75,7 @@ async def get_files(current_user: Dict[str, Any] = Depends(get_current_active_us
    try:
        context = await get_context(current_user)

        # Fetch all files generically
        # Fetch all files generically - metadata only, no binary data
        files = context.interface_data.get_all_files()
        return files
    except Exception as e:

@@ -106,7 +106,7 @@ async def upload_file(
        if len(file_content) > max_size:
            raise HTTPException(
                status_code=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
                detail=f"Datei zu groß. Maximale Größe: {APP_CONFIG.get("File_Management_MAX_UPLOAD_SIZE_MB")}MB"
                detail=f"Datei zu groß. Maximale Größe: {APP_CONFIG.get('File_Management_MAX_UPLOAD_SIZE_MB')}MB"
            )

        # Store the file in the database via the LucyDOM interface

@@ -141,12 +141,14 @@ async def get_file(
    current_user: Dict[str, Any] = Depends(get_current_active_user)
):
    """
    Returns a file by its ID directly from the database.
    Returns a file by its ID for download.
    Fetches both the metadata and the binary data.
    """
    try:
        context = await get_context(current_user)

        # Fetch the file from the database via the LucyDOM interface
        # Uses the download_file method, which now combines metadata and binary data
        file_data = context.interface_data.download_file(file_id)

        # Return the file

@@ -192,11 +194,13 @@ async def delete_file(
):
    """
    Deletes a file by its ID from the database.
    Removes both the metadata and the binary data.
    """
    try:
        context = await get_context(current_user)

        # Delete the file via the LucyDOM interface
        # The method now takes care of deleting from both tables (files and file_data)
        context.interface_data.delete_file(file_id)

        # Return successful deletion without content (204 No Content)

@@ -237,7 +241,7 @@ async def get_file_stats(
    try:
        context = await get_context(current_user)

        # Fetch all files
        # Fetch all files - metadata only
        all_files = context.interface_data.get_all_files()

        # Compute statistics


test_gateway.py
@@ -1,227 +0,0 @@
#!/usr/bin/env python3
"""
Test script for creating a workflow with a prompt and a file via the gateway.
"""

import requests
import json
import os
import time
import sys
from datetime import datetime

# Configuration
API_BASE_URL = "http://localhost:8000"  # Adjust to your gateway URL
API_TOKEN = "your_api_token_here"  # Your API token

# Headers for authentication
HEADERS = {
    "Authorization": f"Bearer {API_TOKEN}",
    "Content-Type": "application/json"
}

def log_message(message):
    """Prints a formatted message with a timestamp"""
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{timestamp}] {message}")

def upload_file(file_path):
    """Uploads a file and returns the file ID"""
    log_message(f"Lade Datei hoch: {file_path}")

    if not os.path.exists(file_path):
        log_message(f"FEHLER: Datei nicht gefunden: {file_path}")
        return None

    # Prepare the multipart form for the file upload
    file_name = os.path.basename(file_path)
    files = {
        'file': (file_name, open(file_path, 'rb'), 'application/octet-stream')
    }

    # Upload the file
    upload_url = f"{API_BASE_URL}/api/files/upload"
    response = requests.post(
        upload_url,
        headers={"Authorization": f"Bearer {API_TOKEN}"},  # Authorization header only
        files=files
    )

    if response.status_code != 200:
        log_message(f"FEHLER: Datei-Upload fehlgeschlagen. Status: {response.status_code}")
        log_message(f"Response: {response.text}")
        return None

    # Extract the file ID
    file_data = response.json()
    file_id = file_data.get("id")
    log_message(f"Datei erfolgreich hochgeladen. ID: {file_id}")

    return file_id

def create_workflow(prompt, file_id=None):
    """Creates a new workflow with a prompt and an optional file"""
    log_message("Erstelle neuen Workflow...")

    # Prepare the message for the workflow
    user_input = {
        "message": prompt
    }

    # If a file ID is present, add it
    if file_id:
        user_input["additional_files"] = [file_id]

    # Create the workflow
    workflow_url = f"{API_BASE_URL}/api/workflows/user-input"
    response = requests.post(
        workflow_url,
        headers=HEADERS,
        json=user_input
    )

    if response.status_code >= 400:
        log_message(f"FEHLER: Workflow-Erstellung fehlgeschlagen. Status: {response.status_code}")
        log_message(f"Response: {response.text}")
        return None

    # Extract the workflow ID
    workflow_data = response.json()
    workflow_id = workflow_data.get("workflow_id")
    log_message(f"Workflow erfolgreich erstellt. ID: {workflow_id}")

    return workflow_id

def poll_workflow_status(workflow_id, max_attempts=20, delay=2):
    """Polls the status of a workflow and waits until it completes"""
    log_message(f"Prüfe Status des Workflows {workflow_id}...")

    for attempt in range(1, max_attempts + 1):
        status_url = f"{API_BASE_URL}/api/workflows/{workflow_id}/status"
        response = requests.get(
            status_url,
            headers=HEADERS
        )

        if response.status_code != 200:
            log_message(f"FEHLER: Status-Abfrage fehlgeschlagen. Status: {response.status_code}")
            continue

        status_data = response.json()
        current_status = status_data.get("status")
        log_message(f"Workflow-Status: {current_status} (Versuch {attempt}/{max_attempts})")

        if current_status in ["completed", "stopped", "failed"]:
            log_message(f"Workflow ist abgeschlossen mit Status: {current_status}")
            return status_data

        time.sleep(delay)

    log_message(f"Maximale Anzahl von Versuchen erreicht. Letzter Status: {current_status}")
    return None

def get_workflow_messages(workflow_id):
    """Fetches all messages of a workflow"""
    log_message(f"Hole Nachrichten für Workflow {workflow_id}...")

    messages_url = f"{API_BASE_URL}/api/workflows/{workflow_id}/messages"
    response = requests.get(
        messages_url,
        headers=HEADERS
    )

    if response.status_code != 200:
        log_message(f"FEHLER: Abrufen der Nachrichten fehlgeschlagen. Status: {response.status_code}")
        return []

    messages = response.json()
    log_message(f"{len(messages)} Nachrichten gefunden.")

    return messages

def print_workflow_results(workflow_id):
    """Prints the results of a workflow"""
    log_message("=== WORKFLOW-ERGEBNISSE ===")

    # Fetch the status
    status_url = f"{API_BASE_URL}/api/workflows/{workflow_id}/status"
    status_response = requests.get(status_url, headers=HEADERS)

    if status_response.status_code == 200:
        status_data = status_response.json()
        log_message(f"Workflow-Name: {status_data.get('name')}")
        log_message(f"Status: {status_data.get('status')}")
        log_message(f"Gestartet: {status_data.get('started_at')}")
        log_message(f"Letzte Aktivität: {status_data.get('last_activity')}")

    # Fetch and print the messages
    messages = get_workflow_messages(workflow_id)
    log_message(f"Anzahl der Nachrichten: {len(messages)}")

    for i, msg in enumerate(messages, 1):
        log_message(f"--- Nachricht {i} ---")
        log_message(f"Rolle: {msg.get('role')}")

        # Print the content truncated (first 200 characters at most)
        content = msg.get('content', '')
        if content:
            if len(content) > 200:
                log_message(f"Inhalt: {content[:200]}... [gekürzt]")
            else:
                log_message(f"Inhalt: {content}")

        # Print the number of documents
        docs = msg.get('documents', [])
        if docs:
            log_message(f"Dokumente: {len(docs)}")
            for j, doc in enumerate(docs, 1):
                source = doc.get('source', {})
                doc_name = source.get('name', f"Dokument {j}")
                log_message(f" - {doc_name}")

def main():
    """Main function for testing the workflow"""
    # Example file to upload (adjust the path)
    file_path = "example.csv"  # Put the path to your test file here

    # Prompt for the workflow
    test_prompt = """Bitte analysiere die angehängte Datei und erstelle eine Zusammenfassung der wichtigsten Informationen.
Wenn es sich um eine CSV-Datei handelt, identifiziere die Spalten und gib mir einen Überblick über die enthaltenen Daten.
Erstelle außerdem eine Visualisierung, wenn du Zahlenwerte in der Datei findest."""

    try:
        # Upload the file
        file_id = upload_file(file_path)
        if not file_id:
            log_message("Test abgebrochen: Datei konnte nicht hochgeladen werden.")
            return False

        # Create the workflow
        workflow_id = create_workflow(test_prompt, file_id)
        if not workflow_id:
            log_message("Test abgebrochen: Workflow konnte nicht erstellt werden.")
            return False

        # Wait for the workflow to finish
        workflow_status = poll_workflow_status(workflow_id)
        if not workflow_status:
            log_message("Test unvollständig: Timeout beim Warten auf Workflow-Abschluss.")
            # Continue anyway to see at least partial results

        # Print the results
        print_workflow_results(workflow_id)

        log_message("Test abgeschlossen.")
        return True

    except Exception as e:
        log_message(f"FEHLER: Unerwartete Ausnahme: {str(e)}")
        import traceback
        log_message(traceback.format_exc())
        return False

if __name__ == "__main__":
    log_message("=== WORKFLOW-TEST GESTARTET ===")
    success = main()
    log_message(f"=== WORKFLOW-TEST BEENDET (Erfolgreich: {success}) ===")
    sys.exit(0 if success else 1)


test_workflow.py
@ -1,182 +0,0 @@
#!/usr/bin/env python3
"""
Backend test script for the workflow functionality with a prompt and a file.
This script tests the backend components directly, without going through the API.
"""

import os
import sys
import asyncio
import uuid
from datetime import datetime
import logging
import json

# Add the project root to the path so that modules can be found
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='[%(asctime)s] %(levelname)s: %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger("backend_test")

# Imports from the backend
from modules.lucydom_interface import get_lucydom_interface
from modules.chat import get_chat_manager

# Test parameters
TEST_MANDATE_ID = 1
TEST_USER_ID = 1
TEST_FILE_PATH = "d:/temp/prompt_a1.txt"  # adjust the path to the test file
TEST_FILE_PATH1 = "d:/temp/LF-Nutshell.png"  # adjust the path to the test file
TEST_PROMPT = """Please analyse the attached file and create a summary of the most important information.
Also create a visualization if you find numeric values in the file."""


async def upload_test_file():
    """Uploads a test file to the backend and returns the file metadata"""
    logger.info(f"Uploading test file: {TEST_FILE_PATH}")

    # Initialize the LucyDOM interface
    lucy_interface = get_lucydom_interface(TEST_MANDATE_ID, TEST_USER_ID)

    try:
        # Check whether the file exists
        if not os.path.exists(TEST_FILE_PATH):
            logger.error(f"Test file not found: {TEST_FILE_PATH}")
            return None

        # Read the file
        with open(TEST_FILE_PATH, 'rb') as f:
            file_content = f.read()

        # Extract the file name
        file_name = os.path.basename(TEST_FILE_PATH)

        # Upload the file
        file_meta = lucy_interface.save_uploaded_file(file_content, file_name)
        file_id = file_meta.get('id')

        logger.info(f"File uploaded successfully. ID: {file_id}")
        return file_meta

    except Exception as e:
        logger.error(f"Error while uploading the file: {str(e)}")
        return None


async def create_test_workflow(file_meta):
    """Creates a test workflow with the given prompt and file"""
    logger.info("Creating test workflow...")

    # Initialize the chat manager
    chat_manager = get_chat_manager(TEST_MANDATE_ID, TEST_USER_ID)

    # Prepare the message object
    message = {
        "role": "user",
        "content": TEST_PROMPT,
        "documents": [file_meta] if file_meta else []
    }

    try:
        # Create the workflow (a new workflow ID is generated automatically)
        workflow = await chat_manager.workflow_integrate_userinput(message)

        if not workflow:
            logger.error("Workflow could not be created")
            return None

        workflow_id = workflow.get("id")
        logger.info(f"Workflow created successfully. ID: {workflow_id}")
        return workflow

    except Exception as e:
        logger.error(f"Error during workflow creation: {str(e)}")
        import traceback
        logger.error(traceback.format_exc())
        return None


def print_workflow_details(workflow):
    """Prints details about the workflow"""
    if not workflow:
        logger.warning("No workflow available to display")
        return

    logger.info("=== WORKFLOW DETAILS ===")
    logger.info(f"ID: {workflow.get('id')}")
    logger.info(f"Name: {workflow.get('name')}")
    logger.info(f"Status: {workflow.get('status')}")
    logger.info(f"Mandate ID: {workflow.get('mandate_id')}")
    logger.info(f"User ID: {workflow.get('user_id')}")
    logger.info(f"Started: {workflow.get('started_at')}")
    logger.info(f"Last activity: {workflow.get('last_activity')}")

    # Print the messages
    messages = workflow.get("messages", [])
    logger.info(f"Number of messages: {len(messages)}")

    for i, msg in enumerate(messages, 1):
        logger.info(f"--- Message {i} ---")
        logger.info(f"ID: {msg.get('id')}")
        logger.info(f"Role: {msg.get('role')}")
        logger.info(f"Sequence: {msg.get('sequence_no')}")
        logger.info(f"Agent: {msg.get('agent_name')}")

        # Print a shortened preview of the content
        content = msg.get('content', '')
        if content:
            preview = content[:200] + ('...' if len(content) > 200 else '')
            logger.info(f"Content: {preview}")

        # List the documents
        documents = msg.get('documents', [])
        if documents:
            logger.info(f"Documents: {len(documents)}")
            for j, doc in enumerate(documents, 1):
                source = doc.get('source', {})
                doc_name = source.get('name', f"Document {j}")
                logger.info(f"  - {doc_name}")

    # Print the logs
    logs = workflow.get("logs", [])
    logger.info(f"Number of logs: {len(logs)}")
    if len(logs) > 0:
        logger.info("Last 3 logs:")
        for log in logs[-3:]:
            logger.info(f"  - [{log.get('timestamp')}] {log.get('message')}")


async def main():
    """Main function for the backend test"""
    logger.info("=== BACKEND WORKFLOW TEST STARTED ===")

    try:
        # Step 1: upload the test file
        file_meta = await upload_test_file()
        if not file_meta:
            logger.error("Test aborted: file could not be uploaded")
            return False

        # Step 2: create the workflow
        workflow = await create_test_workflow(file_meta)
        if not workflow:
            logger.error("Test aborted: workflow could not be created")
            return False

        # Step 3: print the workflow details
        print_workflow_details(workflow)

        logger.info("=== BACKEND WORKFLOW TEST FINISHED SUCCESSFULLY ===")
        return True

    except Exception as e:
        logger.error(f"Unexpected error in the test: {str(e)}")
        import traceback
        logger.error(traceback.format_exc())
        return False


if __name__ == "__main__":
    # Run the event loop
    success = asyncio.run(main())
    sys.exit(0 if success else 1)
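Note for reviewers: the deleted script above drove the engine through workflow_integrate_userinput, while the replacement scripts below call chat_run. The following is a minimal sketch of the removed call path for comparison; the function names come from the deleted code, while the file content, prompt, and IDs are placeholder test values:

import asyncio

from modules.lucydom_interface import get_lucydom_interface
from modules.chat import get_chat_manager

async def legacy_flow():
    # Upload a small file, then pass the user message (with the attached
    # document) to the workflow integration entry point the old test used.
    lucy_interface = get_lucydom_interface(1, 1)  # mandate_id=1, user_id=1 (test values)
    file_meta = lucy_interface.save_uploaded_file(b"sample content", "demo.txt")
    chat_manager = get_chat_manager(1, 1)
    return await chat_manager.workflow_integrate_userinput({
        "role": "user",
        "content": "Summarize the attached file.",  # placeholder prompt
        "documents": [file_meta],
    })

# asyncio.run(legacy_flow())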
229	test_workflow1.py	Normal file
@@ -0,0 +1,229 @@
"""
|
||||
Test-Skript für den ChatManager-Workflow mit simulierten Datei-Uploads.
|
||||
Demonstriert den vollständigen Workflow von Datei-Upload bis Chat-Ausführung.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Any, List, Tuple
|
||||
from datetime import datetime
|
||||
|
||||
# Logging konfigurieren
|
||||
logging.basicConfig(
|
||||
level=logging.DEBUG,
|
||||
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
|
||||
handlers=[logging.StreamHandler()]
|
||||
)
|
||||
logger = logging.getLogger("test_workflow")
|
||||
|
||||
# Pfad zum Projektverzeichnis hinzufügen
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Module importieren
|
||||
from modules.lucydom_interface import get_lucydom_interface
|
||||
from modules.chat import get_chat_manager
|
||||
|
||||
async def create_test_files(mandate_id: int, user_id: int) -> Tuple[int, int]:
|
||||
"""
|
||||
Erstellt eine Textdatei und ein Bild für Tests und lädt sie in die Datenbank hoch.
|
||||
|
||||
Args:
|
||||
mandate_id: ID des Mandanten
|
||||
user_id: ID des Benutzers
|
||||
|
||||
Returns:
|
||||
Tuple mit (text_file_id, image_file_id)
|
||||
"""
|
||||
logger.info("Erstelle Test-Dateien...")
|
||||
|
||||
lucy_interface = get_lucydom_interface(mandate_id, user_id)
|
||||
|
||||
# Textdatei erstellen
|
||||
text_content = """
|
||||
Dies ist eine Test-Textdatei für den ChatManager-Workflow.
|
||||
Sie enthält einige Informationen zum Testen der Dokumentverarbeitung.
|
||||
|
||||
Der ChatManager sollte in der Lage sein, diese Datei zu verarbeiten
|
||||
und daraus relevante Informationen zu extrahieren.
|
||||
|
||||
Diese Datei dient als Beispiel für Text-basierte Dokumente, die in einem
|
||||
Chat-Workflow verwendet werden können.
|
||||
"""
|
||||
text_file_bytes = text_content.encode('utf-8')
|
||||
text_file = lucy_interface.save_uploaded_file(text_file_bytes, "test_document.txt")
|
||||
text_file_id = text_file["id"]
|
||||
logger.info(f"Textdatei erstellt mit ID: {text_file_id}")
|
||||
|
||||
# Create a simple test image using PIL
|
||||
try:
|
||||
from PIL import Image
|
||||
import io
|
||||
|
||||
# Create a 100x100 red image
|
||||
img = Image.new('RGB', (100, 100), color = 'red')
|
||||
|
||||
# Save to BytesIO
|
||||
img_bytes = io.BytesIO()
|
||||
img.save(img_bytes, format='PNG')
|
||||
img_bytes = img_bytes.getvalue()
|
||||
|
||||
# Upload to database
|
||||
image_file = lucy_interface.save_uploaded_file(img_bytes, "test_image.png")
|
||||
image_file_id = image_file["id"]
|
||||
logger.info(f"Bilddatei erstellt mit ID: {image_file_id}")
|
||||
|
||||
except ImportError:
|
||||
# Fallback to the original method if PIL is not available
|
||||
png_data = bytes([
|
||||
0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A, # PNG Header
|
||||
# ... rest of your PNG data ...
|
||||
])
|
||||
|
||||
with open("./test_img_orig.png", 'wb') as f:
|
||||
f.write(png_data)
|
||||
|
||||
image_file = lucy_interface.save_uploaded_file(png_data, "test_image.png")
|
||||
image_file_id = image_file["id"]
|
||||
logger.info(f"Bilddatei erstellt mit ID: {image_file_id}")
|
||||
|
||||
return text_file_id, image_file_id
|
||||
|
||||
|
||||
|
||||
async def run_chat_workflow(mandate_id: int, user_id: int, file_ids: List[int]) -> Dict[str, Any]:
|
||||
"""
|
||||
Führt einen Chat-Workflow mit gegebenen Datei-IDs aus.
|
||||
|
||||
Args:
|
||||
mandate_id: ID des Mandanten
|
||||
user_id: ID des Benutzers
|
||||
file_ids: Liste der Datei-IDs
|
||||
|
||||
Returns:
|
||||
Das Workflow-Ergebnis
|
||||
"""
|
||||
logger.info(f"Starte Chat-Workflow mit Dateien: {file_ids}")
|
||||
|
||||
# ChatManager initialisieren
|
||||
chat_manager = get_chat_manager(mandate_id, user_id)
|
||||
|
||||
# Benutzeranfrage erstellen
|
||||
user_input = {
|
||||
"message": "Analysiere bitte die hochgeladenen Dateien und erkläre mir deren Inhalt.",
|
||||
"additional_fileids": file_ids
|
||||
}
|
||||
|
||||
# Chat-Workflow ausführen
|
||||
workflow_result = await chat_manager.chat_run(user_input)
|
||||
logger.info(f"Workflow abgeschlossen mit ID: {workflow_result['id']}")
|
||||
|
||||
return workflow_result
|
||||
|
||||
def analyze_workflow_result(workflow: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Analysiert und gibt Informationen über das Workflow-Ergebnis aus.
|
||||
|
||||
Args:
|
||||
workflow: Das Workflow-Ergebnis
|
||||
"""
|
||||
logger.info("Analysiere Workflow-Ergebnis:")
|
||||
logger.info(f"Workflow-ID: {workflow['id']}")
|
||||
logger.info(f"Status: {workflow['status']}")
|
||||
logger.info(f"Anzahl Nachrichten: {len(workflow.get('messages', []))}")
|
||||
|
||||
for i, message in enumerate(workflow.get('messages', [])):
|
||||
logger.info(f"Nachricht {i+1}:")
|
||||
logger.info(f" Rolle: {message.get('role', 'unbekannt')}")
|
||||
|
||||
# Nur die ersten 100 Zeichen des Inhalts anzeigen
|
||||
content = message.get('content', '')
|
||||
content_preview = content[:100] + '...' if len(content) > 100 else content
|
||||
logger.info(f" Inhalt: {content_preview}")
|
||||
|
||||
# Dokumente in der Nachricht anzeigen
|
||||
documents = message.get('documents', [])
|
||||
logger.info(f" Dokumente: {len(documents)}")
|
||||
for j, doc in enumerate(documents):
|
||||
doc_id = doc.get('id', 'keine ID')
|
||||
file_id = doc.get('file_id', 'keine file_id')
|
||||
logger.info(f" Dokument {j+1}: ID={doc_id}, File-ID={file_id}")
|
||||
|
||||
# Informationen über Inhalte
|
||||
contents = doc.get('contents', [])
|
||||
for k, content in enumerate(contents):
|
||||
content_name = content.get('name', 'kein Name')
|
||||
content_type = content.get('content_type', 'unbekannt')
|
||||
logger.info(f" Inhalt {k+1}: {content_name} ({content_type})")
|
||||
|
||||
# Log-Einträge anzeigen
|
||||
logger.info(f"Logs: {len(workflow.get('logs', []))}")
|
||||
for i, log in enumerate(workflow.get('logs', []))[:10]: # Begrenzung auf 10 Logs
|
||||
log_type = log.get('type', 'info')
|
||||
log_message = log.get('message', '')
|
||||
log_message_preview = log_message[:100] + '...' if len(log_message) > 100 else log_message
|
||||
logger.info(f" Log {i+1} [{log_type}]: {log_message_preview}")
|
||||
|
||||
async def cleanup_test_files(mandate_id: int, user_id: int, file_ids: List[int]) -> None:
|
||||
"""
|
||||
Bereinigt die erstellten Testdateien.
|
||||
|
||||
Args:
|
||||
mandate_id: ID des Mandanten
|
||||
user_id: ID des Benutzers
|
||||
file_ids: Liste der zu löschenden Datei-IDs
|
||||
"""
|
||||
logger.info("Beginne Bereinigung der Testdateien...")
|
||||
|
||||
lucy_interface = get_lucydom_interface(mandate_id, user_id)
|
||||
|
||||
for file_id in file_ids:
|
||||
try:
|
||||
success = lucy_interface.delete_file(file_id)
|
||||
if success:
|
||||
logger.info(f"Datei mit ID {file_id} erfolgreich gelöscht")
|
||||
else:
|
||||
logger.warning(f"Fehler beim Löschen der Datei mit ID {file_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Löschen der Datei mit ID {file_id}: {str(e)}")
|
||||
|
||||
logger.info("Bereinigung abgeschlossen")
|
||||
|
||||
async def main():
|
||||
"""
|
||||
Hauptfunktion, die den gesamten Testprozess steuert.
|
||||
"""
|
||||
# Testparameter
|
||||
MANDATE_ID = 1 # Test-Mandanten-ID
|
||||
USER_ID = 1 # Test-Benutzer-ID
|
||||
CLEANUP = True # Bereinigung nach dem Test
|
||||
|
||||
try:
|
||||
logger.info("=== Test-Workflow für ChatManager gestartet ===")
|
||||
|
||||
# Schritt 1: Testdateien erstellen
|
||||
text_file_id, image_file_id = await create_test_files(MANDATE_ID, USER_ID)
|
||||
file_ids = [text_file_id, image_file_id]
|
||||
|
||||
# Schritt 2: Chat-Workflow ausführen
|
||||
workflow_result = await run_chat_workflow(MANDATE_ID, USER_ID, file_ids)
|
||||
|
||||
# Schritt 3: Ergebnis analysieren
|
||||
analyze_workflow_result(workflow_result)
|
||||
|
||||
# Schritt 4: Optional bereinigen
|
||||
if CLEANUP:
|
||||
await cleanup_test_files(MANDATE_ID, USER_ID, file_ids)
|
||||
|
||||
logger.info("=== Test-Workflow erfolgreich abgeschlossen ===")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler im Test-Workflow: {str(e)}", exc_info=True)
|
||||
logger.info("=== Test-Workflow mit Fehler beendet ===")
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Event-Loop für asyncio erstellen und Hauptfunktion ausführen
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
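As a quick sanity check for the PIL-free fallback path above: the hex literal embedded in the scripts decodes to a valid 1x1 PNG. A minimal sketch, assuming Pillow is installed (the hex string is copied verbatim from the code above):

# Verify that the embedded fallback bytes form a readable 1x1 PNG.
import io
from PIL import Image

png_data = bytes.fromhex(
    "89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
    "89000000017352474200aece1ce90000000467414d410000b18f0bfc61050000"
    "000970485973000016250000162501495224f00000001974455874536f667477"
    "617265007777772e696e6b73636170652e6f72679bee3c1a0000000c49444154"
    "08d763f8ffff3f0005fe02fec1cd59830000000049454e44ae426082"
)
img = Image.open(io.BytesIO(png_data))
assert img.format == "PNG" and img.size == (1, 1)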
373	test_workflow2.py	Normal file
@@ -0,0 +1,373 @@
"""
|
||||
Erweitertes Test-Skript für den ChatManager-Workflow mit simulierten Datei-Uploads.
|
||||
Bietet zusätzliche Konfigurationsmöglichkeiten und detailliertere Tests.
|
||||
"""
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
import argparse
|
||||
import json
|
||||
from typing import Dict, Any, List, Tuple, Optional
|
||||
from datetime import datetime
|
||||
|
||||
# Logging konfigurieren
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[logging.StreamHandler()]
|
||||
)
|
||||
logger = logging.getLogger("test_workflow")
|
||||
|
||||
# Pfad zum Projektverzeichnis hinzufügen
|
||||
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
||||
|
||||
# Module importieren
|
||||
from modules.lucydom_interface import get_lucydom_interface
|
||||
from modules.chat import get_chat_manager
|
||||
|
||||
class TestConfig:
|
||||
"""Konfigurationsklasse für Testparameter"""
|
||||
def __init__(self):
|
||||
self.mandate_id = 1
|
||||
self.user_id = 1
|
||||
self.cleanup = True
|
||||
self.save_results = True
|
||||
self.results_dir = "test_results"
|
||||
self.test_message = "Analysiere bitte die hochgeladenen Dateien und erkläre mir deren Inhalt."
|
||||
self.text_file_content = """
|
||||
Dies ist eine Test-Textdatei für den ChatManager-Workflow.
|
||||
Sie enthält einige Informationen zum Testen der Dokumentverarbeitung.
|
||||
|
||||
Der ChatManager sollte in der Lage sein, diese Datei zu verarbeiten
|
||||
und daraus relevante Informationen zu extrahieren.
|
||||
|
||||
Diese Datei dient als Beispiel für Text-basierte Dokumente, die in einem
|
||||
Chat-Workflow verwendet werden können.
|
||||
"""
|
||||
|
||||
def parse_args() -> TestConfig:
|
||||
"""Parst Kommandozeilenargumente"""
|
||||
parser = argparse.ArgumentParser(description="Test für ChatManager-Workflow")
|
||||
parser.add_argument("--mandate-id", type=int, default=1, help="ID des Mandanten")
|
||||
parser.add_argument("--user-id", type=int, default=1, help="ID des Benutzers")
|
||||
parser.add_argument("--no-cleanup", action="store_true", help="Testdateien nicht löschen")
|
||||
parser.add_argument("--no-save", action="store_true", help="Ergebnisse nicht speichern")
|
||||
parser.add_argument("--results-dir", type=str, default="test_results", help="Verzeichnis für Ergebnisse")
|
||||
parser.add_argument("--message", type=str, help="Benutzernachricht für den Test")
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
config = TestConfig()
|
||||
config.mandate_id = args.mandate_id
|
||||
config.user_id = args.user_id
|
||||
config.cleanup = not args.no_cleanup
|
||||
config.save_results = not args.no_save
|
||||
config.results_dir = args.results_dir
|
||||
if args.message:
|
||||
config.test_message = args.message
|
||||
|
||||
return config
|
||||
|
||||
async def create_test_files(config: TestConfig) -> Tuple[int, int]:
|
||||
"""
|
||||
Erstellt eine Textdatei und ein Bild für Tests und lädt sie in die Datenbank hoch.
|
||||
|
||||
Args:
|
||||
config: Testkonfiguration
|
||||
|
||||
Returns:
|
||||
Tuple mit (text_file_id, image_file_id)
|
||||
"""
|
||||
logger.info("Erstelle Test-Dateien...")
|
||||
|
||||
lucy_interface = get_lucydom_interface(config.mandate_id, config.user_id)
|
||||
|
||||
# Textdatei erstellen
|
||||
text_content = config.text_file_content
|
||||
text_file_bytes = text_content.encode('utf-8')
|
||||
text_file = lucy_interface.save_uploaded_file(text_file_bytes, "test_document.txt")
|
||||
text_file_id = text_file["id"]
|
||||
logger.info(f"Textdatei erstellt mit ID: {text_file_id}")
|
||||
|
||||
# Bilddatei erstellen (einfaches 1x1 PNG)
|
||||
# Base64-kodiertes 1x1 PNG
|
||||
png_data = bytes.fromhex(
|
||||
"89504e470d0a1a0a0000000d49484452000000010000000108060000001f15c4"
|
||||
"89000000017352474200aece1ce90000000467414d410000b18f0bfc61050000"
|
||||
"000970485973000016250000162501495224f00000001974455874536f667477"
|
||||
"617265007777772e696e6b73636170652e6f72679bee3c1a0000000c49444154"
|
||||
"08d763f8ffff3f0005fe02fec1cd59830000000049454e44ae426082"
|
||||
)
|
||||
image_file = lucy_interface.save_uploaded_file(png_data, "test_image.png")
|
||||
image_file_id = image_file["id"]
|
||||
logger.info(f"Bilddatei erstellt mit ID: {image_file_id}")
|
||||
|
||||
return text_file_id, image_file_id
|
||||
|
||||
async def verify_uploaded_files(mandate_id: int, user_id: int, file_ids: List[int]) -> bool:
|
||||
"""
|
||||
Überprüft, ob die hochgeladenen Dateien korrekt in der Datenbank gespeichert wurden
|
||||
|
||||
Args:
|
||||
mandate_id: ID des Mandanten
|
||||
user_id: ID des Benutzers
|
||||
file_ids: Liste der Datei-IDs
|
||||
|
||||
Returns:
|
||||
True, wenn alle Dateien verfügbar sind
|
||||
"""
|
||||
logger.info("Überprüfe hochgeladene Dateien...")
|
||||
|
||||
lucy_interface = get_lucydom_interface(mandate_id, user_id)
|
||||
all_files_available = True
|
||||
|
||||
for file_id in file_ids:
|
||||
file = lucy_interface.get_file(file_id)
|
||||
if file:
|
||||
file_data = lucy_interface.get_file_data(file_id)
|
||||
if file_data:
|
||||
logger.info(f"Datei {file_id} ({file.get('name', 'Unbekannt')}, {file.get('mime_type', 'Unbekannt')}) ist verfügbar")
|
||||
logger.info(f" Größe: {len(file_data)} Bytes")
|
||||
else:
|
||||
logger.error(f"Datei {file_id} hat keine Binärdaten")
|
||||
all_files_available = False
|
||||
else:
|
||||
logger.error(f"Datei mit ID {file_id} nicht in der Datenbank gefunden")
|
||||
all_files_available = False
|
||||
|
||||
return all_files_available
|
||||
|
||||
async def run_chat_workflow(config: TestConfig, file_ids: List[int]) -> Dict[str, Any]:
|
||||
"""
|
||||
Führt einen Chat-Workflow mit gegebenen Datei-IDs aus.
|
||||
|
||||
Args:
|
||||
config: Testkonfiguration
|
||||
file_ids: Liste der Datei-IDs
|
||||
|
||||
Returns:
|
||||
Das Workflow-Ergebnis
|
||||
"""
|
||||
logger.info(f"Starte Chat-Workflow mit Dateien: {file_ids}")
|
||||
|
||||
# ChatManager initialisieren
|
||||
chat_manager = get_chat_manager(config.mandate_id, config.user_id)
|
||||
|
||||
# Benutzeranfrage erstellen
|
||||
user_input = {
|
||||
"message": config.test_message,
|
||||
"additional_fileids": file_ids
|
||||
}
|
||||
|
||||
# Start-Zeit erfassen
|
||||
start_time = datetime.now()
|
||||
|
||||
# Chat-Workflow ausführen
|
||||
workflow_result = await chat_manager.chat_run(user_input)
|
||||
|
||||
# Ende-Zeit und Dauer berechnen
|
||||
end_time = datetime.now()
|
||||
duration = (end_time - start_time).total_seconds()
|
||||
|
||||
logger.info(f"Workflow abgeschlossen mit ID: {workflow_result['id']}")
|
||||
logger.info(f"Dauer: {duration:.2f} Sekunden")
|
||||
|
||||
return workflow_result
|
||||
|
||||
def analyze_workflow_result(workflow: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Analysiert das Workflow-Ergebnis und gibt Statistiken zurück.
|
||||
|
||||
Args:
|
||||
workflow: Das Workflow-Ergebnis
|
||||
|
||||
Returns:
|
||||
Dictionary mit Analyseergebnissen
|
||||
"""
|
||||
logger.info("Analysiere Workflow-Ergebnis:")
|
||||
|
||||
# Basis-Informationen
|
||||
analysis = {
|
||||
"workflow_id": workflow.get("id"),
|
||||
"status": workflow.get("status"),
|
||||
"message_count": len(workflow.get("messages", [])),
|
||||
"log_count": len(workflow.get("logs", [])),
|
||||
"document_count": 0,
|
||||
"roles": {},
|
||||
"document_types": {},
|
||||
"response_sizes": []
|
||||
}
|
||||
|
||||
# Nachrichten analysieren
|
||||
for message in workflow.get("messages", []):
|
||||
# Rollen zählen
|
||||
role = message.get("role", "unknown")
|
||||
if role not in analysis["roles"]:
|
||||
analysis["roles"][role] = 0
|
||||
analysis["roles"][role] += 1
|
||||
|
||||
# Content-Größe bei Antworten
|
||||
if role == "assistant":
|
||||
content = message.get("content", "")
|
||||
analysis["response_sizes"].append(len(content))
|
||||
|
||||
# Dokumente zählen und analysieren
|
||||
documents = message.get("documents", [])
|
||||
analysis["document_count"] += len(documents)
|
||||
|
||||
for doc in documents:
|
||||
contents = doc.get("contents", [])
|
||||
for content in contents:
|
||||
content_type = content.get("content_type", "unknown")
|
||||
if content_type not in analysis["document_types"]:
|
||||
analysis["document_types"][content_type] = 0
|
||||
analysis["document_types"][content_type] += 1
|
||||
|
||||
# Ausgabe für Log
|
||||
logger.info(f"Workflow-ID: {analysis['workflow_id']}")
|
||||
logger.info(f"Status: {analysis['status']}")
|
||||
logger.info(f"Anzahl Nachrichten: {analysis['message_count']}")
|
||||
logger.info(f"Anzahl Dokumente: {analysis['document_count']}")
|
||||
logger.info(f"Rollen-Verteilung: {analysis['roles']}")
|
||||
logger.info(f"Dokumenttypen: {analysis['document_types']}")
|
||||
|
||||
if analysis["response_sizes"]:
|
||||
avg_size = sum(analysis["response_sizes"]) / len(analysis["response_sizes"])
|
||||
logger.info(f"Durchschnittliche Antwortgröße: {avg_size:.2f} Zeichen")
|
||||
|
||||
# Detaillierte Nachrichteninformationen
|
||||
for i, message in enumerate(workflow.get("messages", [])[:5]): # Begrenzung auf 5 Nachrichten
|
||||
logger.info(f"Nachricht {i+1}:")
|
||||
logger.info(f" Rolle: {message.get('role', 'unbekannt')}")
|
||||
|
||||
# Nur die ersten 100 Zeichen des Inhalts anzeigen
|
||||
content = message.get("content", "")
|
||||
content_preview = content[:100] + "..." if len(content) > 100 else content
|
||||
logger.info(f" Inhalt: {content_preview}")
|
||||
|
||||
# Dokumente in der Nachricht anzeigen
|
||||
documents = message.get("documents", [])
|
||||
if documents:
|
||||
logger.info(f" Dokumente: {len(documents)}")
|
||||
for j, doc in enumerate(documents):
|
||||
file_id = doc.get("file_id", "keine file_id")
|
||||
logger.info(f" Dokument {j+1}: File-ID={file_id}")
|
||||
|
||||
return analysis
|
||||
|
||||
def save_test_results(config: TestConfig, workflow: Dict[str, Any], analysis: Dict[str, Any]) -> None:
|
||||
"""
|
||||
Speichert die Testergebnisse in einer Datei.
|
||||
|
||||
Args:
|
||||
config: Testkonfiguration
|
||||
workflow: Das vollständige Workflow-Ergebnis
|
||||
analysis: Die Analyseergebnisse
|
||||
"""
|
||||
if not config.save_results:
|
||||
return
|
||||
|
||||
# Ergebnisverzeichnis erstellen, falls es nicht existiert
|
||||
os.makedirs(config.results_dir, exist_ok=True)
|
||||
|
||||
# Zeitstempel für eindeutige Dateinamen
|
||||
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
||||
|
||||
# Speichere die Analyse
|
||||
analysis_file = os.path.join(config.results_dir, f"analysis_{timestamp}.json")
|
||||
with open(analysis_file, "w", encoding="utf-8") as f:
|
||||
json.dump(analysis, f, indent=2, ensure_ascii=False)
|
||||
logger.info(f"Analyse gespeichert in: {analysis_file}")
|
||||
|
||||
# Speichere den vollständigen Workflow (ohne große Binärdaten)
|
||||
workflow_copy = workflow.copy()
|
||||
|
||||
# Entferne Binärdaten aus dem Export, um die Dateigröße zu reduzieren
|
||||
for message in workflow_copy.get("messages", []):
|
||||
if "documents" in message:
|
||||
for doc in message.get("documents", []):
|
||||
if "contents" in doc:
|
||||
for content in doc.get("contents", []):
|
||||
if "data" in content and isinstance(content["data"], bytes) and len(content["data"]) > 1000:
|
||||
content["data"] = f"[{len(content['data'])} Bytes]"
|
||||
|
||||
workflow_file = os.path.join(config.results_dir, f"workflow_{timestamp}.json")
|
||||
with open(workflow_file, "w", encoding="utf-8") as f:
|
||||
# Konvertiere Bytes zu Strings für JSON-Serialisierung
|
||||
json.dump(workflow_copy, f, indent=2, ensure_ascii=False, default=lambda o:
|
||||
o.decode("utf-8") if isinstance(o, bytes) else str(o))
|
||||
logger.info(f"Workflow gespeichert in: {workflow_file}")
|
||||
|
||||
async def cleanup_test_files(config: TestConfig, file_ids: List[int]) -> None:
|
||||
"""
|
||||
Bereinigt die erstellten Testdateien.
|
||||
|
||||
Args:
|
||||
config: Testkonfiguration
|
||||
file_ids: Liste der zu löschenden Datei-IDs
|
||||
"""
|
||||
if not config.cleanup:
|
||||
logger.info("Bereinigung übersprungen (--no-cleanup)")
|
||||
return
|
||||
|
||||
logger.info("Beginne Bereinigung der Testdateien...")
|
||||
|
||||
lucy_interface = get_lucydom_interface(config.mandate_id, config.user_id)
|
||||
|
||||
for file_id in file_ids:
|
||||
try:
|
||||
success = lucy_interface.delete_file(file_id)
|
||||
if success:
|
||||
logger.info(f"Datei mit ID {file_id} erfolgreich gelöscht")
|
||||
else:
|
||||
logger.warning(f"Fehler beim Löschen der Datei mit ID {file_id}")
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Löschen der Datei mit ID {file_id}: {str(e)}")
|
||||
|
||||
logger.info("Bereinigung abgeschlossen")
|
||||
|
||||
async def main():
|
||||
"""
|
||||
Hauptfunktion, die den gesamten Testprozess steuert.
|
||||
"""
|
||||
# Konfiguration laden
|
||||
config = parse_args()
|
||||
|
||||
try:
|
||||
logger.info("=== Test-Workflow für ChatManager gestartet ===")
|
||||
logger.info(f"Mandate-ID: {config.mandate_id}, User-ID: {config.user_id}")
|
||||
|
||||
# Schritt 1: Testdateien erstellen
|
||||
text_file_id, image_file_id = await create_test_files(config)
|
||||
file_ids = [text_file_id, image_file_id]
|
||||
|
||||
# Schritt 2: Hochgeladene Dateien überprüfen
|
||||
files_ok = await verify_uploaded_files(config.mandate_id, config.user_id, file_ids)
|
||||
if not files_ok:
|
||||
logger.error("Fehler bei den hochgeladenen Dateien, Test wird abgebrochen")
|
||||
return
|
||||
|
||||
# Schritt 3: Chat-Workflow ausführen
|
||||
workflow_result = await run_chat_workflow(config, file_ids)
|
||||
|
||||
# Schritt 4: Ergebnis analysieren
|
||||
analysis = analyze_workflow_result(workflow_result)
|
||||
|
||||
# Schritt 5: Ergebnisse speichern
|
||||
save_test_results(config, workflow_result, analysis)
|
||||
|
||||
# Schritt 6: Bereinigen
|
||||
await cleanup_test_files(config, file_ids)
|
||||
|
||||
logger.info("=== Test-Workflow erfolgreich abgeschlossen ===")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler im Test-Workflow: {str(e)}", exc_info=True)
|
||||
logger.info("=== Test-Workflow mit Fehler beendet ===")
|
||||
|
||||
if __name__ == "__main__":
|
||||
# Event-Loop für asyncio erstellen und Hauptfunktion ausführen
|
||||
loop = asyncio.get_event_loop()
|
||||
loop.run_until_complete(main())
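A short usage sketch for the command-line options defined in parse_args above. The flag values are illustrative, and the argv simulation only stands in for a real shell invocation of test_workflow2.py:

# Exercise the argument parsing without a real shell; parse_args is the
# function defined in test_workflow2.py above.
import sys
sys.argv = [
    "test_workflow2.py",
    "--mandate-id", "1",
    "--user-id", "1",
    "--results-dir", "test_results",
    "--no-cleanup",
    "--message", "Please summarize the uploaded files.",
]
config = parse_args()
assert config.cleanup is False           # --no-cleanup flips the default
assert config.results_dir == "test_results"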