From b0c45fb7980b2892ee9019d71d07a56e5f912cff Mon Sep 17 00:00:00 2001 From: valueon Date: Mon, 14 Apr 2025 20:05:33 +0200 Subject: [PATCH] stable backbone (workflow, agents) --- .../agentservice_agent_analyst.py | 190 -- .../agentservice_agent_coder.py | 426 ---- .../agentservice_agent_documentation.py | 422 ---- .../agentservice_agent_filecreator.py | 168 -- .../agentservice_agent_sharepoint.py | 175 -- .../agentservice_agent_webcrawler.py | 512 ----- gwserver/_old_bk_modules/agentservice_base.py | 124 -- .../agentservice_code_executor.py | 556 ------ .../agentservice_dataextraction.py | 475 ----- .../agentservice_filehandling.py | 638 ------ .../_old_bk_modules/agentservice_registry.py | 146 -- .../agentservice_workflow_manager.py | 1333 ------------- gwserver/_old_bk_modules/gateway_interface.py | 469 ----- gwserver/_old_bk_modules/gateway_model.py | 94 - gwserver/_old_bk_modules/lucydom_interface.py | 1265 ------------ gwserver/_old_bk_modules/lucydom_model.py | 149 -- gwserver/app.py | 3 +- .../modules/agentservice_agent_analyst.py | 1746 +++++++++++++++-- gwserver/modules/agentservice_agent_coder.py | 1310 +++++++------ .../agentservice_agent_documentation.py | 771 ++++---- .../modules/agentservice_agent_sharepoint.py | 209 -- .../modules/agentservice_agent_webcrawler.py | 681 ++++--- gwserver/modules/agentservice_base.py | 278 ++- gwserver/modules/agentservice_code_helpers.py | 750 ------- .../modules/agentservice_dataextraction.py | 312 ++- .../modules/agentservice_document_handler.py | 498 +++++ gwserver/modules/agentservice_filemanager.py | 175 +- gwserver/modules/agentservice_protocol.py | 338 ++++ gwserver/modules/agentservice_registry.py | 116 +- gwserver/modules/agentservice_utils.py | 3 +- .../agentservice_workflow_execution.py | 767 ++++---- .../modules/agentservice_workflow_manager.py | 242 ++- .../agentservice_agent_coder.py | 500 +++++ gwserver/test.py | 9 +- gwserver/workflow_test_result.json | 143 +- notes/changelog.txt | 21 +- 36 files changed, 5830 insertions(+), 10184 deletions(-) delete mode 100644 gwserver/_old_bk_modules/agentservice_agent_analyst.py delete mode 100644 gwserver/_old_bk_modules/agentservice_agent_coder.py delete mode 100644 gwserver/_old_bk_modules/agentservice_agent_documentation.py delete mode 100644 gwserver/_old_bk_modules/agentservice_agent_filecreator.py delete mode 100644 gwserver/_old_bk_modules/agentservice_agent_sharepoint.py delete mode 100644 gwserver/_old_bk_modules/agentservice_agent_webcrawler.py delete mode 100644 gwserver/_old_bk_modules/agentservice_base.py delete mode 100644 gwserver/_old_bk_modules/agentservice_code_executor.py delete mode 100644 gwserver/_old_bk_modules/agentservice_dataextraction.py delete mode 100644 gwserver/_old_bk_modules/agentservice_filehandling.py delete mode 100644 gwserver/_old_bk_modules/agentservice_registry.py delete mode 100644 gwserver/_old_bk_modules/agentservice_workflow_manager.py delete mode 100644 gwserver/_old_bk_modules/gateway_interface.py delete mode 100644 gwserver/_old_bk_modules/gateway_model.py delete mode 100644 gwserver/_old_bk_modules/lucydom_interface.py delete mode 100644 gwserver/_old_bk_modules/lucydom_model.py delete mode 100644 gwserver/modules/agentservice_agent_sharepoint.py delete mode 100644 gwserver/modules/agentservice_code_helpers.py create mode 100644 gwserver/modules/agentservice_document_handler.py create mode 100644 gwserver/modules/agentservice_protocol.py create mode 100644 gwserver/old_modules_copy/agentservice_agent_coder.py diff --git 
a/gwserver/_old_bk_modules/agentservice_agent_analyst.py b/gwserver/_old_bk_modules/agentservice_agent_analyst.py deleted file mode 100644 index a7a58070..00000000 --- a/gwserver/_old_bk_modules/agentservice_agent_analyst.py +++ /dev/null @@ -1,190 +0,0 @@ -""" -Datenanalyst-Agent für die Analyse und Interpretation von Daten. -""" - -import logging -from typing import List, Dict, Any, Optional -from modules.agentservice_base import BaseAgent -from connectors.connector_aichat_openai import ChatService - -logger = logging.getLogger(__name__) - -class AnalystAgent(BaseAgent): - """Agent für die Analyse und Interpretation von Daten""" - - _instance = None - - @classmethod - def get_instance(cls): - """Gibt eine Singleton-Instanz zurück""" - if cls._instance is None: - cls._instance = cls() - return cls._instance - - def __init__(self): - """Initialisiert den Datenanalyst-Agenten""" - super().__init__() - self.id = "analyst_agent" - self.name = "Datenanalyst" - self.type = "analyzer" - self.description = "Analysiert und interpretiert Daten" - self.capabilities = "Datenanalyse, Mustererkennung, Statistik und Bewertung" - self.instructions = """ - Du bist der Datenanalyseagent. Deine Aufgabe: - 1. Vorliegende Daten untersuchen und interpretieren - 2. Erkenntnisse aus Informationen gewinnen - 3. Trends identifizieren und Zusammenhänge prüfen - 4. Daten visualisieren und Konzepte erklären - 5. Datenqualität bewerten und Handlungsempfehlungen geben - """ - self.result_format = "AnalysisReport" - - def get_prompt(self, message_context: Dict[str, Any]) -> str: - """ - Generiert einen angepassten Prompt für den Datenanalysten. - - Args: - message_context: Kontext der Nachricht - - Returns: - Formatierter Prompt für den Datenanalysten - """ - # Basis-Prompt - prompt = f""" - Du bist {self.name}, ein {self.type} Agent. - - {self.description} - - Fähigkeiten: {self.capabilities} - - {self.instructions} - - Analysiere die vorliegenden Daten. Präsentiere klar strukturierte Ergebnisse - mit einer Zusammenfassung, Detailanalyse und Handlungsempfehlungen. - - Formatiere mit [STATUS: ERGEBNIS/TEILWEISE/PLAN] am Ende. - """ - - # Dateitypspezifische Anweisungen hinzufügen (verkürzt) - document_types = self._get_document_types(message_context) - - if "csv" in document_types or "excel" in document_types: - prompt += "\nTABELLENDATEN: Identifiziere wichtige Spalten, Korrelationen und Trends." - - if "pdf" in document_types or "doc" in document_types: - prompt += "\nTEXTDATEN: Extrahiere zentrale Fakten und Schlüsselthemen." - - if "image" in document_types: - prompt += "\nBILDDATEN: Beschreibe und interpretiere dargestellte Informationen." - - return prompt.strip() - - def _get_document_types(self, message_context: Dict[str, Any]) -> List[str]: - """ - Extrahiert die Dateitypen aus dem Nachrichtenkontext. 
-
-        Args:
-            message_context: Kontext der Nachricht
-
-        Returns:
-            Liste der Dateitypen
-        """
-        document_types = []
-
-        # Versuche Dokumente aus dem Kontext zu extrahieren
-        documents = message_context.get("documents", [])
-
-        for doc in documents:
-            source = doc.get("source", {})
-            name = source.get("name", "").lower()
-            content_type = source.get("content_type", "").lower()
-
-            # Dateityp aus Namen oder Content-Type ableiten
-            if name.endswith(".csv") or "csv" in content_type:
-                document_types.append("csv")
-            elif name.endswith((".xls", ".xlsx")) or "excel" in content_type or "spreadsheet" in content_type:
-                document_types.append("excel")
-            elif name.endswith(".pdf") or "pdf" in content_type:
-                document_types.append("pdf")
-            elif name.endswith((".doc", ".docx")) or "word" in content_type:
-                document_types.append("doc")
-            elif name.endswith((".jpg", ".jpeg", ".png", ".gif")) or "image" in content_type:
-                document_types.append("image")
-
-        return document_types
-
-    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
-        """
-        Verarbeitet eine Nachricht und führt eine Datenanalyse durch.
-
-        Args:
-            message: Die zu verarbeitende Nachricht
-            context: Zusätzlicher Kontext (optional)
-
-        Returns:
-            Die generierte Antwort mit der Datenanalyse
-        """
-        try:
-            # Prompt generieren
-            message_context = {"documents": context.get("documents", [])} if context else {}
-            prompt = self.get_prompt(message_context)
-
-            # OpenAI ChatService initialisieren
-            chat_service = ChatService()
-
-            # Nachrichten für die API vorbereiten
-            messages = [
-                {"role": "system", "content": prompt}
-            ]
-
-            # Kontext-Nachrichten VOR der aktuellen Nachricht einfügen, falls vorhanden,
-            # damit die Chronologie der Konversation erhalten bleibt
-            if context and "history" in context:
-                for history_item in context["history"]:
-                    messages.append({
-                        "role": history_item.get("role", "user"),
-                        "content": history_item.get("content", "")
-                    })
-
-            # Aktuelle Nachricht als letzten Eintrag anhängen
-            messages.append({"role": "user", "content": message.get("content", "")})
-
-            # API aufrufen
-            response_content = await chat_service.call_api(messages)
-
-            # Verbindung schließen
-            await chat_service.close()
-
-            # Antwort-Objekt erstellen
-            analysis_response = {
-                "role": "assistant",
-                "content": response_content,
-                "agent_type": self.type
-            }
-
-            # Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
-            content, status = self.extract_status(analysis_response["content"])
-            analysis_response["content"] = content
-
-            # Setze den Status im Kontext, falls vorhanden
-            if context is not None:
-                context["status"] = status
-
-            analysis_response["result_format"] = self.result_format
-            return analysis_response
-
-        except Exception as e:
-            logger.error(f"Fehler bei der Verarbeitung der Anfrage: {str(e)}", exc_info=True)
-
-            # Fehlerantwort zurückgeben
-            return {
-                "role": "assistant",
-                "content": f"Bei der Datenanalyse ist ein Fehler aufgetreten: {str(e)}",
-                "agent_type": self.type
-            }
-
-# Singleton-Instanz
-_analyst_agent = None
-
-def get_analyst_agent():
-    """Gibt eine Singleton-Instanz des Datenanalyst-Agenten zurück"""
-    global _analyst_agent
-    if _analyst_agent is None:
-        _analyst_agent = AnalystAgent()
-    return _analyst_agent
\ No newline at end of file
diff --git a/gwserver/_old_bk_modules/agentservice_agent_coder.py b/gwserver/_old_bk_modules/agentservice_agent_coder.py
deleted file mode 100644
index c673fa92..00000000
--- a/gwserver/_old_bk_modules/agentservice_agent_coder.py
+++ /dev/null
@@ -1,426 +0,0 @@
-"""
-Erweiterter Coder-Agent für die Entwicklung und Ausführung von Python-Code (Fortsetzung).
-""" - -import logging -import json -import os -from typing import List, Dict, Any, Optional -import asyncio -import re -import traceback -from datetime import datetime - -from modules.agentservice_base import BaseAgent -from modules.lucydom_interface import get_lucydom_interface -from modules.agentservice_code_executor import CodeExecutor - -logger = logging.getLogger(__name__) - -class CoderAgent(BaseAgent): - """Erweiterter Agent für die Entwicklung und Ausführung von Python-Code""" - - async def _execute_code(self, code: str, lucydom_interface, context: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Führt Python-Code mit dem CodeExecutor aus. - - Args: - code: Der auszuführende Python-Code - lucydom_interface: Interface für Datenbankzugriffe - context: Zusätzlicher Kontext - - Returns: - Ergebnis der Codeausführung - """ - try: - # Systemfunktionen für den Code vorbereiten - system_functions_code = self._prepare_system_functions(lucydom_interface) - - # Code mit Systemfunktionen erweitern - enhanced_code = system_functions_code + "\n\n" + code - - # CodeExecutor initialisieren - available_modules = [ - "modules.lucydom_interface", - "modules.lucydom_model", - "modules.agentservice_filehandling" - ] - - # Liste erlaubter Pakete - allowed_packages = None # None bedeutet alle erlaubt außer explizit blockierte - - # Liste blockierter Pakete - blocked_packages = [ - "cryptography", "flask", "django", "tornado", # Sicherheitsrisiken - "tensorflow", "pytorch", "scikit-learn" # Ressourcenintensiv - ] - - executor = CodeExecutor( - app_modules=available_modules, - timeout=60, # 60 Sekunden Timeout - max_memory_mb=512, # 512MB Speicherlimit - allowed_packages=allowed_packages, - blocked_packages=blocked_packages - ) - - try: - # Eingabedaten vorbereiten (falls vorhanden) - input_data = { - "context": context, - "workflow_id": context.get("workflow_id", "") if context else "", - } - - # Dateireferenzen hinzufügen - if context and "documents" in context: - file_refs = [] - for doc in context.get("documents", []): - source = doc.get("source", {}) - if source.get("type") == "file": - file_refs.append({ - "id": source.get("id", ""), - "name": source.get("name", ""), - "type": source.get("content_type", "") - }) - input_data["files"] = file_refs - - # Code ausführen - result = executor.execute_code(enhanced_code, input_data) - - # Log für die Ausführung - if result.get("success", False): - logger.info(f"Code erfolgreich ausgeführt") - output = result.get("output", "") - if output: - logger.debug(f"Ausgabe: {output[:200]}..." if len(output) > 200 else output) - else: - logger.error(f"Fehler bei der Codeausführung: {result.get('error', 'Unbekannter Fehler')}") - - return result - finally: - # Ressourcen freigeben - executor.cleanup() - - except Exception as e: - logger.error(f"Fehler bei der Codeausführung: {str(e)}", exc_info=True) - - return { - "success": False, - "output": "", - "error": f"Fehler bei der Ausführung: {str(e)}\n{traceback.format_exc()}", - "result": None - } - - def _prepare_system_functions(self, lucydom_interface) -> str: - """ - Bereitet die Systemfunktionen für den auszuführenden Code vor. - - Args: - lucydom_interface: Interface für Datenbankzugriffe - - Returns: - Python-Code für die Systemfunktionen - """ - system_functions_code = """ -# Systemfunktionen für den Code - -async def load_file(file_id, encoding=None): - \"\"\" - Lädt eine Datei aus der Datenbank anhand ihrer ID. 
- - Args: - file_id: ID der zu ladenden Datei - encoding: Optionale Kodierung (Standard: None für binäre Daten) - - Returns: - Binäre Daten oder dekodierter String, je nach Encoding-Parameter - \"\"\" - try: - # lucydom_interface wird über Globals zur Verfügung gestellt - global lucydom_interface - - if not lucydom_interface: - raise ValueError("LucyDOM-Interface nicht verfügbar") - - # Dateiinhalt asynchron laden - file_content = await lucydom_interface.read_file_content(file_id) - - if not file_content: - raise ValueError(f"Datei mit ID {file_id} nicht gefunden") - - # Wenn Encoding angegeben ist, String zurückgeben - if encoding: - return file_content.decode(encoding) - - # Andernfalls binäre Daten zurückgeben - return file_content - except Exception as e: - print(f"Fehler beim Laden der Datei {file_id}: {str(e)}") - raise - -def save_file(content, file_name, content_type=None): - \"\"\" - Speichert Daten als Datei in der Datenbank. - - Args: - content: Zu speichernde Daten (String oder Bytes) - file_name: Name der Datei - content_type: MIME-Typ der Datei (z.B. 'text/csv') - - Returns: - Metadaten der gespeicherten Datei inkl. ID - \"\"\" - try: - # lucydom_interface wird über Globals zur Verfügung gestellt - global lucydom_interface - - if not lucydom_interface: - raise ValueError("LucyDOM-Interface nicht verfügbar") - - # Wenn der Inhalt ein String ist, in Bytes konvertieren - if isinstance(content, str): - content = content.encode('utf-8') - - # Datei speichern - file_meta = lucydom_interface.save_uploaded_file(content, file_name) - - # Wenn content_type angegeben ist, Datei-Metadaten aktualisieren - if content_type and "id" in file_meta: - update_data = {"content_type": content_type} - lucydom_interface.update_file(file_meta["id"], update_data) - file_meta["content_type"] = content_type - - return file_meta - except Exception as e: - print(f"Fehler beim Speichern der Datei {file_name}: {str(e)}") - raise - -def update_file(file_id, content, update_metadata=None): - \"\"\" - Aktualisiert eine bestehende Datei in der Datenbank. 
-
-    Args:
-        file_id: ID der zu aktualisierenden Datei
-        content: Neue Inhalte für die Datei (String oder Bytes)
-        update_metadata: Optionale Metadaten-Updates
-
-    Returns:
-        Aktualisierte Metadaten der Datei
-    """
-    try:
-        # lucydom_interface wird über Globals zur Verfügung gestellt
-        global lucydom_interface
-
-        if not lucydom_interface:
-            raise ValueError("LucyDOM-Interface nicht verfügbar")
-
-        # Wenn der Inhalt ein String ist, in Bytes konvertieren
-        if isinstance(content, str):
-            content = content.encode('utf-8')
-
-        # Bestehende Datei abrufen
-        file_meta = lucydom_interface.get_file(file_id)
-
-        if not file_meta:
-            raise ValueError(f"Datei mit ID {file_id} nicht gefunden")
-
-        # Datei mit neuen Inhalten unter dem bestehenden Namen speichern
-        updated_meta = lucydom_interface.save_uploaded_file(content, file_meta.get("name", "updated_file"))
-
-        # Metadaten aktualisieren
-        if update_metadata and "id" in updated_meta:
-            lucydom_interface.update_file(updated_meta["id"], update_metadata)
-            updated_meta.update(update_metadata)
-
-        return updated_meta
-    except Exception as e:
-        print(f"Fehler beim Aktualisieren der Datei {file_id}: {str(e)}")
-        raise
-
-def get_file_metadata(file_id):
-    """
-    Ruft die Metadaten einer Datei ab.
-
-    Args:
-        file_id: ID der Datei
-
-    Returns:
-        Metadaten der Datei als Dictionary
-    """
-    try:
-        # lucydom_interface wird über Globals zur Verfügung gestellt
-        global lucydom_interface
-
-        if not lucydom_interface:
-            raise ValueError("LucyDOM-Interface nicht verfügbar")
-
-        # Datei-Metadaten abrufen
-        file_meta = lucydom_interface.get_file(file_id)
-
-        if not file_meta:
-            raise ValueError(f"Datei mit ID {file_id} nicht gefunden")
-
-        return file_meta
-    except Exception as e:
-        print(f"Fehler beim Abrufen der Metadaten für Datei {file_id}: {str(e)}")
-        raise
-
-def process_csv(content, operations=None):
-    """
-    Verarbeitet CSV-Daten mit Pandas.
- - Args: - content: CSV-Daten als String oder Bytes - operations: Liste von Operationen, die auf den Daten ausgeführt werden sollen - [{"type": "filter", "column": "Name", "value": "Max"}, - {"type": "groupby", "column": "Category"}] - - Returns: - Ergebnis der Verarbeitung als Dictionary - \"\"\" - try: - import pandas as pd - import io - - # Wenn der Inhalt Bytes ist, in String konvertieren - if isinstance(content, bytes): - content = content.decode('utf-8') - - # CSV in DataFrame laden - df = pd.read_csv(io.StringIO(content)) - - # Wenn Operationen angegeben sind, diese durchführen - if operations: - for op in operations: - op_type = op.get("type", "").lower() - - if op_type == "filter" and "column" in op and "value" in op: - df = df[df[op["column"]] == op["value"]] - - elif op_type == "groupby" and "column" in op: - groupby_column = op["column"] - agg_column = op.get("aggregate_column") - agg_func = op.get("aggregate_function", "count") - - if agg_column: - df = df.groupby(groupby_column).agg({agg_column: agg_func}).reset_index() - else: - df = df.groupby(groupby_column).size().reset_index(name='count') - - # Ergebnis zurückgeben - return { - "data": df.to_dict('records'), - "columns": df.columns.tolist(), - "shape": df.shape - } - except Exception as e: - print(f"Fehler bei der CSV-Verarbeitung: {str(e)}") - raise - -def extract_text_from_pdf(pdf_data): - \"\"\" - Extrahiert Text aus einem PDF-Dokument. - - Args: - pdf_data: PDF-Daten als Bytes - - Returns: - Extrahierter Text aus dem PDF - \"\"\" - try: - # Versuche PyPDF2 zu verwenden - try: - from PyPDF2 import PdfReader - from io import BytesIO - - reader = PdfReader(BytesIO(pdf_data)) - text = "" - - for page in reader.pages: - text += page.extract_text() + "\\n\\n" - - return text - except ImportError: - # Fallback auf pymupdf, falls PyPDF2 nicht verfügbar ist - try: - import fitz # PyMuPDF - from io import BytesIO - - doc = fitz.open("pdf", pdf_data) - text = "" - - for page in doc: - text += page.get_text() + "\\n\\n" - - return text - except ImportError: - return "PDF-Extraktion fehlgeschlagen: Weder PyPDF2 noch PyMuPDF sind installiert" - except Exception as e: - print(f"Fehler bei der PDF-Extraktion: {str(e)}") - return f"Fehler bei der PDF-Extraktion: {str(e)}" - -def analyze_image(image_data, analysis_type="description"): - \"\"\" - Analysiert ein Bild (KI-basiert, falls verfügbar). 
- - Args: - image_data: Bilddaten als Bytes - analysis_type: Art der Analyse: 'description', 'objects', 'text' - - Returns: - Ergebnis der Bildanalyse - \"\"\" - # Hinweis: Diese Funktion simuliert eine Bildanalyse, - # da die echte KI-Analyse eine async-Funktion erfordern würde - try: - # Bildgröße ermitteln - from io import BytesIO - from PIL import Image - - image = Image.open(BytesIO(image_data)) - width, height = image.size - format_name = image.format - - # Simulierte Analyse basierend auf dem Bildtyp - analysis_result = { - "image_info": { - "width": width, - "height": height, - "format": format_name, - "size_bytes": len(image_data) - }, - "analysis_type": analysis_type, - "analysis_result": f"Simulierte Bildanalyse für ein {format_name}-Bild ({width}x{height}px)" - } - - return analysis_result - except Exception as e: - print(f"Fehler bei der Bildanalyse: {str(e)}") - return {"error": str(e)} - -# lucydom_interface global verfügbar machen -import asyncio -""" - - return system_functions_code - -# Singleton-Instanz -_coder_agent = None - -def get_coder_agent(): - """Gibt eine Singleton-Instanz des Coder-Agenten zurück""" - global _coder_agent - if _coder_agent is None: - _coder_agent = CoderAgent() - return _coder_agent \ No newline at end of file diff --git a/gwserver/_old_bk_modules/agentservice_agent_documentation.py b/gwserver/_old_bk_modules/agentservice_agent_documentation.py deleted file mode 100644 index 818261ed..00000000 --- a/gwserver/_old_bk_modules/agentservice_agent_documentation.py +++ /dev/null @@ -1,422 +0,0 @@ -""" -Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten. -Verwendet einen strukturierten mehrstufigen Prozess zur Erstellung hochwertiger Dokumentation. -""" - -import logging -from typing import List, Dict, Any, Optional, Tuple -from modules.agentservice_base import BaseAgent -from connectors.connector_aichat_openai import ChatService - -logger = logging.getLogger(__name__) - -class DocumentationAgent(BaseAgent): - """Agent für die Erstellung von Dokumentation und strukturierten Inhalten""" - - _instance = None - - @classmethod - def get_instance(cls): - """Gibt eine Singleton-Instanz zurück""" - if cls._instance is None: - cls._instance = cls() - return cls._instance - - def __init__(self): - """Initialisiert den Dokumentations-Agenten""" - super().__init__() - self.id = "documentation_agent" - self.name = "Dokumentation" - self.type = "documentation" - self.description = "Erstellt Dokumentation und strukturierte Inhalte" - self.capabilities = "Berichte, Dokumentationen" - self.instructions = """ - Du bist der Dokumentations-Agent. Deine Aufgabe: - 1. Komplexe Informationen in klare, strukturierte Dokumente umsetzen - 2. Verschiedene Dokumentformate erstellen - 3. Informationen aus verschiedenen Quellen strukturieren - 4. Technische Konzepte verständlich erklären - 5. Konsistente Formatierung sicherstellen - """ - # Chat-Service initialisieren - self.chat_service = None - self.result_format = "FormattedDocument" - - def get_base_prompt(self, document_type: str = "") -> str: - """ - Generiert einen Basis-Prompt für den Dokumentations-Agenten. - - Args: - document_type: Typ des zu erstellenden Dokuments - - Returns: - Basis-Prompt für den Dokumentations-Agenten - """ - # Basis-Prompt - prompt = f""" - Du bist {self.name}, ein {self.type} Agent. 
- - {self.description} - - Fähigkeiten: {self.capabilities} - - {self.instructions} - """ - - # Dokumenttyp-spezifische Anweisungen hinzufügen - if document_type: - prompt += self._get_document_type_instructions(document_type) - - return prompt.strip() - - def _get_document_type_instructions(self, document_type: str) -> str: - """ - Gibt spezifische Anweisungen für einen bestimmten Dokumenttyp zurück. - - Args: - document_type: Typ des Dokuments - - Returns: - Spezifische Anweisungen für den Dokumenttyp - """ - document_type = document_type.lower() - - if "handbuch" in document_type or "anleitung" in document_type or "guide" in document_type: - return "\n\nHANDBUCH: Beginne mit Zweckbeschreibung, strukturiere in logische Schritte, verwende direkte Anweisungen." - elif "bericht" in document_type or "report" in document_type: - return "\n\nBERICHT: Beginne mit Executive Summary, strukturiere in thematische Abschnitte, halte professionellen Ton." - elif "prozess" in document_type or "process" in document_type: - return "\n\nPROZESS: Beschreibe Zweck, Ziele, Beteiligte, sequenzielle Schritte, Inputs/Outputs und Verantwortlichkeiten." - elif "präsentation" in document_type or "presentation" in document_type: - return "\n\nPRÄSENTATION: Klare Hauptpunkte, visuelle Elemente, Einleitung-Hauptteil-Schluss Struktur." - else: - return "\n\nDOKUMENT: Erstelle ein gut strukturiertes Dokument mit klarer Gliederung und präziser Sprache." - - def _detect_document_type(self, message: str) -> str: - """ - Erkennt den Dokumenttyp aus der Nachricht. - - Args: - message: Nachricht des Benutzers - - Returns: - Erkannter Dokumenttyp - """ - message = message.lower() - - if "handbuch" in message or "anleitung" in message or "guide" in message: - return "handbuch" - elif "bericht" in message or "report" in message: - return "bericht" - elif "prozess" in message or "process" in message or "ablauf" in message: - return "prozess" - elif "präsentation" in message or "presentation" in message or "folien" in message: - return "präsentation" - else: - return "dokument" - - async def generate_title(self, task: str, document_type: str) -> str: - """ - Generiert einen Titel für das Dokument. - - Args: - task: Die Aufgabe/Anfrage - document_type: Typ des Dokuments - - Returns: - Generierter Titel - """ - prompt = f""" - Erstelle einen prägnanten, professionellen Titel für folgendes {document_type.capitalize()}: - - AUFTRAG: {task} - - Gib NUR den Titel zurück, ohne weitere Erklärungen oder Formatierungen. - """ - - messages = [ - {"role": "system", "content": "Du bist ein Experte für die Erstellung von Dokumenttiteln."}, - {"role": "user", "content": prompt} - ] - - title = await self.chat_service.call_api(messages) - - # Bereinige den Titel von Anführungszeichen und Überschriften-Symbolen - title = title.strip('"\'#*- \n\t') - - return title - - async def generate_summary(self, task: str, document_type: str, title: str) -> str: - """ - Generiert eine Zusammenfassung für das Dokument. - - Args: - task: Die Aufgabe/Anfrage - document_type: Typ des Dokuments - title: Titel des Dokuments - - Returns: - Generierte Zusammenfassung - """ - prompt = f""" - Erstelle eine prägnante Zusammenfassung für folgendes Dokument: - - TITEL: {title} - TYP: {document_type.capitalize()} - AUFTRAG: {task} - - Die Zusammenfassung soll einen Überblick über den Zweck und die Hauptinhalte des Dokuments geben. - Sie sollte etwa 3-5 Sätze umfassen und als eigenständiger Abschnitt funktionieren. 
- """ - - messages = [ - {"role": "system", "content": "Du bist ein Experte für die Erstellung prägnanter Dokumentzusammenfassungen."}, - {"role": "user", "content": prompt} - ] - - summary = await self.chat_service.call_api(messages) - - return summary.strip() - - async def generate_toc_with_prompts(self, task: str, document_type: str, title: str, summary: str) -> Dict[str, str]: - """ - Generiert ein Inhaltsverzeichnis mit Prompts für die einzelnen Kapitel. - - Args: - task: Die Aufgabe/Anfrage - document_type: Typ des Dokuments - title: Titel des Dokuments - summary: Zusammenfassung des Dokuments - - Returns: - Dict mit Kapiteltiteln als Schlüssel und Prompts als Werte - """ - prompt = f""" - Erstelle ein strukturiertes Inhaltsverzeichnis für folgendes Dokument: - - TITEL: {title} - TYP: {document_type.capitalize()} - AUFTRAG: {task} - ZUSAMMENFASSUNG: {summary} - - Für jedes Kapitel gib auch einen kurzen Prompt an, der beschreibt, was in diesem Kapitel behandelt werden soll. - Formatiere deine Antwort als JSON-Objekt mit folgendem Format: - {{ - "Kapitel 1: Titel": "Prompt für Kapitel 1", - "Kapitel 2: Titel": "Prompt für Kapitel 2", - ... - }} - - Beschränke dich auf 5-7 sinnvolle Kapitel, die das Thema umfassend behandeln. - """ - - messages = [ - {"role": "system", "content": "Du bist ein Experte für die Strukturierung von Dokumenten und die Erstellung von Inhaltsverzeichnissen."}, - {"role": "user", "content": prompt} - ] - - toc_response = await self.chat_service.call_api(messages) - - # JSON aus der Antwort extrahieren - import json - import re - - # Markdown-Code-Blöcke entfernen, falls vorhanden - toc_response = re.sub(r'```json\s*|\s*```', '', toc_response) - - try: - toc_with_prompts = json.loads(toc_response) - return toc_with_prompts - except json.JSONDecodeError as e: - logger.error(f"Fehler beim Parsen des Inhaltsverzeichnisses: {str(e)}") - logger.error(f"Rohe Antwort: {toc_response}") - # Notfall-Fallback - return { - "1. Einleitung": "Einführung in das Thema und Überblick", - "2. Hauptteil": "Hauptinhalte des Dokuments", - "3. Schlussfolgerung": "Zusammenfassung und nächste Schritte" - } - - async def generate_chapter_content(self, chapter_title: str, chapter_prompt: str, - task: str, document_type: str, title: str, summary: str) -> str: - """ - Generiert den Inhalt für ein bestimmtes Kapitel. - - Args: - chapter_title: Titel des Kapitels - chapter_prompt: Prompt für das Kapitel - task: Die Aufgabe/Anfrage - document_type: Typ des Dokuments - title: Titel des Dokuments - summary: Zusammenfassung des Dokuments - - Returns: - Generierter Kapitelinhalt - """ - prompt = f""" - Erstelle detaillierten Inhalt für folgendes Kapitel eines {document_type}s: - - DOKUMENT-TITEL: {title} - AUFGABE: {task} - KAPITEL: {chapter_title} - ANWEISUNG FÜR DIESES KAPITEL: {chapter_prompt} - - Der Inhalt sollte detailliert, informativ und gut strukturiert sein. - Verwende bei Bedarf Unterüberschriften, Aufzählungen und Tabellen zur besseren Strukturierung. - Der Inhalt sollte direkt mit dem Kapiteltext beginnen, ohne den Kapiteltitel zu wiederholen. 
- """ - - messages = [ - {"role": "system", "content": "Du bist ein Experte für die Erstellung hochwertiger Dokumentationsinhalte."}, - {"role": "user", "content": prompt} - ] - - chapter_content = await self.chat_service.call_api(messages) - - return chapter_content.strip() - - def _format_final_document(self, title: str, summary: str, toc: Dict[str, str], chapter_contents: Dict[str, str]) -> str: - """ - Formatiert das endgültige Dokument aus allen Teilen. - - Args: - title: Titel des Dokuments - summary: Zusammenfassung - toc: Inhaltsverzeichnis (Dict mit Kapiteltiteln als Schlüssel) - chapter_contents: Kapitelinhalte (Dict mit Kapiteltiteln als Schlüssel und Inhalten als Werte) - - Returns: - Formatiertes Dokument - """ - # Titel formatieren - doc = f"# {title}\n\n" - - # Zusammenfassung hinzufügen - doc += f"## Zusammenfassung\n\n{summary}\n\n" - - # Inhaltsverzeichnis hinzufügen - doc += "## Inhaltsverzeichnis\n\n" - for idx, chapter in enumerate(toc.keys(), 1): - # Extrahiere den reinen Kapitelnamen (entferne Nummerierung, falls vorhanden) - clean_chapter = chapter - if chapter.strip().startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) and '. ' in chapter: - clean_chapter = chapter.split('. ', 1)[1] - - doc += f"{idx}. {clean_chapter}\n" - doc += "\n" - - # Kapitelinhalte hinzufügen - for idx, (chapter, content) in enumerate(chapter_contents.items(), 1): - # Extrahiere den reinen Kapitelnamen (entferne Nummerierung, falls vorhanden) - clean_chapter = chapter - if chapter.strip().startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) and '. ' in chapter: - clean_chapter = chapter.split('. ', 1)[1] - - doc += f"## {idx}. {clean_chapter}\n\n{content}\n\n" - - # Metadaten hinzufügen - doc += "---\n\n" - doc += f"**Erstellt durch:** {self.name}\n" - - return doc - - async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Verarbeitet eine Nachricht und erstellt Dokumentation in einem strukturierten Prozess. 
- - Args: - message: Die zu verarbeitende Nachricht - context: Zusätzlicher Kontext - - Returns: - Die generierte Dokumentation - """ - try: - # Chat-Service initialisieren, falls noch nicht geschehen - if self.chat_service is None: - self.chat_service = ChatService() - - # Task aus der Nachricht extrahieren - task = message.get("content", "") - if context and "task" in context: - task = context["task"] - - # Dokumenttyp erkennen - document_type = self._detect_document_type(task) - - logger.info(f"Starte Dokumentationserstellung für Typ: {document_type}") - - # Schritt 1: Titel generieren - title = await self.generate_title(task, document_type) - logger.info(f"Titel generiert: {title}") - - # Schritt 2: Zusammenfassung generieren - summary = await self.generate_summary(task, document_type, title) - logger.info("Zusammenfassung generiert") - - # Schritt 3: Inhaltsverzeichnis mit Prompts generieren - toc_with_prompts = await self.generate_toc_with_prompts(task, document_type, title, summary) - logger.info(f"Inhaltsverzeichnis mit {len(toc_with_prompts)} Kapiteln generiert") - - # Schritt 4: Kapitelinhalte in einer Schleife generieren - chapter_contents = {} - for chapter_title, chapter_prompt in toc_with_prompts.items(): - logger.info(f"Generiere Inhalt für Kapitel: {chapter_title}") - content = await self.generate_chapter_content( - chapter_title, chapter_prompt, task, document_type, title, summary - ) - chapter_contents[chapter_title] = content - - # Schritt 5: Dokument zusammenführen - final_document = self._format_final_document(title, summary, toc_with_prompts, chapter_contents) - logger.info(f"Dokument fertiggestellt mit {len(final_document)} Zeichen") - - # Schritt 6: Antwort zurückgeben - documentation_response = { - "role": "assistant", - "content": f"{final_document}\n\n[STATUS: ERGEBNIS]", - "agent_type": self.type - } - - # Extrahiere den Status aus der Antwort und aktualisiere den Inhalt - content, status = self.extract_status(documentation_response["content"]) - documentation_response["content"] = content - - # Setze den Status im Kontext, falls vorhanden - if context is not None: - context["status"] = status - - # Chat-Service schließen - await self.chat_service.close() - self.chat_service = None - documentation_response["result_format"] = self.result_format - - return documentation_response - - except Exception as e: - logger.error(f"Fehler bei der Dokumentationserstellung: {str(e)}", exc_info=True) - - # Chat-Service schließen bei Fehler - if self.chat_service: - try: - await self.chat_service.close() - except: - pass - self.chat_service = None - - # Fehlerantwort zurückgeben - return { - "role": "assistant", - "content": f"Bei der Erstellung der Dokumentation ist ein Fehler aufgetreten: {str(e)}", - "agent_type": self.type - } - -# Singleton-Instanz -_documentation_agent = None - -def get_documentation_agent(): - """Gibt eine Singleton-Instanz des Dokumentations-Agenten zurück""" - global _documentation_agent - if _documentation_agent is None: - _documentation_agent = DocumentationAgent() - return _documentation_agent \ No newline at end of file diff --git a/gwserver/_old_bk_modules/agentservice_agent_filecreator.py b/gwserver/_old_bk_modules/agentservice_agent_filecreator.py deleted file mode 100644 index 2fea1d7f..00000000 --- a/gwserver/_old_bk_modules/agentservice_agent_filecreator.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Filecreator-Agent für die Erstellung von Dateien mit Inhalten und deren Speicherung in der Datenbank (Fortsetzung). 
-"""
-
-import logging
-import base64
-from typing import List, Dict, Any, Optional, Tuple
-import uuid
-from datetime import datetime
-from modules.agentservice_base import BaseAgent
-
-logger = logging.getLogger(__name__)
-
-class FilecreatorAgent(BaseAgent):
-    """Agent für die Erstellung und Speicherung von Dateien"""
-
-    # (Vorherige Implementierung hier)
-
-    def _extract_file_params(self, message_content: str) -> Dict[str, Any]:
-        """
-        Extrahiert Dateiparameter aus dem Nachrichteninhalt.
-
-        Args:
-            message_content: Inhalt der Nachricht
-
-        Returns:
-            Dictionary mit Dateiparametern
-        """
-        # Grundlegende Parameter; "type" wird hier bewusst NICHT vorbelegt,
-        # damit der Fallback über die Dateiendung am Ende greifen kann
-        file_params = {
-            "name": "document.txt",
-            "content": ""
-        }
-
-        # Einfache Heuristik zur Extraktion der Parameter
-        lines = message_content.split('\n')
-        content_lines = []
-        is_content_section = False
-
-        for line in lines:
-            line = line.strip()
-
-            # Dateiname erkennen
-            if line.startswith("DATEINAME:") or line.startswith("FILENAME:"):
-                file_params["name"] = line.split(":", 1)[1].strip()
-
-            # Dateityp erkennen
-            elif line.startswith("TYP:") or line.startswith("TYPE:"):
-                file_type = line.split(":", 1)[1].strip().lower()
-
-                # MIME-Typ anhand der Angabe setzen
-                if file_type in ["text", "txt", "plain"]:
-                    file_params["type"] = "text/plain"
-                    if not file_params["name"].endswith(".txt"):
-                        file_params["name"] += ".txt"
-
-                elif file_type in ["markdown", "md"]:
-                    file_params["type"] = "text/markdown"
-                    if not file_params["name"].endswith(".md"):
-                        file_params["name"] += ".md"
-
-                elif file_type in ["csv"]:
-                    file_params["type"] = "text/csv"
-                    if not file_params["name"].endswith(".csv"):
-                        file_params["name"] += ".csv"
-
-                elif file_type in ["json"]:
-                    file_params["type"] = "application/json"
-                    if not file_params["name"].endswith(".json"):
-                        file_params["name"] += ".json"
-
-                elif file_type in ["html"]:
-                    file_params["type"] = "text/html"
-                    if not file_params["name"].endswith(".html"):
-                        file_params["name"] += ".html"
-
-            # Inhalt sammeln
-            elif line == "INHALT:" or line == "CONTENT:":
-                is_content_section = True
-                continue
-
-            elif is_content_section:
-                content_lines.append(line)
-
-        # Wenn kein Inhalt gefunden wurde, versuche den gesamten Inhalt zu verwenden
-        if not content_lines and not is_content_section:
-            # Ignoriere die ersten und letzten Zeilen (können Anweisungen sein)
-            if len(lines) > 4:
-                content_lines = lines[2:-2]
-            else:
-                content_lines = lines
-
-        # Inhalt zusammensetzen
-        file_params["content"] = "\n".join(content_lines)
-
-        # Dateiformat aus dem Dateinamen ableiten, falls nicht explizit angegeben
-        if "type" not in file_params:
-            file_extension = file_params["name"].split(".")[-1].lower() if "." in file_params["name"] else ""
-            if file_extension == "md":
-                file_params["type"] = "text/markdown"
-            elif file_extension == "csv":
-                file_params["type"] = "text/csv"
-            elif file_extension == "json":
-                file_params["type"] = "application/json"
-            elif file_extension == "html":
-                file_params["type"] = "text/html"
-            else:
-                file_params["type"] = "text/plain"
-
-        return file_params
-
-    async def _create_and_save_file(self, file_params: Dict[str, Any], lucydom_interface) -> Tuple[str, str, str]:
-        """
-        Erstellt und speichert eine Datei in der Datenbank.
- - Args: - file_params: Parameter für die Dateierstellung - lucydom_interface: Interface für Datenbankzugriffe - - Returns: - Tuple mit (file_id, file_name, file_type) - """ - if not lucydom_interface: - raise ValueError("Kein LucyDOM-Interface verfügbar für die Dateispeicherung") - - # Dateiparameter extrahieren - file_name = file_params.get("name", "document.txt") - file_content = file_params.get("content", "") - content_type = file_params.get("type", "text/plain") - - # Dateityp aus dem Content-Type ableiten - file_type = "document" # Standard-Dateityp - if content_type.startswith("image/"): - file_type = "image" - - # Binäre Dateidaten erstellen - file_data = file_content.encode('utf-8') - - # Datei über LucyDOM-Interface speichern - try: - file_meta = lucydom_interface.save_uploaded_file(file_data, file_name) - - if not file_meta or "id" not in file_meta: - raise ValueError("Fehler beim Speichern der Datei") - - file_id = file_meta["id"] - - # Dateityp aktualisieren, falls notwendig - update_data = {"type": file_type, "content_type": content_type} - lucydom_interface.update_file(file_id, update_data) - - return file_id, file_name, file_type - - except Exception as e: - logger.error(f"Fehler beim Speichern der Datei {file_name}: {str(e)}") - raise ValueError(f"Fehler beim Speichern der Datei: {str(e)}") - - -# Singleton-Instanz -_filecreator_agent = None - -def get_filecreator_agent(): - """Gibt eine Singleton-Instanz des FileCreator-Agenten zurück""" - global _filecreator_agent - if _filecreator_agent is None: - _filecreator_agent = FilecreatorAgent() - return _filecreator_agent \ No newline at end of file diff --git a/gwserver/_old_bk_modules/agentservice_agent_sharepoint.py b/gwserver/_old_bk_modules/agentservice_agent_sharepoint.py deleted file mode 100644 index 04a61ac2..00000000 --- a/gwserver/_old_bk_modules/agentservice_agent_sharepoint.py +++ /dev/null @@ -1,175 +0,0 @@ -""" -Sharepoint-Agent für die Interaktion mit Sharepoint-Ressourcen und Dokumenten. -""" - -import logging -from typing import List, Dict, Any, Optional -from modules.agentservice_base import BaseAgent - -logger = logging.getLogger(__name__) - -class SharepointAgent(BaseAgent): - """Agent für den Zugriff auf und die Arbeit mit SharePoint-Ressourcen""" - - _instance = None - - @classmethod - def get_instance(cls): - """Gibt eine Singleton-Instanz zurück""" - if cls._instance is None: - cls._instance = cls() - return cls._instance - - def __init__(self): - """Initialisiert den SharePoint-Agenten""" - super().__init__() - self.id = "sharepoint_agent" - self.name = "SharePoint-Agent" - self.type = "sharepoint" - self.description = "Zugriff auf und Arbeit mit SharePoint-Ressourcen" - self.capabilities = "Suche und Abruf von Dokumenten aus SharePoint, Dokumentenverwaltung, Metadaten-Extraktion und Integration von SharePoint-Inhalten" - self.instructions = """ - Du bist der SharePoint-Agent, ein Spezialist für die Interaktion mit Microsoft SharePoint. Deine Aufgabe ist es: - - 1. SharePoint-Dokumente und -Ressourcen zu durchsuchen und abzurufen - 2. Metadaten aus SharePoint-Dokumenten zu extrahieren und zu analysieren - 3. Strukturierte Informationen aus SharePoint-Bibliotheken zu sammeln - 4. Dokumente basierend auf Metadaten zu filtern und zu organisieren - 5. Inhalte aus verschiedenen SharePoint-Quellen zu integrieren und zusammenzuführen - 6. Informationen aus SharePoint-Listen und -Dokumentbibliotheken zu extrahieren - 7. 
Zusammenfassungen und Analysen von SharePoint-Inhalten zu erstellen - - Bei der Darstellung deiner Ergebnisse: - - Strukturiere die Informationen klar und übersichtlich - - Gib den Ursprung und die Metadaten der Dokumente an - - Zeige Beziehungen zwischen verschiedenen Dokumenten und Ressourcen auf - - Hebe wichtige Erkenntnisse und Muster hervor - - Biete Kontext und Relevanz für die gefundenen Informationen - """ - self.result_format = "DocumentList" - - def get_prompt(self, message_context: Dict[str, Any]) -> str: - """ - Generiert einen angepassten Prompt für den SharePoint-Agenten. - - Args: - message_context: Kontext der Nachricht - - Returns: - Formatierter Prompt für den SharePoint-Agenten - """ - # Basis-Prompt vom BaseAgent holen - base_prompt = super().get_prompt(message_context) - - # Zusätzliche Anweisungen für SharePoint-Interaktion - sharepoint_instructions = """ - SHAREPOINT-INTERAKTIONS-RICHTLINIEN: - - 1. Präzisiere die Suchkriterien für SharePoint-Ressourcen - 2. Identifiziere relevante Bibliotheken, Listen und Standorte - 3. Definiere benötigte Metadaten und Inhalte - 4. Berücksichtige Berechtigungsanforderungen - 5. Priorisiere aktuelle und relevante Dokumente - 6. Stelle eine strukturierte Darstellung der Ergebnisse sicher - - Für eine gute SharePoint-Integration: - - Gib detaillierte Pfade und Standorte an - - Berücksichtige verschiedene Dokumenttypen und Formate - - Zeige Metadaten und Dokumenteigenschaften - - Biete Kontext zu den gefundenen Ressourcen - - Berücksichtige Versionsinformationen - """ - - # Task aus dem Kontext extrahieren - task = message_context.get("task", "") - task_instructions = f"\nSHAREPOINT-AUFTRAG:\n{task}\n" if task else "" - - # Vollständigen Prompt zusammenbauen - complete_prompt = f"{base_prompt}\n\n{sharepoint_instructions}\n{task_instructions}" - - return complete_prompt.strip() - - async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Verarbeitet eine Nachricht und interagiert mit SharePoint. - - Args: - message: Die zu verarbeitende Nachricht - context: Zusätzlicher Kontext - - Returns: - Die generierte Antwort mit SharePoint-Inhalten - """ - # Hier würde die tatsächliche Interaktion mit SharePoint stattfinden - # In der finalen Implementierung würde ein SharePoint-Connector verwendet werden - - # Als Beispiel geben wir eine Standardantwort zurück - sharepoint_response = { - "role": "assistant", - "content": f"""Ich habe als {self.name} die SharePoint-Ressourcen durchsucht und folgende Ergebnisse gefunden: - -## SharePoint-Suchergebnisse - -Basierend auf deiner Anfrage habe ich folgende relevante Dokumente identifiziert: - -### Dokumente -1. **Projektplan_2025.docx** (Letzte Änderung: 15.03.2025) - - Standort: Projekte/Strategische Planung - - Autor: Maria Schmidt - - Schlüsselinhalt: Zeitplan für Q2-Q4 2025, Ressourcenplanung, Meilensteine - -2. **Marktanalyse_Q1_2025.pptx** (Letzte Änderung: 22.02.2025) - - Standort: Marketing/Marktforschung - - Autor: Thomas Müller - - Schlüsselinhalt: Aktuelle Markttrends, Wettbewerbsanalyse, Chancen und Risiken - -3. **Budgetplanung_2025.xlsx** (Letzte Änderung: 01.03.2025) - - Standort: Finanzen/Planung - - Autor: Sarah Weber - - Schlüsselinhalt: Detaillierte Budgetaufschlüsselung nach Abteilungen und Quartalen - -### SharePoint-Listen -1. 
**Projektstatusliste** - - 12 Einträge mit relevanten Projektstatusinformationen - - Letzte Aktualisierung: 25.03.2025 - -## Zusammenfassung der Inhalte - -Die gefundenen Dokumente zeigen übereinstimmend, dass: -- Der Fokus im Jahr 2025 auf der Expansion in neue Märkte liegt -- Das Budget für Forschung und Entwicklung um 15% erhöht wurde -- Drei neue Hauptprojekte im zweiten Quartal starten werden - -## Empfehlungen - -Basierend auf den gefundenen Informationen empfehle ich: -1. Die Projektpläne für Q2 mit besonderem Fokus auf die neuen Hauptprojekte zu prüfen -2. Die Ressourcenzuweisung entsprechend der Budgeterhöhung anzupassen -3. Die Marktanalyse als Grundlage für die Expansionsstrategie zu verwenden - -Die Dokumente sind alle aktuell und wurden von den verantwortlichen Fachabteilungen erstellt. - -[STATUS: ERGEBNIS]""", - "agent_type": self.type - } - - # Extrahiere den Status aus der Antwort und aktualisiere den Inhalt - content, status = self.extract_status(sharepoint_response["content"]) - sharepoint_response["content"] = content - - # Setze den Status im Kontext, falls vorhanden - if context is not None: - context["status"] = status - sharepoint_response["result_format"] = self.result_format - - return sharepoint_response - -# Singleton-Instanz -_sharepoint_agent = None - -def get_sharepoint_agent(): - """Gibt eine Singleton-Instanz des SharePoint-Agenten zurück""" - global _sharepoint_agent - if _sharepoint_agent is None: - _sharepoint_agent = SharepointAgent() - return _sharepoint_agent \ No newline at end of file diff --git a/gwserver/_old_bk_modules/agentservice_agent_webcrawler.py b/gwserver/_old_bk_modules/agentservice_agent_webcrawler.py deleted file mode 100644 index c3f8da7e..00000000 --- a/gwserver/_old_bk_modules/agentservice_agent_webcrawler.py +++ /dev/null @@ -1,512 +0,0 @@ -""" -WebCrawler-Agent für die Recherche und Beschaffung von Informationen aus dem Web. 
-""" - -import json -import logging -import random -import time -from typing import List, Dict, Any, Optional - -import urllib -from urllib.parse import quote_plus, unquote - -from bs4 import BeautifulSoup -import requests -from modules.agentservice_base import BaseAgent -from connectors.connector_aichat_openai import ChatService - - -logger = logging.getLogger(__name__) - -class WebcrawlerAgent(BaseAgent): - """Agent für Web-Recherche und Informationsbeschaffung""" - - _instance = None - - chat_service = ChatService() - - #INIT --> should go to config - max_url=3 - max_key=3 - - max_result=3 - - timeout = 10 - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', - 'Accept-Language': 'en-US,en;q=0.5', - 'Referer': 'https://www.google.com/', - 'DNT': '1', - 'Connection': 'keep-alive', - 'Upgrade-Insecure-Requests': '1', - } - max_urls = 10 - max_content_length=100000 - - - @classmethod - def get_instance(cls): - """Gibt eine Singleton-Instanz zurück""" - if cls._instance is None: - cls._instance = cls() - return cls._instance - - def __init__(self): - """Initialisiert den WebCrawler-Agenten""" - super().__init__() - self.id = "webcrawler_agent" - self.name = "Webscraper" - self.type = "scraper" - self.description = "Recherchiert Informationen im Web" - self.capabilities = "Informationsrecherche, Datenbeschaffung aus dem Web, Quellenbewertung und Zusammenführung von Online-Informationen" - self.instructions = "" - self.result_format = "SearchResults" - - async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: - try: - # Führe die Web-Recherche durch und warte auf das Ergebnis mit await - web_query_result = await self.get_web_query(message) - - # Antwort-Objekt erstellen - response = { - "role": "assistant", - "content": f"{web_query_result} [STATUS: ERGEBNIS]", - "agent_type": self.type - } - - # Extrahiere den Status aus der Antwort und aktualisiere den Inhalt - content, status = self.extract_status(response["content"]) - response["content"] = content - - # Setze den Status im Kontext, falls vorhanden - if context is not None: - context["status"] = status - response["result_format"] = self.result_format - - return response - - except Exception as e: - logger.error(f"Fehler bei der Web-Recherche: {str(e)}", exc_info=True) - - # Fehlerantwort zurückgeben - return { - "role": "assistant", - "content": f"Bei der Web-Recherche ist ein Fehler aufgetreten: {str(e)}", - "agent_type": self.type - } - - async def get_web_query(self, message_context: Dict[str, Any]) -> str: - prompt = await self.get_prompt(message_context) - result_json = await self.run_web_query(prompt) - result_data = "" - summary_src = "" - - logger.info(f"Web analysis prompt '{prompt}' delivers {len(result_json)} results.") - if isinstance(result_json, list): - for i, result in enumerate(result_json, 1): - - web_answer_instructions = f""" - Fass das Resultat gemäss dem Auftrag zusammen in maximal rund 2000 Zeichen. Auftrag = '{prompt.replace("'","")}' - Fasse die wichtigsten Erkenntnisse zusammen und setze sie in Bezug zur ursprünglichen Anfrage. Die Einleitung kannst Du weglassen. - Achte darauf, nur relevante und qualitativ hochwertige Informationen zu extrahieren, welche einen Bezug zum Auftrag haben, und übersichtlich zu präsentieren. 
Vermittle ein ausgewogenes Bild der recherchierten Informationen.
-
-                Dies ist das Resultat:
-                {result['data']}
-                """
-
-                # Zusammenfassung des einzelnen Web-Resultats anfordern
-                content_text = await self.chat_service.call_api(
-                    messages=[
-                        {
-                            "role": "system",
-                            "content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."
-                        },
-                        {
-                            "role": "user",
-                            "content": web_answer_instructions
-                        }
-                    ]
-                )
-                result_data += f"\n\n[{i}] {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\nContent: {content_text}"
-                summary_src += f"\n{content_text}"
-        else:
-            result_data = "no data received"
-
-        logger.info(f"Web analysis result sent {len(result_data)}B")
-
-        # Zusätzliche Zusammenfassung
-        summary = ""
-        if len(summary_src) > 1:
-            summary = await self.chat_service.call_api(
-                messages=[
-                    {
-                        "role": "system",
-                        "content": "Du erstellst prägnante Zusammenfassungen von Rechercheergebnissen."
-                    },
-                    {
-                        "role": "user",
-                        "content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src}\n"
-                    }
-                ]
-            )
-        result = f"{summary}\n\n{result_data}"
-        return result
-
-
-    async def get_prompt(self, message_context: Dict[str, Any]) -> str:
-        task = message_context.get("content", "")
-        return task.strip()
-
-
-    async def run_web_query(self, prompt: str) -> List[Dict]:
-        if prompt == "":
-            return []
-
-        ptext = f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the result as a JSON object with these specific keys. If specific URLs are provided and the task requires analysis of those URLs only, leave 'skey' empty.
-        - 'url': A list of at most {self.max_url} specific URLs extracted from the task string.
-        - 'skey': A list of at most {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
-
-        Format your response as a valid JSON object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
-        """
-
-        content_text = await self.chat_service.call_api(
-            messages=[
-                {
-                    "role": "system",
-                    "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
-                },
-                {
-                    "role": "user",
-                    "content": ptext
-                }
-            ]
-        )
-        # Remove markdown formatting if present
-        if content_text.startswith("```json"):
-            # Find the end of the JSON block
-            end_marker = "```"
-            end_index = content_text.rfind(end_marker)
-            if end_index != -1:
-                # Extract the JSON content without the markdown markers
-                content_text = content_text[7:end_index].strip()
-
-        # Now parse the JSON
-        try:
-            pjson = json.loads(content_text)
-            logger.info(f"Valid JSON received: {str(content_text)}")
-            # Now call scrape_json with the parsed dictionary
-            result_json = await self.scrape_json(pjson)
-            return result_json
-        except json.JSONDecodeError as e:
-            logger.error(f"Failed to parse JSON: {e}")
-            logger.error(f"Cleaned content: {content_text[:100]}...")
-            return []
-
-
-    async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
-        """
-        Scrapes web content based on a research strategy JSON.
-
-        Args:
-            research_strategy: A dictionary containing:
-                - 'skey': List of search keywords
-                - 'url': List of direct URLs to scrape
-
-        Returns:
-            List of result dictionaries with 'title', 'url', 'snippet' and 'data' keys
-        """
-
-        logger.info("Starting JSON-based web scraping")
-        results = []
-
-        # Validate input structure
-        if not isinstance(research_strategy, dict):
-            logger.error("Invalid research_strategy format: not a dictionary")
-            return []
-
-        keys = research_strategy.get("skey", [])
-        direct_urls = research_strategy.get("url", [])
-
-        if not isinstance(keys, list) or not isinstance(direct_urls, list):
-            logger.error("Invalid research_strategy format: 'skey' or 'url' is not a list")
-            return []
-
-        # Process search keywords through search engine
-        for keyword in keys:
-            logger.info(f"Processing keyword: {keyword}")
-            found_results = self.search_web(keyword)  # list of dicts: title, url, snippet, data
-            logger.info(f"... {len(found_results)} results found")
-            results.extend(found_results)
-
-        # Process direct URLs, skipping any already covered by the keyword search
-        logger.info(f"Processing {len(direct_urls)} direct URLs")
-        scraped_urls = {r.get("url") for r in results}
-        for url in direct_urls:
-            if url in scraped_urls:
-                logger.info(f"Skipping already scraped URL: {url}")
-                continue
-            soup = self.read_url(url)
-
-            # Extract title from the page if it exists
-            if isinstance(soup, BeautifulSoup):
-                title_tag = soup.find('title')
-                title = title_tag.text.strip() if title_tag else "No title"
-
-                # Fall back to the first h1 tag if the title tag is missing
-                if title == "No title":
-                    h1_tag = soup.find('h1')
-                    if h1_tag:
-                        title = h1_tag.text.strip()
-            else:
-                # Handle the case where soup is an error message string
-                title = "Error fetching page"
-
-            results.append(self.parse_result(soup, title, url))
-        logger.info(f"JSON scraping completed.
Scraped {len(results)} URLs in total") - return results - - - def search_web(self, query: str) -> List[Dict]: - formatted_query = quote_plus(query) - url = f"https://html.duckduckgo.com/html/?q={formatted_query}" - - search_results_soup = self.read_url(url) - if not search_results_soup or search_results_soup.select('.result') is None or len(search_results_soup.select('.result')) == 0: - logger.warning(f"Keine Suchergebnisse gefunden für: {query}") - return [] - - # Extract search results - results = [] - - # Find all result containers - result_elements = search_results_soup.select('.result') - - for result in result_elements: - # Extract title - title_element = result.select_one('.result__a') - title = title_element.text.strip() if title_element else 'No title' - - # Extract URL (DuckDuckGo uses redirects, need to extract from href param) - url_element = title_element.get('href') if title_element else '' - extracted_url = 'No URL' - - if url_element: - # Extract the actual URL from DuckDuckGo's redirect - if url_element.startswith('/d.js?q='): - start = url_element.find('?q=') + 3 # Skip '?q=' - end = url_element.find('&', start) if '&' in url_element[start:] else None - extracted_url = unquote(url_element[start:end]) - - # Make sure the URL has the correct protocol prefix - if not extracted_url.startswith(('http://', 'https://')): - if not extracted_url.startswith('//'): - extracted_url = 'https://' + extracted_url - else: - extracted_url = 'https:' + extracted_url - else: - extracted_url = url_element - - # Extract snippet directly from search results page - snippet_element = result.select_one('.result__snippet') - snippet = snippet_element.text.strip() if snippet_element else 'No description' - - # Now fetch the actual page content for the data field - target_page_soup = self.read_url(extracted_url) - - results.append({ - 'title': title, - 'url': extracted_url, - 'snippet': snippet, - 'data': str(target_page_soup) if isinstance(target_page_soup, BeautifulSoup) else "Error fetching page" - }) - - # Limit the number of results if needed - if len(results) >= self.max_result: - break - - return results - - - def read_url(self, url: str) -> BeautifulSoup: - """ - Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück. - Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben. 
-
-        Args:
-            url: Die zu lesende URL
-
-        Returns:
-            BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
-        """
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
-            'Accept': 'text/html,application/xhtml+xml,application/xml',
-            'Accept-Language': 'en-US,en;q=0.9',
-        }
-
-        try:
-            import time
-
-            # Initialer Request
-            response = requests.get(url, headers=headers, timeout=10)
-
-            # Polling für Status 202
-            if response.status_code == 202:
-                # Bis zu vier weitere Versuche mit steigenden Intervallen
-                backoff_times = [0.5, 1.0, 2.0, 5.0]  # 0.5s, 1s, 2s, 5s
-
-                for wait_time in backoff_times:
-                    time.sleep(wait_time)  # Warten mit steigender Zeit
-                    response = requests.get(url, headers=headers, timeout=10)
-
-                    # Wenn kein 202 mehr, dann abbrechen
-                    if response.status_code != 202:
-                        break
-
-            # Für andere Fehler-Status einen Fehler auslösen
-            response.raise_for_status()
-
-            # HTML parsen
-            return BeautifulSoup(response.text, 'html.parser')
-
-        except Exception as e:
-            # Fehler nicht stillschweigend verschlucken, sondern loggen
-            logger.warning(f"Fehler beim Abrufen von {url}: {str(e)}")
-            # Leeres BeautifulSoup-Objekt zurückgeben
-            return BeautifulSoup("", 'html.parser')
-
-
-    def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
-        # Extract snippet/description ('.result__snippet' existiert nur auf
-        # DuckDuckGo-Ergebnisseiten; für direkt gelesene Seiten auf die
-        # Meta-Description zurückfallen)
-        snippet_element = data.select_one('.result__snippet')
-        if snippet_element:
-            snippet = snippet_element.text.strip()
-        else:
-            meta_description = data.select_one('meta[name="description"]')
-            snippet = meta_description.get('content', '').strip() if meta_description else 'No description'
-
-        result = {
-            'title': title,
-            'url': url,
-            'snippet': snippet,
-            'data': data.prettify()
-        }
-        return result
-
-
-    def _old_scrape_url(self, url: str) -> str:
-        try:
-            logger.info(f"Requesting URL: {url}")
-            response = requests.get(url, headers=self.headers, timeout=self.timeout)
-            response.raise_for_status()
-
-            soup = BeautifulSoup(response.text, 'html.parser')
-            title = soup.title.string if soup.title else "No title"
-            for element in soup.select('script, style, meta, noscript, iframe, nav, footer, header, aside'):
-                element.extract()
-            main_content = ""
-
-            # Common content containers
-            content_selectors = [
-                'main', '#main', '.main',
-                'article', '.article',
-                '#content', '.content',
-                '.post', '#post',
-                '.entry-content', '.post-content',
-                '.page-content', '.article-content'
-            ]
-
-            # Try each selector
-            for selector in content_selectors:
-                elements = soup.select(selector)
-                if elements:
-                    main_content = elements[0].get_text(separator='\n', strip=True)
-                    logger.info(f"Found content using selector: {selector}")
-                    break
-
-            # If no main content found, use body text (guard against pages without a body)
-            if not main_content:
-                main_content = soup.body.get_text(separator='\n', strip=True) if soup.body else soup.get_text(separator='\n', strip=True)
-                logger.info("Using body text as no main content container found")
-
-            # Clean up the text
-            lines = []
-            for line in main_content.split('\n'):
-                line = line.strip()
-                if line and len(line) > 15:  # Skip very short lines
-                    lines.append(line)
-
-            main_content = '\n'.join(lines)
-
-            # Truncate if too long
-            if len(main_content) > self.max_content_length:
-                main_content = main_content[:self.max_content_length] + "...\n[Inhalt gekürzt]"
-
-            return main_content.strip()
-
-        except Exception as e:
-            logger.error(f"Fehler beim Scrapen von {url}: {str(e)}")
-            return f"[Fehler beim Scrapen von {url}: {str(e)}]"
-
-
-    def _old_extract_urls_from_search_results(self, html_content: str) -> List[str]:
-        """
-        Extracts URLs from search engine results.
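
The 202 polling in read_url above hardcodes its retry loop; the same idea as a reusable helper, a sketch that assumes (as the code above does) that 202 means "result not ready yet":

import time
import requests

def get_with_backoff(url: str, headers: dict, attempts: int = 4,
                     base_delay: float = 0.5, timeout: int = 10) -> requests.Response:
    """GET a URL, re-polling on HTTP 202 with exponentially growing delays."""
    response = requests.get(url, headers=headers, timeout=timeout)
    delay = base_delay
    for _ in range(attempts):
        if response.status_code != 202:
            break
        time.sleep(delay)
        delay *= 2  # 0.5s, 1s, 2s, 4s
        response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    return response
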
- - Args: - html_content: HTML content of the search results page - - Returns: - List of extracted URLs - """ - - soup = BeautifulSoup(html_content, 'html.parser') - urls = [] - - # Different search engines have different HTML structures - # Google links - for a_tag in soup.select('a[href^="/url?"]'): - href = a_tag.get('href', '') - if '/url?q=' in href: - url = href.split('/url?q=')[1].split('&')[0] - url = urllib.parse.unquote(url) - if url.startswith('http') and url not in urls: - urls.append(url) - - # Bing links - for a_tag in soup.select('a[href^="http"]'): - url = a_tag.get('href', '') - excluded_domains = getattr(self, 'excluded_domains', []) - if (url.startswith('http') and - not any(domain in url for domain in excluded_domains) and - url not in urls): - urls.append(url) - - # Yahoo links - for a_tag in soup.select('a.d-ib'): - url = a_tag.get('href', '') - if url.startswith('http') and url not in urls: - urls.append(url) - - # If no URLs found, try a more generic approach - if not urls: - for a_tag in soup.find_all('a', href=True): - url = a_tag['href'] - excluded_domains = getattr(self, 'excluded_domains', []) - if (url.startswith('http') and - not any(domain in url for domain in excluded_domains) and - url not in urls): - urls.append(url) - - # Limit the number of results - return urls[:self.max_urls] - - - - -# Singleton-Instanz -_webcrawler_agent = None - -def get_webcrawler_agent(): - """Gibt eine Singleton-Instanz des WebCrawler-Agenten zurück""" - global _webcrawler_agent - if _webcrawler_agent is None: - _webcrawler_agent = WebcrawlerAgent() - return _webcrawler_agent \ No newline at end of file diff --git a/gwserver/_old_bk_modules/agentservice_base.py b/gwserver/_old_bk_modules/agentservice_base.py deleted file mode 100644 index 67348e5d..00000000 --- a/gwserver/_old_bk_modules/agentservice_base.py +++ /dev/null @@ -1,124 +0,0 @@ -""" -Erweiterte Basisklasse für Agenten im Agentservice. -Dieser Modul stellt eine Basis-Agent-Klasse mit Rückgabeformat-Attribut für spezialisierte Agenten bereit. -""" - -import logging -from typing import List, Dict, Any, Optional, Tuple - -logger = logging.getLogger(__name__) - -class BaseAgent: - """Basisklasse für alle Agenten im System""" - - def __init__(self): - """Initialisiert den Basis-Agenten""" - self.id = "base_agent" - self.name = "Base Agent" - self.type = "base" - self.description = "Basisagent als Vorlage für spezialisierte Agenten" - self.capabilities = "Grundlegende Agentenoperationen" - self.instructions = """ - Als Basis-Agent kannst du grundlegende Aufgaben erledigen. - Diese Anweisungen sollten von spezialisierten Agenten überschrieben werden. - """ - # Neues Attribut für das Rückgabeformat - self.result_format = "Text" # Standard: Textformat - - def get_agent_info(self) -> Dict[str, Any]: - """ - Gibt Informationen über den Agenten zurück. - - Returns: - Dict mit Agenten-Informationen - """ - return { - "id": self.id, - "name": self.name, - "type": self.type, - "description": self.description, - "capabilities": self.capabilities, - "instructions": self.instructions, - "result_format": self.result_format, # Rückgabeformat hinzugefügt - "used": False, # Wird zur Laufzeit aktualisiert - "last_result_status": None # Wird zur Laufzeit aktualisiert - } - - def get_prompt(self, message_context: Dict[str, Any]) -> str: - """ - Generiert einen an den Agenten angepassten Prompt basierend auf Kontext. 
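
The [STATUS: ...] convention that this prompt enforces is parsed back out by extract_status (defined further down); a quick round-trip sketch with a made-up agent reply:

agent = BaseAgent()
reply = "Here is the finished draft.\n[STATUS: ERGEBNIS]"
text, status = agent.extract_status(reply)
assert status == "ERGEBNIS"
assert "[STATUS" not in text  # marker is stripped from the returned text
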
- - Args: - message_context: Kontext der Nachricht - - Returns: - Formatierter Prompt für den Agenten - """ - # Basis-Prompt, der von spezialisierten Agenten überschrieben werden kann - base_prompt = f""" - Du bist {self.name}, ein {self.type} Agent. - - {self.description} - - Deine Fähigkeiten: {self.capabilities} - - {self.instructions} - - Rückgabeformat: {self.result_format} - - Formatiere deine Antwort klar und strukturiert. Beantworte alle Aspekte der Anfrage. - Deklariere am Ende deiner Antwort den Status deines Ergebnisses: - [STATUS: ERGEBNIS] - Wenn du ein vollständiges, konkretes Ergebnis geliefert hast - [STATUS: TEILWEISE] - Wenn du ein teilweises Ergebnis geliefert hast - [STATUS: PLAN] - Wenn du nur einen Plan vorgeschlagen hast - """ - - return base_prompt.strip() - - async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Verarbeitet eine Nachricht und generiert eine Antwort. - Sollte von spezialisierten Agenten überschrieben werden. - - Args: - message: Die zu verarbeitende Nachricht - context: Zusätzlicher Kontext (optional) - - Returns: - Die generierte Antwort - """ - # Basis-Implementierung, die einfach eine Standardantwort zurückgibt - return { - "role": "assistant", - "content": f"Ich bin {self.name} und habe deine Anfrage erhalten. Allerdings bin ich nur eine Basisimplementierung ohne spezifische Funktionalität. [STATUS: PLAN]", - "agent_type": self.type, - "result_format": self.result_format # Rückgabeformat in der Antwort - } - - def extract_status(self, content: str) -> Tuple[str, str]: - """ - Extrahiert den Status aus dem Inhalt der Antwort. - - Args: - content: Inhalt der Antwort - - Returns: - Tuple mit (bereinigter Text, Status) - """ - import re - - # Standard-Status, falls keine Deklaration gefunden wird - status = "UNBEKANNT" - - # Suche nach Status-Deklaration - status_pattern = r'\[STATUS:\s*(ERGEBNIS|TEILWEISE|PLAN)\]' - match = re.search(status_pattern, content, re.IGNORECASE) - - if match: - # Extrahiere den Status - status = match.group(1).upper() - - # Entferne die Status-Deklaration aus dem Text - content = re.sub(status_pattern, '', content, flags=re.IGNORECASE).strip() - - return content, status \ No newline at end of file diff --git a/gwserver/_old_bk_modules/agentservice_code_executor.py b/gwserver/_old_bk_modules/agentservice_code_executor.py deleted file mode 100644 index 8a8b92ac..00000000 --- a/gwserver/_old_bk_modules/agentservice_code_executor.py +++ /dev/null @@ -1,556 +0,0 @@ -# code_executor.py - -import os -import sys -import uuid -import subprocess -import tempfile -import re -from typing import Dict, List, Optional, Tuple, Any -import importlib.util -import logging - -# Logging einrichten -logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') -logger = logging.getLogger(__name__) - -class CodeExecutor: - """ - Führt generierten Code in einer isolierten virtuellen Umgebung aus, - während Zugriff auf spezifische App-Module gewährt wird und - automatisch erforderliche Pakete installiert werden. - """ - - def __init__(self, - app_modules: List[str] = None, - venv_path: Optional[str] = None, - timeout: int = 30, - max_memory_mb: int = 512, - allowed_packages: List[str] = None, - blocked_packages: List[str] = None): - """ - Initialisiert den CodeExecutor. - - Args: - app_modules: Liste von Modulnamen, die dem generierten Code zur Verfügung stehen sollen - venv_path: Pfad zur virtuellen Umgebung. 
Falls None, wird eine temporäre erstellt - timeout: Maximale Ausführungszeit in Sekunden - max_memory_mb: Maximaler Arbeitsspeicher in MB - allowed_packages: Liste erlaubter Pakete (wenn None, werden alle erlaubt, außer blockierte) - blocked_packages: Liste blockierter Pakete (z.B. gefährliche oder ressourcenintensive) - """ - self.app_modules = app_modules or [] - self.venv_path = venv_path - self.timeout = timeout - self.max_memory_mb = max_memory_mb - self.temp_dir = None - self.allowed_packages = allowed_packages - self.blocked_packages = blocked_packages or ["cryptography", "flask", "django", "tornado", "requests"] - - def _create_venv(self) -> str: - """Erstellt eine virtuelle Umgebung und gibt den Pfad zurück.""" - if self.venv_path and os.path.exists(self.venv_path): - return self.venv_path - - # Temporäres Verzeichnis für die virtuelle Umgebung erstellen - self.temp_dir = tempfile.mkdtemp(prefix="ai_code_exec_") - venv_path = os.path.join(self.temp_dir, "venv") - - try: - # Virtuelle Umgebung erstellen - logger.info(f"Erstelle virtuelle Umgebung in {venv_path}") - subprocess.run([sys.executable, "-m", "venv", venv_path], - check=True, - capture_output=True) - return venv_path - except subprocess.CalledProcessError as e: - logger.error(f"Fehler beim Erstellen der virtuellen Umgebung: {e}") - raise RuntimeError(f"Konnte venv nicht erstellen: {e}") - - def _get_pip_executable(self, venv_path: str) -> str: - """Ermittelt den Pfad zum pip-Executable in der virtuellen Umgebung.""" - if os.name == 'nt': # Windows - return os.path.join(venv_path, "Scripts", "pip.exe") - else: # Unix/Linux - return os.path.join(venv_path, "bin", "pip") - - def _get_python_executable(self, venv_path: str) -> str: - """Ermittelt den Pfad zum Python-Executable in der virtuellen Umgebung.""" - if os.name == 'nt': # Windows - return os.path.join(venv_path, "Scripts", "python.exe") - else: # Unix/Linux - return os.path.join(venv_path, "bin", "python") - - def _install_packages(self, packages: List[str], venv_path: str) -> Tuple[bool, str]: - """ - Installiert Pakete in der virtuellen Umgebung. 
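
A sketch of what _create_venv, _get_pip_executable and _get_python_executable above do together, using the stdlib venv module instead of shelling out; a minimal alternative, not a drop-in replacement:

import os
import venv
from typing import Tuple

def make_venv(path: str) -> Tuple[str, str]:
    """Create a venv at `path` and return (python, pip) executable paths."""
    venv.EnvBuilder(with_pip=True).create(path)
    bindir = "Scripts" if os.name == "nt" else "bin"
    suffix = ".exe" if os.name == "nt" else ""
    python = os.path.join(path, bindir, f"python{suffix}")
    pip = os.path.join(path, bindir, f"pip{suffix}")
    return python, pip
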
- - Args: - packages: Liste der zu installierenden Pakete - venv_path: Pfad zur virtuellen Umgebung - - Returns: - Tuple aus (Erfolg, Fehlermeldung) - """ - if not packages: - return True, "" - - # Überprüfen, ob Pakete erlaubt sind - blocked = [] - for package in packages: - # Paketname ohne Version extrahieren - pkg_name = re.split('[=<>]', package)[0].strip() - - if self.blocked_packages and pkg_name.lower() in [p.lower() for p in self.blocked_packages]: - blocked.append(pkg_name) - - if self.allowed_packages and pkg_name.lower() not in [p.lower() for p in self.allowed_packages]: - blocked.append(pkg_name) - - if blocked: - return False, f"Die folgenden Pakete sind nicht erlaubt: {', '.join(blocked)}" - - # Pakete installieren - pip_executable = self._get_pip_executable(venv_path) - logger.info(f"Installiere Pakete in virtueller Umgebung: {', '.join(packages)}") - - try: - # pip aktualisieren - subprocess.run( - [pip_executable, "install", "--upgrade", "pip"], - check=True, - capture_output=True, - timeout=60 - ) - - # Pakete installieren - process = subprocess.run( - [pip_executable, "install"] + packages, - check=True, - capture_output=True, - text=True, - timeout=120 # 2 Minuten Timeout für Paketinstallation - ) - - return True, process.stdout - except subprocess.CalledProcessError as e: - error_msg = f"Fehler bei der Paketinstallation: {e.stderr}" - logger.error(error_msg) - return False, error_msg - except subprocess.TimeoutExpired: - return False, "Zeitüberschreitung bei der Paketinstallation." - except Exception as e: - return False, f"Unerwarteter Fehler bei der Paketinstallation: {str(e)}" - - def _extract_required_packages(self, code: str) -> List[str]: - """ - Extrahiert benötigte Pakete aus dem Code durch Analyse von Import-Statements - und Pip-Installationsanweisungen. - - Args: - code: Der Python-Code - - Returns: - Liste der erkannten Paketnamen - """ - packages = set() - - # Paketkommentare erkennen (# pip install package) - pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code) - for comment in pip_comments: - for pkg in comment.split(): - if pkg and not pkg.startswith('-'): - packages.add(pkg.strip()) - - # Import-Statements analysieren - import_lines = re.findall(r'^(?:import|from)\s+([^\s.]+)(?:\s+import|\s*$|\.)', code, re.MULTILINE) - - # Standardmodule, die nicht installiert werden müssen - std_modules = { - 'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json', - 'collections', 'itertools', 'functools', 'pathlib', 'shutil', - 'tempfile', 'uuid', 'subprocess', 'threading', 'logging', - 'traceback', 'io', 'copy' - } - - # Module der App, die nicht installiert werden müssen - app_modules_prefixes = set(m.split('.')[0] for m in self.app_modules) - - for module in import_lines: - if module not in std_modules and module not in app_modules_prefixes: - packages.add(module) - - return list(packages) - - def _create_module_loader(self) -> str: - """ - Erstellt ein Hilfsskript, das App-Module in die venv importiert. - Gibt den Pfad zum Hilfsskript zurück. 
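
The import regex in _extract_required_packages above misses aliased and comma imports ("import numpy as np", "import os, sys"); the stdlib ast module handles those cases. A sketch, which still ignores the package-versus-module naming problem (e.g. PIL is installed as Pillow):

import ast

def imported_top_level_modules(code: str) -> set:
    """Return top-level module names imported anywhere in `code`."""
    modules = set()
    for node in ast.walk(ast.parse(code)):
        if isinstance(node, ast.Import):
            modules.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
            modules.add(node.module.split(".")[0])
    return modules
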
- """ - if not self.app_modules: - return "" - - # Temporäre Datei für den Module-Loader erstellen - module_loader_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - "module_loader.py") - - # Pfad zu den App-Modulen bestimmen - app_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) - - # Modul-Loader-Code generieren - loader_code = f""" -import sys -import importlib.util -import os - -# App-Pfad zum Suchpfad hinzufügen -sys.path.insert(0, "{app_path}") - -# Module importieren -modules = {{}} -""" - - # Code zum Importieren der Module hinzufügen - for module_name in self.app_modules: - loader_code += f""" -try: - modules["{module_name}"] = __import__("{module_name}", fromlist=["*"]) - print(f"Modul '{module_name}' erfolgreich importiert") -except ImportError as e: - print(f"Fehler beim Importieren von '{module_name}': {{e}}") -""" - - # Loader-Datei schreiben - with open(module_loader_path, "w") as f: - f.write(loader_code) - - return module_loader_path - - def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Führt den generierten Code in einer isolierten Umgebung aus. - - Args: - code: Der auszuführende Python-Code - input_data: Eingabedaten für den Code (werden als JSON serialisiert) - - Returns: - Dict mit Ausführungsergebnissen, Ausgabe und Fehlern - """ - # Virtuelle Umgebung erstellen oder bestehende verwenden - venv_path = self._create_venv() - - # Erforderliche Pakete aus dem Code extrahieren - required_packages = self._extract_required_packages(code) - - # Pakete installieren, falls erforderlich - install_success = True - install_log = "" - if required_packages: - install_success, install_log = self._install_packages(required_packages, venv_path) - - if not install_success: - return { - "success": False, - "output": "", - "error": f"Fehler bei der Installation der erforderlichen Pakete: {install_log}", - "result": None, - "installed_packages": required_packages - } - - # Temporäre Datei für den Code erstellen - code_id = str(uuid.uuid4())[:8] - code_file_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"ai_code_{code_id}.py") - - # Module-Loader erstellen - module_loader_path = self._create_module_loader() - - # Eingabedaten als JSON speichern, wenn vorhanden - input_path = "" - if input_data: - import json - input_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"input_{code_id}.json") - with open(input_path, "w") as f: - json.dump(input_data, f) - - # Outputpfad für Ergebnisse - output_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"output_{code_id}.json") - - # Wrapper für den Code erstellen, damit die App-Module verfügbar sind - wrapped_code = f""" -import sys -import json -import traceback -import os - -# Ergebnisstruktur -result = {{ - "success": False, - "output": "", - "error": "", - "result": None, - "installed_packages": {required_packages} -}} - -try: - # Module laden, falls erforderlich - if "{module_loader_path}": - module_loader = __import__("module_loader") - globals().update({{k: v for k, v in module_loader.modules.items()}}) - - # Eingabedaten laden, falls vorhanden - input_data = None - if "{input_path}": - with open("{input_path}", "r") as f: - input_data = json.load(f) - - # Ausgabeumleitung - from io import StringIO - original_stdout = sys.stdout - original_stderr = sys.stderr - captured_stdout = StringIO() - captured_stderr = StringIO() - sys.stdout = captured_stdout - 
sys.stderr = captured_stderr - - # Benutzercode ausführen - try: - # Den Code in einem lokalen Namespace ausführen - local_vars = {{"input_data": input_data}} - exec('''{code}''', globals(), local_vars) - - # Ergebnis speichern, falls eine Variable 'result' definiert wurde - if "result" in local_vars: - result["result"] = local_vars["result"] - - result["success"] = True - except Exception as e: - result["error"] = str(e) - result["error"] += "\\n" + traceback.format_exc() - finally: - # Ausgabe erfassen - result["output"] = captured_stdout.getvalue() - result["error"] += captured_stderr.getvalue() - - # Ausgabeumleitung zurücksetzen - sys.stdout = original_stdout - sys.stderr = original_stderr - -except Exception as outer_e: - result["error"] = f"Fehler beim Ausführen des Setups: {{outer_e}}\\n{{traceback.format_exc()}}" - -# Ergebnis speichern -with open("{output_path}", "w") as f: - json.dump(result, f, default=str) -""" - - # Code in temporäre Datei schreiben - with open(code_file_path, "w") as f: - f.write(wrapped_code) - - # Python-Interpreter aus der virtuellen Umgebung bestimmen - python_executable = self._get_python_executable(venv_path) - - # Code ausführen - logger.info(f"Führe Code in virtueller Umgebung aus: {python_executable}") - try: - # Prozess mit Ressourcenbeschränkungen ausführen - cmd = [python_executable, code_file_path] - - # Umgebungsvariablen setzen, um Speicherlimit zu erzwingen - env = os.environ.copy() - if self.max_memory_mb: - if os.name == 'posix': # Unix/Linux - # Auf Unix-Systemen können wir ulimit verwenden - cmd = ["bash", "-c", f"ulimit -v {self.max_memory_mb * 1024} && {python_executable} {code_file_path}"] - elif os.name == 'nt': # Windows - # Auf Windows können wir keine harten Speichergrenzen setzen, aber Job Objects verwenden - # Hier müsste eine komplexere Lösung implementiert werden - pass - - # Prozess starten und mit Timeout ausführen - process = subprocess.run( - cmd, - timeout=self.timeout, - env=env, - capture_output=True, - text=True - ) - - # Ergebnis aus der Ausgabedatei lesen - if os.path.exists(output_path): - with open(output_path, "r") as f: - import json - execution_result = json.load(f) - else: - execution_result = { - "success": False, - "output": process.stdout, - "error": f"Keine Ergebnisdatei gefunden. 
Stderr: {process.stderr}", - "result": None, - "installed_packages": required_packages - } - - except subprocess.TimeoutExpired: - execution_result = { - "success": False, - "output": "", - "error": f"Zeitüberschreitung bei der Ausführung (Timeout nach {self.timeout} Sekunden)", - "result": None, - "installed_packages": required_packages - } - except Exception as e: - execution_result = { - "success": False, - "output": "", - "error": f"Fehler bei der Ausführung: {str(e)}", - "result": None, - "installed_packages": required_packages - } - - # Informationen zur Paketinstallation hinzufügen - if install_log: - execution_result["package_install_log"] = install_log - - # Temporäre Dateien aufräumen - self._cleanup_temp_files([code_file_path, input_path, output_path]) - - return execution_result - - def _cleanup_temp_files(self, file_paths: List[str]): - """Räumt temporäre Dateien auf.""" - for path in file_paths: - if path and os.path.exists(path): - try: - os.remove(path) - except Exception as e: - logger.warning(f"Konnte temporäre Datei nicht löschen {path}: {e}") - - def cleanup(self): - """Räumt alle temporären Ressourcen auf.""" - if self.temp_dir and os.path.exists(self.temp_dir): - import shutil - try: - shutil.rmtree(self.temp_dir) - logger.info(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}") - except Exception as e: - logger.warning(f"Konnte temporäres Verzeichnis nicht löschen {self.temp_dir}: {e}") - - def __del__(self): - """Aufräumen beim Garbage Collection.""" - self.cleanup() - - - - - - -# Beispiel zur Verwendung des erweiterten CodeExecutor in einem AI Chat - -# from code_executor import CodeExecutor - -def execute_ai_generated_code(prompt_result: str, input_data=None): - """ - Führt von einer KI generierten Code aus und installiert automatisch benötigte Pakete - - Args: - prompt_result: Der von der KI generierte Python-Code - input_data: Optionale Eingabedaten für den Code - - Returns: - Ergebnis der Code-Ausführung - """ - # Verfügbare App-Module definieren - available_modules = [ - "utils.sharepoint_crud", - # Weitere Module hier hinzufügen - ] - - # Liste erlaubter Pakete (optional) - allowed_packages = None # None bedeutet alle erlaubt, außer blockierte - - # Liste blockierter Pakete (Sicherheitsrisiken oder ressourcenintensive Pakete) - blocked_packages = [ - "cryptography", "flask", "django", "tornado", # Sicherheit - "tensorflow", "pytorch", "scikit-learn", # Ressourcenintensiv - ] - - # CodeExecutor initialisieren - executor = CodeExecutor( - app_modules=available_modules, - timeout=120, # 2 Minuten Timeout - max_memory_mb=1024, # 1GB Speicherlimit - allowed_packages=allowed_packages, - blocked_packages=blocked_packages - ) - - try: - # Code ausführen - result = executor.execute_code(prompt_result, input_data) - - if result["success"]: - print("Code erfolgreich ausgeführt!") - print(f"Ausgabe: {result['output']}") - - # Zeige installierte Pakete an - if "installed_packages" in result and result["installed_packages"]: - print(f"Installierte Pakete: {', '.join(result['installed_packages'])}") - - return result["result"] - else: - print(f"Fehler bei der Ausführung: {result['error']}") - return None - finally: - # Aufräumen - executor.cleanup() - -# Beispiel für die Verwendung -if __name__ == "__main__": - # Angenommen, dies ist der von der KI generierte Code mit Paketabhängigkeiten - ai_generated_code = """ -# pip install pandas matplotlib -import pandas as pd -import matplotlib.pyplot as plt -import utils.sharepoint_crud as sp - -# Daten aus input_data verwenden 
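# (Note: input_data is not defined in this snippet itself; the execution
# wrapper above injects it via local_vars = {"input_data": input_data}
# before exec'ing the code. A caveat of exec with a separate locals mapping:
# such names are visible at module level, as below, but not inside functions
# defined in this snippet -- "def f(): return input_data" would raise
# NameError.)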
-file_path = input_data.get('file_path') -site_url = input_data.get('site_url') - -# Beispieldaten erstellen -data = pd.DataFrame({ - 'Monat': ['Jan', 'Feb', 'Mär', 'Apr', 'Mai'], - 'Umsatz': [1200, 1400, 1300, 1500, 1800] -}) - -# Plot erstellen -plt.figure(figsize=(10, 6)) -plt.bar(data['Monat'], data['Umsatz']) -plt.title('Umsatz nach Monat') -plt.savefig('umsatz_plot.png') -print('Diagramm erstellt und gespeichert') - -# SharePoint-Datei hochladen -result = sp.upload_file(file_path, site_url) -print(f"Datei wurde hochgeladen: {result}") - -# Ergebnis zurückgeben -result = { - 'data': data.to_dict(), - 'plot_saved': True, - 'upload_result': result -} -""" - - # Daten für den Code bereitstellen - data = { - "file_path": "/path/to/document.docx", - "site_url": "https://example.sharepoint.com/sites/mysite" - } - - # Code ausführen - execute_ai_generated_code(ai_generated_code, data) - diff --git a/gwserver/_old_bk_modules/agentservice_dataextraction.py b/gwserver/_old_bk_modules/agentservice_dataextraction.py deleted file mode 100644 index 2f223bdd..00000000 --- a/gwserver/_old_bk_modules/agentservice_dataextraction.py +++ /dev/null @@ -1,475 +0,0 @@ -""" -Hilfsfunktion für die intelligente Extraktion von Dateninhalten (Fortsetzung). -""" - -from datetime import datetime -import logging -import json -from typing import List, Dict, Any, Optional, Tuple -import asyncio -import copy - -# Import erweiterte Dateiverarbeitung -from gateway.gwserver.modules.agentservice_filemanager import extract_text_from_file_content - -logger = logging.getLogger(__name__) - -async def data_extraction( - prompt: str, - files: List[Dict[str, Any]], - messages: List[Dict[str, Any]], - ai_service, - lucydom_interface = None, - workflow_id: str = None, - add_log_func = None -) -> Dict[str, Any]: - """ - Führt einen AI Call durch, um zu bestimmen, welche Inhalte aus welchen Dateiobjekten extrahiert werden sollen, - und führt dann die notwendigen Extraktionen durch. - - Args: - prompt: Spezifizierung, welche Daten extrahiert werden sollen - files: Liste aller verfügbaren Dateien mit Metadaten - messages: Liste aller Nachrichten im Workflow - ai_service: Service für KI-Anfragen - lucydom_interface: Interface für Datenbankzugriffe (optional) - workflow_id: Optionale ID des Workflows für Logging - add_log_func: Optionale Funktion für das Hinzufügen von Logs - - Returns: - Strukturiertes Text-Objekt mit extrahierten Daten und Kontext-Informationen - """ - try: - # 1. AI Call zur Bestimmung der notwendigen Extraktionen - extraction_plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func) - - # 2. Extraktionen durchführen - extracted_data = await _execute_extractions( - extraction_plan, - files, - lucydom_interface, - ai_service, - workflow_id, - add_log_func - ) - - # 3. 
Extrahierte Daten strukturieren - structured_result = _structure_extracted_data(extracted_data, files, prompt) - - return structured_result - - except Exception as e: - logger.error(f"Fehler bei der Datenextraktion: {str(e)}", exc_info=True) - - # Fehler-Log hinzufügen - if add_log_func and workflow_id: - add_log_func(workflow_id, f"Fehler bei der Datenextraktion: {str(e)}", "error") - - # Fehler-Ergebnis zurückgeben - return { - "error": str(e), - "status": "error", - "files_processed": len(files), - "message": f"Die Datenextraktion konnte nicht durchgeführt werden: {str(e)}" - } - -async def _create_extraction_plan( - prompt: str, - files: List[Dict[str, Any]], - messages: List[Dict[str, Any]], - ai_service, - workflow_id: str = None, - add_log_func = None -) -> List[Dict[str, Any]]: - """ - Erstellt einen Extraktionsplan mit AI-Unterstützung. - - Args: - prompt: Spezifizierung, welche Daten extrahiert werden sollen - files: Liste aller verfügbaren Dateien mit Metadaten - messages: Liste aller Nachrichten im Workflow - ai_service: Service für KI-Anfragen - workflow_id: Optionale ID des Workflows für Logging - add_log_func: Optionale Funktion für das Hinzufügen von Logs - - Returns: - Extraktionsplan (Liste von Extraktionsanweisungen pro Datei) - """ - # Erstelle Kontext-Informationen für den AI Call - file_infos = [] - for file in files: - # Basis-Metadaten - file_info = { - "id": file.get("id", ""), - "name": file.get("name", ""), - "type": file.get("type", ""), - "content_type": file.get("content_type", ""), - "size": file.get("size", "") - } - - # Extraktionsstatus prüfen (falls vorhanden) - doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages) - - if doc_contents: - # Prüfen, ob mindestens ein Content mit is_extracted=True existiert - already_extracted = any( - content.get("is_extracted", False) for content in doc_contents - ) - file_info["already_extracted"] = already_extracted - - # Eine kurze Vorschau des Inhalts hinzufügen (falls verfügbar) - for content in doc_contents: - if content.get("type") == "text" and content.get("text"): - preview_text = content.get("text", "")[:200] + "..." if len(content.get("text", "")) > 200 else content.get("text", "") - file_info["content_preview"] = preview_text - break - else: - file_info["already_extracted"] = False - - file_infos.append(file_info) - - # AI-Prompt erstellen - extraction_prompt = f""" - Du bist ein Datenextraktionsexperte, der mithilfe von KI-Analyse entscheidet, welche Dateien - und Inhalte für eine bestimmte Aufgabe extrahiert werden müssen. - - AUFGABE: - {prompt} - - VERFÜGBARE DATEIEN: - {json.dumps(file_infos, indent=2)} - - Für jede Datei, die für die Aufgabe relevant ist, erstelle eine Extraktionsanweisung mit den folgenden Informationen: - 1. file_id: Die ID der zu extrahierenden Datei - 2. extract_needed: Boolean, ob eine Extraktion erforderlich ist (True, wenn die Datei noch nicht extrahiert wurde und für die Aufgabe benötigt wird) - 3. extraction_prompt: Ein spezifischer Prompt für die Extraktion der Datei (besonders wichtig für Bilder und nicht-textbasierte Dateien) - 4. importance: Priorität/Wichtigkeit für die Aufgabe (1-5, wobei 5 am wichtigsten ist) - - Format: - [ - {{ - "file_id": "1234", - "extract_needed": true, - "extraction_prompt": "Extrahiere die Tabellendaten mit Fokus auf die Umsatzzahlen", - "importance": 5 - }}, - ... - ] - - Gib nur das JSON-Array zurück, ohne weitere Erklärungen. 
- """ - - # Log hinzufügen - if add_log_func and workflow_id: - add_log_func(workflow_id, "Extraktionsplan wird erstellt...", "info") - - try: - # AI-Call durchführen - extraction_plan_response = await ai_service.call_api([{"role": "user", "content": extraction_prompt}]) - - # JSON aus der Antwort extrahieren - import re - json_match = re.search(r'\[.*\]', extraction_plan_response, re.DOTALL) - - if json_match: - extraction_plan = json.loads(json_match.group(0)) - - # Log hinzufügen - if add_log_func and workflow_id: - add_log_func( - workflow_id, - f"Extraktionsplan erstellt für {len(extraction_plan)} Dateien", - "info" - ) - - return extraction_plan - else: - # Fallback bei Parsing-Problemen - if add_log_func and workflow_id: - add_log_func( - workflow_id, - "Parsing-Fehler beim Extraktionsplan, erstelle Standard-Plan", - "warning" - ) - - # Standard-Plan: Alle nicht extrahierten Dateien extrahieren - default_plan = [] - for file in files: - doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages) - already_extracted = any( - content.get("is_extracted", False) for content in doc_contents - ) if doc_contents else False - - default_plan.append({ - "file_id": file.get("id", ""), - "extract_needed": not already_extracted, - "extraction_prompt": f"Extrahiere alle relevanten Informationen aus {file.get('name', '')}", - "importance": 3 - }) - - return default_plan - - except Exception as e: - logger.error(f"Fehler bei der Erstellung des Extraktionsplans: {str(e)}", exc_info=True) - - if add_log_func and workflow_id: - add_log_func( - workflow_id, - f"Fehler bei der Erstellung des Extraktionsplans: {str(e)}", - "error" - ) - - # Leerer Plan bei Fehlern - return [] - -async def _execute_extractions( - extraction_plan: List[Dict[str, Any]], - files: List[Dict[str, Any]], - lucydom_interface, - ai_service, - workflow_id: str = None, - add_log_func = None -) -> List[Dict[str, Any]]: - """ - Führt die geplanten Extraktionen durch. 
- - Args: - extraction_plan: Liste von Extraktionsanweisungen - files: Liste aller verfügbaren Dateien - lucydom_interface: Interface für Datenbankzugriffe - ai_service: Service für KI-Anfragen - workflow_id: Optionale ID des Workflows für Logging - add_log_func: Optionale Funktion für das Hinzufügen von Logs - - Returns: - Liste mit extrahierten Daten pro Datei - """ - extracted_data = [] - - # Nach Wichtigkeit sortieren - sorted_plan = sorted(extraction_plan, key=lambda x: x.get("importance", 0), reverse=True) - - for extraction_item in sorted_plan: - file_id = extraction_item.get("file_id") - extract_needed = extraction_item.get("extract_needed", False) - extraction_prompt = extraction_item.get("extraction_prompt", "") - - # Dateimetadaten finden - file_metadata = next((f for f in files if f.get("id") == file_id), None) - - if not file_metadata: - logger.warning(f"Datei mit ID {file_id} nicht gefunden") - continue - - file_name = file_metadata.get("name", "") - file_type = file_metadata.get("type", "") - content_type = file_metadata.get("content_type", "") - - # Log hinzufügen - if add_log_func and workflow_id: - add_log_func( - workflow_id, - f"Verarbeite Datei: {file_name} (Extraktion notwendig: {extract_needed})", - "info" - ) - - # Extraktion nur durchführen, wenn notwendig - if extract_needed: - # Dateiinhalt über LucyDOM-Interface abrufen - if lucydom_interface: - try: - file_content = await lucydom_interface.read_file_content(file_id) - - if not file_content: - if add_log_func and workflow_id: - add_log_func(workflow_id, f"Datei {file_name} nicht gefunden", "warning") - continue - - # Extraktion basierend auf Dateityp durchführen - if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')): - # Bildanalyse mit AI-Service - if ai_service and hasattr(ai_service, "analyze_image"): - try: - image_analysis = await ai_service.analyze_image( - image_data=file_content, - prompt=extraction_prompt, - mime_type=content_type - ) - - extracted_data.append({ - "file_id": file_id, - "name": file_name, - "type": file_type, - "content": image_analysis, - "is_extracted": True, - "extraction_method": "image_analysis" - }) - - if add_log_func and workflow_id: - add_log_func(workflow_id, f"Bild {file_name} erfolgreich analysiert", "info") - except Exception as e: - logger.error(f"Fehler bei der Bildanalyse {file_name}: {str(e)}") - if add_log_func and workflow_id: - add_log_func(workflow_id, f"Fehler bei der Bildanalyse {file_name}: {str(e)}", "error") - else: - # Fallback, wenn keine Bildanalyse verfügbar - extracted_data.append({ - "file_id": file_id, - "name": file_name, - "type": file_type, - "content": f"Bild: {file_name} (Analyse nicht verfügbar)", - "is_extracted": False, - "extraction_method": "none" - }) - else: - # Text-basierte Extraktion für alle anderen Dateitypen - try: - content, is_extracted = extract_text_from_file_content( - file_content, file_name, content_type - ) - - extracted_data.append({ - "file_id": file_id, - "name": file_name, - "type": file_type, - "content": content, - "is_extracted": is_extracted, - "extraction_method": "text_extraction" - }) - - if add_log_func and workflow_id: - add_log_func( - workflow_id, - f"Datei {file_name} extrahiert (Status: {is_extracted})", - "info" - ) - except Exception as e: - logger.error(f"Fehler bei der Textextraktion {file_name}: {str(e)}") - if add_log_func and workflow_id: - add_log_func(workflow_id, f"Fehler bei der Textextraktion {file_name}: {str(e)}", "error") - except Exception as e: - 
logger.error(f"Fehler beim Lesen der Datei {file_name}: {str(e)}") - if add_log_func and workflow_id: - add_log_func(workflow_id, f"Fehler beim Lesen der Datei {file_name}: {str(e)}", "error") - else: - logger.warning(f"Kein LucyDOM-Interface verfügbar für Datei {file_name}") - if add_log_func and workflow_id: - add_log_func(workflow_id, f"Kein LucyDOM-Interface verfügbar für Datei {file_name}", "warning") - else: - # Keine Extraktion notwendig, vorhandene Inhalte verwenden - doc_contents = _extract_document_contents_from_messages(file_id, messages) - - if doc_contents: - # Ersten Textinhalt verwenden - for content in doc_contents: - if content.get("type") == "text": - extracted_data.append({ - "file_id": file_id, - "name": file_name, - "type": file_type, - "content": content.get("text", ""), - "is_extracted": content.get("is_extracted", False), - "extraction_method": "existing_content" - }) - break - else: - # Keine vorhandenen Inhalte gefunden - extracted_data.append({ - "file_id": file_id, - "name": file_name, - "type": file_type, - "content": f"Keine Inhalte verfügbar für {file_name}", - "is_extracted": False, - "extraction_method": "none" - }) - - return extracted_data - -def _structure_extracted_data( - extracted_data: List[Dict[str, Any]], - files: List[Dict[str, Any]], - prompt: str -) -> Dict[str, Any]: - """ - Strukturiert die extrahierten Daten in ein formatiertes Ergebnis. - - Args: - extracted_data: Liste der extrahierten Daten pro Datei - files: Liste aller verfügbaren Dateien - prompt: Ursprünglicher Extraktionsprompt - - Returns: - Strukturiertes Ergebnisobjekt - """ - # Basis-Struktur erstellen - result = { - "prompt": prompt, - "files_processed": len(extracted_data), - "total_files": len(files), - "extraction_timestamp": datetime.now().isoformat(), - "status": "success", - "extracted_content": [] - } - - # Extrahierte Inhalte hinzufügen - for data_item in extracted_data: - # Datei Metadaten anreichern - file_id = data_item.get("file_id", "") - file_metadata = next((f for f in files if f.get("id") == file_id), {}) - - content_item = { - "file_id": file_id, - "name": data_item.get("name", file_metadata.get("name", "")), - "type": data_item.get("type", file_metadata.get("type", "")), - "content_type": file_metadata.get("content_type", ""), - "size": file_metadata.get("size", ""), - "is_extracted": data_item.get("is_extracted", False), - "extraction_method": data_item.get("extraction_method", ""), - "content": data_item.get("content", "") - } - - result["extracted_content"].append(content_item) - - return result - -def _extract_document_contents_from_messages(file_id: str, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Extrahiert Document-Contents für eine bestimmte Datei aus den Workflow-Nachrichten. 
- - Args: - file_id: ID der Datei - messages: Liste aller Nachrichten im Workflow - - Returns: - Liste der Document-Contents für die angegebene Datei - """ - contents = [] - - for message in messages: - # Dokumente in der Nachricht durchsuchen - for document in message.get("documents", []): - source = document.get("source", {}) - - # Prüfen, ob die Datei-ID übereinstimmt - if source.get("id") == file_id or source.get("type") == "file" and source.get("id") == file_id: - # Contents der Datei hinzufügen - doc_contents = document.get("contents", []) - - if doc_contents: - contents.extend(doc_contents) - - return contents - -def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None): - """Hilfsfunktion zum Loggen mit unterschiedlichen Log-Funktionen""" - # Log über die Logger-Instanz - if log_type == "error": - logger.error(message) - elif log_type == "warning": - logger.warning(message) - else: - logger.info(message) - - # Log über die bereitgestellte Log-Funktion (falls vorhanden) - if add_log_func and workflow_id: - add_log_func(workflow_id, message, log_type, agent_id, agent_name) \ No newline at end of file diff --git a/gwserver/_old_bk_modules/agentservice_filehandling.py b/gwserver/_old_bk_modules/agentservice_filehandling.py deleted file mode 100644 index 7e6932af..00000000 --- a/gwserver/_old_bk_modules/agentservice_filehandling.py +++ /dev/null @@ -1,638 +0,0 @@ -""" -Zentrales Filehandling-Modul für den Agentservice. -Enthält alle Funktionen für das Verarbeiten von Dateien. -Angepasst, um mit LucyDOMInterface als zentrale Datei-Autorität zu arbeiten. -""" - -import os -import logging -import base64 -import json -import uuid -from datetime import datetime -from typing import Dict, Any, List, Optional, Tuple, Union, BinaryIO -from io import BytesIO # Import BytesIO at the top level - -# Bibliotheken für Dateiverarbeitung -try: - import pandas as pd -except ImportError: - pd = None - -logger = logging.getLogger(__name__) - -# Custom exception für das File-Handling -class FileProcessingError(Exception): - """Basisklasse für Fehler bei der Dateiverarbeitung im AgentService.""" - pass - -class FileExtractionError(FileProcessingError): - """Fehler bei der Textextraktion aus Dateien.""" - pass - -class FileAnalysisError(FileProcessingError): - """Fehler bei der Analyse von Dateien.""" - pass - -def encode_to_base64(content: bytes, mime_type: str = None) -> str: - """ - Kodiert Binärdaten als Base64-String. - - Args: - content: Die zu kodierenden Binärdaten - mime_type: Optionaler MIME-Typ für das Encoding - - Returns: - Base64-kodierter String - """ - base64_data = base64.b64encode(content).decode('utf-8') - return base64_data - -def prepare_file_contexts(files: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Bereitet die Dateikontexte basierend auf Metadaten vor. - Akzeptiert keine Pfade mehr, sondern nur Metadaten aus der Datenbank. 
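
encode_to_base64 above accepts a mime_type parameter but never uses it; if the intent was a browser-ready data URI, a sketch of that variant (an assumption about the intent, not the module's current behaviour):

import base64

def encode_to_data_uri(content: bytes, mime_type: str = "application/octet-stream") -> str:
    """Encode bytes as an RFC 2397 data URI."""
    b64 = base64.b64encode(content).decode("ascii")
    return f"data:{mime_type};base64,{b64}"

# encode_to_data_uri(b"hello", "text/plain") -> 'data:text/plain;base64,aGVsbG8='
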
- - Args: - files: Liste von Dateien mit Metadaten (Dict mit id, name, type, content_type) - - Returns: - Liste von Dateikontexten für die Verarbeitung - """ - file_contexts = [] - - logger.info(f"Preparing file contexts for {len(files)} files") - - for file in files: - file_id = file.get("id") - file_name = file.get("name") - file_type = file.get("type") - - # Create a comprehensive context with all available metadata - context = { - "id": file_id, - "name": file_name, - "type": file_type, - "size": file.get("size", "Unbekannt"), - "content_type": file.get("content_type"), - "path": file.get("path"), - "upload_date": file.get("upload_date"), - "hash": file.get("hash"), - "mandate_id": file.get("mandate_id"), - "user_id": file.get("user_id") - } - - # Log for debugging - logger.info(f"Created file context: {file_name} (ID: {file_id}, Type: {file_type})") - - file_contexts.append(context) - - return file_contexts - -def is_text_extractable(file_name: str, content_type: str = None) -> bool: - """ - Prüft, ob aus der Datei Text extrahiert werden kann. - - Args: - file_name: Name der Datei für die Erkennung des Formats - content_type: Optional MIME-Typ der Datei - - Returns: - True wenn Text extrahiert werden kann, sonst False - """ - # Einfache Textdateien - if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')): - return True - - # Excel-Dateien - elif file_name.endswith(('.xlsx', '.xls')): - return pd is not None # Nur extrahierbar, wenn pandas installiert ist - - # PDF-Dateien - Textextraktion ist möglich - elif file_name.endswith('.pdf'): - try: - # Prüfen ob PyPDF2 oder PyMuPDF installiert sind - try: - import PyPDF2 - return True - except ImportError: - try: - import fitz # PyMuPDF - return True - except ImportError: - return False - except: - return False - - # Bildformate - nicht als Text extrahierbar - elif file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg')): - return False - - # Video-Formate - nicht als Text extrahierbar - elif file_name.endswith(('.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv')): - return False - - # Audio-Formate - nicht als Text extrahierbar - elif file_name.endswith(('.mp3', '.wav', '.ogg', '.flac', '.aac')): - return False - - # Content-Type prüfen, falls Dateiendung nicht eindeutig ist - if content_type: - if content_type.startswith(('text/', 'application/json', 'application/xml')): - return True - elif content_type == 'application/pdf': - return True - elif content_type.startswith(('image/', 'video/', 'audio/')): - return False - - # Im Zweifelsfall versuchen zu extrahieren - return True - -def extract_text_from_file_content(file_content: bytes, file_name: str, content_type: str = None) -> Tuple[str, bool]: - """ - Extrahiert Text aus verschiedenen Dateiformaten basierend auf dem Binärinhalt. 
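
The extension lists in is_text_extractable above are easy to let drift out of sync with extract_text_from_file_content; a sketch of deriving the same decision from the stdlib mimetypes registry instead (coarser, e.g. it would reject xlsx, but a single source of truth):

import mimetypes
from typing import Optional

TEXTUAL_TYPES = {"application/json", "application/xml", "application/pdf", "text/csv"}

def looks_text_extractable(file_name: str, content_type: Optional[str] = None) -> bool:
    """Heuristic: can we expect to pull text out of this file?"""
    mime = content_type or mimetypes.guess_type(file_name)[0]
    if mime is None:
        return True  # unknown: try anyway, as the original code does
    return mime.startswith("text/") or mime in TEXTUAL_TYPES
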
- - Args: - file_content: Binärinhalt der Datei - file_name: Name der Datei für die Erkennung des Formats - content_type: Optional MIME-Typ der Datei - - Returns: - Tuple mit (extrahierter Text, is_extracted Flag) - """ - # Prüfen, ob Text extrahierbar ist - if not is_text_extractable(file_name, content_type): - return f"[Datei: {file_name} - Textextraktion nicht unterstützt]", False - - try: - # Einfache Textdateien - if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py')): - try: - return file_content.decode('utf-8'), True - except UnicodeDecodeError: - try: - return file_content.decode('latin1'), True - except: - return file_content.decode('cp1252', errors='replace'), True - - # Excel-Dateien - elif file_name.endswith(('.xlsx', '.xls')): - if pd is not None: - # Temporäre Datei im Speicher erstellen - file_obj = BytesIO(file_content) - df = pd.read_excel(file_obj) - result = f"Excel file with {len(df)} rows and {len(df.columns)} columns.\n" - result += f"Columns: {', '.join(df.columns.tolist())}\n\n" - result += df.to_string(index=False) - return result, True - else: - return f"[Excel-Datei: {file_name} - pandas nicht installiert]", False - - # CSV-Dateien - elif file_name.endswith('.csv'): - if pd is not None: - try: - # Temporäre Datei im Speicher erstellen - file_obj = BytesIO(file_content) - df = pd.read_csv(file_obj, encoding='utf-8') - except UnicodeDecodeError: - file_obj = BytesIO(file_content) - try: - df = pd.read_csv(file_obj, encoding='latin1') - except: - file_obj = BytesIO(file_content) - df = pd.read_csv(file_obj, encoding='cp1252') - - result = f"CSV file with {len(df)} rows and {len(df.columns)} columns.\n" - result += f"Columns: {', '.join(df.columns.tolist())}\n\n" - result += df.to_string(index=False) - return result, True - else: - return f"[CSV-Datei: {file_name} - pandas nicht installiert]", False - - # PDF-Dateien - elif file_name.endswith('.pdf'): - try: - try: - from PyPDF2 import PdfReader - reader = PdfReader(BytesIO(file_content)) - text = "" - for page in reader.pages: - text += page.extract_text() + "\n\n" - return text, True - except ImportError: - try: - import fitz # PyMuPDF - doc = fitz.open(stream=file_content, filetype="pdf") - text = "" - for page in doc: - text += page.get_text() + "\n\n" - return text, True - except ImportError: - return f"[PDF: {file_name} - Keine PDF-Bibliothek installiert]", False - except Exception as e: - raise FileExtractionError(f"Fehler beim Lesen der PDF-Datei {file_name}: {str(e)}") - - # Sonstige Dateien - else: - return f"[Datei: {file_name} - Textextraktion nicht unterstützt]", False - - except Exception as e: - logger.error(f"Fehler beim Extrahieren von Text aus {file_name}: {str(e)}") - return f"[Fehler bei der Textextraktion: {str(e)}]", False - -async def extract_and_analyze_pdf_images( - pdf_content: bytes, - prompt: str, - ai_service -) -> List[Dict[str, Any]]: - """ - Extrahiert Bilder aus einer PDF-Datei und analysiert sie. - Arbeitet mit Binärdaten statt Dateipfaden. 
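
extract_text_from_file_content repeats the utf-8 -> latin1 -> cp1252 fallback cascade in several branches; factored out as a sketch. Note that latin1 maps every byte, so it is effectively the terminal fallback here, exactly as in the original branches:

def decode_best_effort(raw: bytes) -> str:
    """Decode bytes, trying common encodings before replacing bad chars."""
    for encoding in ("utf-8", "latin1", "cp1252"):
        try:
            return raw.decode(encoding)
        except UnicodeDecodeError:
            continue
    # unreachable in practice (latin1 accepts any byte string), kept as a guard
    return raw.decode("utf-8", errors="replace")
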
- - Args: - pdf_content: Binärdaten der PDF-Datei - prompt: Prompt für die Bildanalyse - ai_service: AI-Service für die Bildanalyse - - Returns: - Liste mit Analyseergebnissen für jedes Bild - """ - image_responses = [] - temp_files = [] # Liste der temporären Dateien zur Bereinigung - - try: - # PDF mit PyMuPDF öffnen - import fitz # PyMuPDF - # BytesIO is already imported at the top level - import tempfile - - # PDF im Speicher öffnen - doc = fitz.open(stream=pdf_content, filetype="pdf") - logger.info(f"PDF geöffnet mit {len(doc)} Seiten") - - for page_num, page in enumerate(doc, 1): - # Alle Bilder auf der Seite finden - image_list = page.get_images(full=True) - - if image_list: - logger.info(f"Seite {page_num}: {len(image_list)} Bilder gefunden") - - for img_index, img in enumerate(image_list): - try: - # Bild-Referenz - xref = img[0] - - # Bild und Metadaten extrahieren - base_image = doc.extract_image(xref) - image_bytes = base_image["image"] # Tatsächliche Bilddaten - image_ext = base_image["ext"] # Dateiendung (jpg, png, etc.) - - # Erstelle temporäre Datei - fd, temp_img_path = tempfile.mkstemp(suffix=f".{image_ext}") - temp_files.append(temp_img_path) # Zur Bereinigungsliste hinzufügen - - with os.fdopen(fd, 'wb') as img_file: - img_file.write(image_bytes) - - logger.debug(f"Bild temporär gespeichert: {temp_img_path}") - - # Analysiere mit AI-Service - try: - analysis_result = await ai_service.analyze_image( - image_data=image_bytes, # Direktes Übergeben der Bilddaten - prompt=prompt, - mime_type=f"image/{image_ext}" - ) - logger.debug(f"Bildanalyse für Bild {img_index} auf Seite {page_num} abgeschlossen") - except Exception as analyze_error: - logger.error(f"Fehler bei der Bildanalyse: {str(analyze_error)}") - analysis_result = f"[Fehler bei der Bildanalyse: {str(analyze_error)}]" - - # Ergebnis speichern - try: - # Versuche zuerst, die Größe aus base_image zu bekommen - if 'width' in base_image and 'height' in base_image: - image_size = f"{base_image['width']}x{base_image['height']}" - else: - # Alternative: Öffne das temporäre Bild, um die Größe zu bestimmen - from PIL import Image - with Image.open(temp_img_path) as img: - width, height = img.size - image_size = f"{width}x{height}" - except Exception as e: - logger.warning(f"Konnte Bildgröße nicht ermitteln: {str(e)}") - image_size = "unbekannt" - - image_responses.append({ - "page": page_num, - "image_index": img_index, - "format": image_ext, - "image_size": image_size, - "response": analysis_result - }) - - except Exception as e: - logger.warning(f"Fehler bei der Extraktion von Bild {img_index} auf Seite {page_num}: {str(e)}") - continue - - logger.info(f"Extrahiert und analysiert: {len(image_responses)} Bilder aus PDF") - - except ImportError: - logger.error("PyMuPDF (fitz) ist nicht installiert. 
Installiere es mit 'pip install pymupdf'") - raise FileExtractionError("PyMuPDF (fitz) ist nicht installiert") - except Exception as e: - logger.error(f"Fehler beim Extrahieren von PDF-Bildern: {str(e)}") - raise FileExtractionError(f"Fehler beim Extrahieren von PDF-Bildern: {str(e)}") - finally: - # Bereinige alle temporären Dateien - for temp_file in temp_files: - try: - if os.path.exists(temp_file): - os.remove(temp_file) - except Exception as e: - logger.warning(f"Konnte temporäre Datei nicht entfernen: {temp_file} - {str(e)}") - - return image_responses - -def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> Dict[str, Any]: - """ - Fügt eine Datei zu einer Nachricht hinzu mit Kennzeichnung, ob Text extrahiert wurde. - - Args: - message: Die zu erweiternde Nachricht - file_data: Dateimetadaten und Inhalt - - Returns: - Die aktualisierte Nachricht mit der Datei - """ - # Detailliertes Logging für Debugging - logger.info(f"Adding file to message: {file_data.get('name', 'unnamed_file')} (ID: {file_data.get('id', 'unknown')})") - - # Initialize documents array if needed - if "documents" not in message: - message["documents"] = [] - logger.debug("Initialized empty documents array in message") - - # Create a unique ID for the document if not provided - import uuid - doc_id = file_data.get("id", f"file_{uuid.uuid4()}") - - # Extract file size if available - file_size = file_data.get("size") - if isinstance(file_size, str) and file_size.isdigit(): - file_size = int(file_size) - elif file_size is None and file_data.get("content"): - # Estimate size from content if not provided - file_size = len(file_data.get("content", "")) - - # Bestimmen, ob der Inhalt bereits extrahiert wurde - content = file_data.get("content", "No content available") - file_name = file_data.get("name", "unnamed_file") - content_type = file_data.get("content_type") - - # Prüfen, ob der Inhalt als extrahiert markiert werden sollte - is_extracted = file_data.get("is_extracted", False) - if not is_extracted and isinstance(content, str) and content.strip() and file_name: - # Wenn nicht explizit markiert, aber Inhalt vorhanden ist, prüfen wir den Dateityp - is_extracted = is_text_extractable(file_name, content_type) - - # Create standard document structure that matches the data model - document = { - "id": doc_id, - "source": { - "type": "file", - "id": file_data.get("id", doc_id), - "name": file_name, - "content_type": content_type, - "size": file_size, - "upload_date": file_data.get("upload_date", datetime.now().isoformat()) - }, - "contents": [ - { - "type": "text", - "text": content, - "is_extracted": is_extracted # Flag für den Extraktionsstatus hinzufügen - } - ] - } - - # Log document structure for debugging - logger.debug(f"Created document structure: id={doc_id}, name={file_name}, is_extracted={is_extracted}") - - # Check if file is already in the message to avoid duplicates - file_already_added = any( - doc.get("source", {}).get("id") == file_data.get("id") - for doc in message.get("documents", []) - ) - - if not file_already_added: - message["documents"].append(document) - logger.info(f"File {file_name} successfully added to message (total: {len(message.get('documents', []))} files)") - else: - logger.info(f"File {file_name} already exists in message, skipping") - - return message - -def extract_files_from_message(message: Dict[str, Any]) -> List[Dict[str, Any]]: - """ - Extrahiert Dateiinformationen aus einer Nachricht. - Funktion für Workflow-Manager und interne Verwendung. 
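
The document structure that add_file_to_message builds and extract_files_from_message consumes, as one illustrative instance (ids and values are made up):

example_document = {
    "id": "file_3f2a...",
    "source": {
        "type": "file",
        "id": "file_3f2a...",
        "name": "report.pdf",
        "content_type": "application/pdf",
        "size": 102400,
        "upload_date": "2025-04-14T20:05:33",
    },
    "contents": [
        {"type": "text", "text": "...extracted text...", "is_extracted": True}
    ],
}
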
- - Args: - message: Die Nachricht, aus der Dateien extrahiert werden sollen - - Returns: - Liste der extrahierten Dateiinformationen - """ - files = [] - - if "documents" not in message: - logger.debug("No documents found in message") - return files - - # Log for debugging - logger.debug(f"Extracting files from message with {len(message.get('documents', []))} documents") - - for doc in message.get("documents", []): - doc_source = doc.get("source", {}) - - # Nur Dateien extrahieren - if doc_source.get("type") == "file": - file_info = { - "id": doc_source.get("id", f"file_{uuid.uuid4()}"), - "name": doc_source.get("name", "unnamed_file"), - "content_type": doc_source.get("content_type"), - "size": doc_source.get("size") - } - - # Inhalt extrahieren, falls vorhanden - doc_contents = doc.get("contents", []) - for content in doc_contents: - if content.get("type") == "text": - file_info["content"] = content.get("text", "") - break - - logger.debug(f"Extracted file: {file_info.get('name')} (ID: {file_info.get('id')})") - files.append(file_info) - else: - logger.debug(f"Skipping non-file document of type: {doc_source.get('type')}") - - logger.info(f"Extracted {len(files)} files from message") - return files - -async def read_file_contents( - file_contexts: List[Dict[str, Any]], - lucydom_interface, - workflow_id: str = None, - add_log_func = None, - ai_service = None # AI service parameter for image analysis -) -> Dict[str, Dict[str, Any]]: - """ - Liest den Inhalt aller Dateien und führt bei Bildern und Dokumenten Analysen durch. - Verwendet LucyDOM-Interface statt direkter Dateizugriffe. - Gibt jetzt ein Dictionary mit Dateiinhalten und Extraktionsstatus zurück. - - Args: - file_contexts: Liste der Dateikontexte mit Metadaten - lucydom_interface: LucyDOM-Interface für Dateizugriffe - workflow_id: Optionale ID des Workflows für Logging - add_log_func: Optionale Funktion für das Hinzufügen von Logs - ai_service: Optionaler AI-Service für die Bildanalyse - - Returns: - Dictionary mit Dateiinhalten und Metadaten (file_id -> {content, is_extracted, ...}) - """ - file_contents = {} - - # Add debug logging - logger.info(f"Reading contents of {len(file_contexts)} files for workflow {workflow_id}") - - for file in file_contexts: - file_id = file["id"] - file_name = file["name"] - file_type = file.get("type", "unknown") - - try: - # Dateiinhalt über LucyDOM-Interface abrufen - file_data = await lucydom_interface.read_file_content(file_id) - - if not file_data: - _log(add_log_func, workflow_id, f"Datei {file_name} nicht gefunden", "warning") - file_contents[file_id] = { - "content": f"File content not available (File not found)", - "is_extracted": False, - "name": file_name, - "type": file_type, - "content_type": file.get("content_type") - } - continue - - logger.info(f"Successfully read file: {file_name} (ID: {file_id}, Type: {file_type})") - - # Bildverarbeitung - immer KI-Analyse verwenden, wenn verfügbar - if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')): - if ai_service and hasattr(ai_service, "analyze_image"): - try: - image_analysis = await ai_service.analyze_image( - image_data=file_data, - prompt="Describe this image in detail", - mime_type=file.get("content_type") - ) - - logger.debug(f"Image analysis successfully generated for {file_name}") - - file_contents[file_id] = { - "content": f"Image Analysis:\n{image_analysis}", - "is_extracted": False, # Bildanalyse gilt nicht als Text-Extraktion - "name": file_name, - "type": file_type, - 
"content_type": file.get("content_type") - } - _log(add_log_func, workflow_id, f"Image {file_name} analyzed successfully", "info") - except Exception as e: - logger.error(f"Error analyzing image {file_name}: {str(e)}") - _log(add_log_func, workflow_id, f"Error analyzing image {file_name}: {str(e)}", "error") - file_contents[file_id] = { - "content": f"Image file: {file_name} (Analysis failed: {str(e)})", - "is_extracted": False, - "name": file_name, - "type": file_type, - "content_type": file.get("content_type") - } - else: - file_contents[file_id] = { - "content": f"Image file: {file_name} (AI analysis not available)", - "is_extracted": False, - "name": file_name, - "type": file_type, - "content_type": file.get("content_type") - } - - # Dokument- und Textdateien - elif file_type == "document" or not file_type: - # Verwende die zentrale Textextraktionsfunktion mit Dateiinhalt - content, is_extracted = extract_text_from_file_content( - file_data, file_name, file.get("content_type") - ) - file_contents[file_id] = { - "content": content, - "is_extracted": is_extracted, - "name": file_name, - "type": file_type, - "content_type": file.get("content_type") - } - _log(add_log_func, workflow_id, - f"File {file_name} read successfully (extracted: {is_extracted})", "info") - - # Andere Dateitypen - nur Metadaten speichern - else: - file_contents[file_id] = { - "content": f"File: {file_name} (Type: {file_type}, content not available)", - "is_extracted": False, - "name": file_name, - "type": file_type, - "content_type": file.get("content_type") - } - _log(add_log_func, workflow_id, f"Unsupported file type: {file_type} for {file_name}", "warning") - - except Exception as e: - logger.error(f"Error reading file {file_name}: {str(e)}") - _log(add_log_func, workflow_id, f"Error reading file {file_name}: {str(e)}", "error") - file_contents[file_id] = { - "content": f"File content not available (Error: {str(e)})", - "is_extracted": False, - "name": file_name, - "type": file_type, - "content_type": file.get("content_type") - } - - return file_contents - -def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None): - """Hilfsfunktion zum Loggen mit unterschiedlichen Log-Funktionen""" - # Log über die Logger-Instanz - if log_type == "error": - logger.error(message) - elif log_type == "warning": - logger.warning(message) - else: - logger.info(message) - - # Log über die bereitgestellte Log-Funktion (falls vorhanden) - if add_log_func and workflow_id: - add_log_func(workflow_id, message, log_type, agent_id, agent_name) - -def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None): - """Hilfsfunktion zum Loggen mit unterschiedlichen Log-Funktionen""" - # Log über die Logger-Instanz - if log_type == "error": - logger.error(message) - elif log_type == "warning": - logger.warning(message) - else: - logger.info(message) - - # Log über die bereitgestellte Log-Funktion (falls vorhanden) - if add_log_func and workflow_id: - add_log_func(workflow_id, message, log_type, agent_id, agent_name) diff --git a/gwserver/_old_bk_modules/agentservice_registry.py b/gwserver/_old_bk_modules/agentservice_registry.py deleted file mode 100644 index 4377d75d..00000000 --- a/gwserver/_old_bk_modules/agentservice_registry.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -Aktualisierte Registry für alle verfügbaren Agenten im System. -Enthält jetzt auch den FileCreator-Agenten. 
-""" - -import logging -import importlib -from typing import Dict, Any, List, Optional - -# Import direkt bekannter Agent-Module -# Andere Module werden dynamisch importiert -from modules.agentservice_base import BaseAgent - -logger = logging.getLogger(__name__) - -class AgentRegistry: - """Registry für alle verfügbaren Agenten im System""" - - _instance = None - - @classmethod - def get_instance(cls): - """Gibt eine Singleton-Instanz der Agent-Registry zurück""" - if cls._instance is None: - cls._instance = cls() - return cls._instance - - def __init__(self): - """Initialisiert die Agent-Registry""" - if AgentRegistry._instance is not None: - raise RuntimeError("Singleton-Instanz existiert bereits - nutze get_instance()") - self.agents = {} - self._load_agents() - - def _load_agents(self): - """Lädt alle verfügbaren Agenten""" - # Liste aller zu ladenden Agenten-Module - agent_modules = [ - "agentservice_agent_coder", - "agentservice_agent_analyst", - "agentservice_agent_webcrawler", - "agentservice_agent_sharepoint", - "agentservice_agent_documentation", - "agentservice_agent_filecreator" # Neuer FileCreator-Agent hinzugefügt - ] - - for module_name in agent_modules: - try: - # Importiere das Modul - try: - module = importlib.import_module(f"modules.{module_name}") - except ImportError: - module = importlib.import_module(module_name) - - # Suche nach der Agent-Klasse oder einer get_*_agent-Funktion - agent_type = module_name.split('_')[-1] - class_name = f"{agent_type.capitalize()}Agent" - getter_name = f"get_{agent_type}_agent" - - agent = None - - # Versuche, den Agenten über die get_*_agent-Funktion zu holen - if hasattr(module, getter_name): - getter_func = getattr(module, getter_name) - agent = getter_func() - logger.info(f"Agent '{agent.name}' (Typ: {agent.type}) via {getter_name}() geladen") - - # Alternativ versuche, den Agenten direkt zu instanziieren - elif hasattr(module, class_name): - agent_class = getattr(module, class_name) - agent = agent_class() - logger.info(f"Agent '{agent.name}' (Typ: {agent.type}) direkt instanziiert") - - if agent: - # Registriere den Agenten - self.register_agent(agent) - else: - logger.warning(f"Keine Agent-Klasse oder getter-Funktion in Modul {module_name} gefunden") - - except ImportError as e: - logger.warning(f"Modul {module_name} konnte nicht importiert werden: {e}") - except Exception as e: - logger.error(f"Fehler beim Laden des Agenten aus Modul {module_name}: {e}") - - def register_agent(self, agent: BaseAgent): - """Registriert einen Agenten in der Registry.""" - agent_type = agent.type - self.agents[agent_type] = agent - # Zusätzlich nach ID registrieren - self.agents[agent.id] = agent - logger.debug(f"Agent '{agent.name}' (Typ: {agent_type}) wurde registriert") - - def get_agent(self, agent_identifier: str) -> Optional[BaseAgent]: - """ - Gibt eine Instanz eines Agenten nach ID oder Typ zurück. 
- - Args: - agent_identifier: ID oder Typ des gewünschten Agenten - - Returns: - Agent-Instanz oder None, wenn nicht gefunden - """ - # Versuche, direkt nach Typ zu finden - if agent_identifier in self.agents: - return self.agents[agent_identifier] - - # Wenn nicht gefunden, versuche verschiedene Varianten des Namens - variants = [ - agent_identifier, - agent_identifier.replace('_agent', ''), - f"{agent_identifier}_agent" - ] - - for variant in variants: - if variant in self.agents: - return self.agents[variant] - - logger.warning(f"Agent mit Identifier '{agent_identifier}' nicht gefunden") - return None - - def get_all_agents(self) -> Dict[str, BaseAgent]: - """Gibt alle registrierten Agenten zurück.""" - return self.agents - - def get_agent_infos(self) -> List[Dict[str, Any]]: - """Gibt Informationen zu allen registrierten Agenten zurück.""" - agent_infos = [] - # Nur einmal pro Agent-Instanz (da wir sowohl nach Typ als auch nach ID registrieren) - seen_agents = set() - for agent in self.agents.values(): - if agent not in seen_agents: - agent_infos.append(agent.get_agent_info()) - seen_agents.add(agent) - return agent_infos - - def initialize_agents_for_workflow(self) -> Dict[str, Dict[str, Any]]: - """Initialisiert Agenten für einen Workflow.""" - initialized_agents = {} - seen_agents = set() - for agent in self.agents.values(): - if agent not in seen_agents: - agent_info = agent.get_agent_info() - agent_id = agent_info["id"] - initialized_agents[agent_id] = agent_info - seen_agents.add(agent) - return initialized_agents \ No newline at end of file diff --git a/gwserver/_old_bk_modules/agentservice_workflow_manager.py b/gwserver/_old_bk_modules/agentservice_workflow_manager.py deleted file mode 100644 index 7fb68c4c..00000000 --- a/gwserver/_old_bk_modules/agentservice_workflow_manager.py +++ /dev/null @@ -1,1333 +0,0 @@ -""" -Manager für Workflow-Ausführung im Agentservice. -Steuert den gesamten Ablauf eines Workflow-Durchlaufs. - -Implementiert die neue Workflow-Struktur und Ausführungslogik gemäß den Anforderungen. -Unterstützt sowohl neue Workflows als auch die Fortsetzung bestehender Workflows mit Benutzereingaben. -Angepasst für die verbesserte Dateibehandlung. 
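A hedged usage sketch for the registry above. The id "coder_agent" is an assumption based on the naming pattern of the other agents; get_agent falls back to the *_agent name variants and returns None when a module failed to load:

from modules.agentservice_registry import AgentRegistry

registry = AgentRegistry.get_instance()

coder = registry.get_agent("coder_agent")   # lookup by id
same = registry.get_agent("coder")          # resolved via the "+_agent" variant
assert coder is same                        # both keys point to one instance

for info in registry.get_agent_infos():     # deduplicated: one entry per agent
    print(info["id"], info.get("name"))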
-""" - -import os -import logging -import asyncio -import uuid -import json -from datetime import datetime -from typing import List, Dict, Any, Optional, Tuple, Union - -# Import von Modulen -from modules.agentservice_registry import AgentRegistry -from modules.agentservice_filemanager import prepare_file_contexts, read_file_contents, extract_files_from_message, add_file_to_message -from modules.agentservice_dataextraction import data_extraction - -# Import neuer Modellklassen -try: - from modules.lucydom_model import Message, Workflow, Document, DocumentSource, DocumentContent, DataStats -except ImportError: - # Fallback-Definitionen - class Message(Dict[str, Any]): pass - class Workflow(Dict[str, Any]): pass - class Document(Dict[str, Any]): pass - class DocumentSource(Dict[str, Any]): pass - class DocumentContent(Dict[str, Any]): pass - class DataStats(Dict[str, Any]): pass - -logger = logging.getLogger(__name__) - -registry = AgentRegistry.get_instance() -agent = registry.get_agent("user_agent") - -class WorkflowError(Exception): - """Basis-Exception für Workflow-Fehler""" - pass - -class WorkflowNotFoundError(WorkflowError): - """Exception wenn ein Workflow nicht gefunden wurde""" - pass - -class WorkflowExecutionError(WorkflowError): - """Exception bei der Ausführung eines Workflows""" - pass - -class WorkflowManager: - """Manager für die Ausführung von Workflows""" - - def __init__(self, mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None): - """ - Initialisiert den WorkflowManager. - - Args: - mandate_id: ID des Mandanten - user_id: ID des Benutzers - ai_service: Service für KI-Anfragen - lucydom_interface: Interface für Datenbankzugriffe (optional) - """ - self.mandate_id = mandate_id - self.user_id = user_id - self.ai_service = ai_service - self.lucydom_interface = lucydom_interface - - # Lade Konfiguration aus config.ini - import configload - config = configload.load_config() - - # Verzeichnisse für Ergebnisse und Uploads aus der Konfiguration lesen - self.results_dir = config.get('Module_AgentserviceInterface', 'RESULTS_DIR', fallback='results') - - # Maximale Anzahl an Nachrichten im Verlauf - self.max_history = int(config.get('Module_AgentserviceInterface', 'MAX_HISTORY', fallback='20')) - - # Stelle sicher, dass die Verzeichnisse existieren - os.makedirs(self.results_dir, exist_ok=True) - - # Aktive Workflows - self.workflows = {} - - # Lade aktive Workflows aus der Datenbank, falls verfügbar - if self.lucydom_interface: - self._load_active_workflows() - - logger.info(f"WorkflowManager initialisiert mit Mandant {mandate_id}, Benutzer {user_id}") - - - async def execute_workflow( - self, - message: Dict[str, Any], - workflow_id: Optional[str] = None, - files: List[Dict[str, Any]] = None, - is_user_input: bool = False # Parameter to identify user input - ) -> Dict[str, Any]: - """ - Führt einen Workflow aus, entweder durch Erstellen eines neuen oder - Fortsetzen eines bestehenden Workflows mit Benutzereingabe. 
- - Args: - message: Die Nachricht (Prompt oder Benutzereingabe) - workflow_id: Optional ID eines bestehenden Workflows - files: Optionale Liste von Dateimetadaten - is_user_input: Flag, das anzeigt, ob es sich um eine Benutzereingabe handelt - - Returns: - Dictionary mit Workflow-Status und Ergebnis - """ - # Add detailed debug logging - logger.info(f"execute_workflow called: workflow_id={workflow_id}, is_user_input={is_user_input}, message={message.get('content', '')[:50]}...") - - # Detailed file logging - if files: - logger.info(f"Files provided: {len(files)} files") - for file in files: - file_id = file.get('id', 'unknown') - file_name = file.get('name', 'unnamed') - file_type = file.get('type', 'unknown') - file_content_type = file.get('content_type', 'unknown') - logger.info(f"File: {file_name} (ID: {file_id}, Type: {file_type}, Content-Type: {file_content_type})") - else: - logger.info("No files provided with the message") - - # 4.1 Unterscheide zwischen neuem Workflow und bestehender Benutzereingabe - is_new_workflow = workflow_id is None - - if is_new_workflow: - # Variante (A): Neuen Workflow erstellen - workflow_id = f"wf_{uuid.uuid4()}" - workflow = self._initialize_workflow(workflow_id) - workflow["name"] = message.get("content", "")[:50] # Kurzer Titel aus dem Inhalt - workflow["status"] = "running" - self._add_log(workflow, "Neuer Workflow gestartet", "info") - else: - # Variante (B): Bestehenden Workflow laden - try: - workflow = await self.load_workflow(workflow_id) - if not workflow: - raise WorkflowNotFoundError(f"Workflow {workflow_id} nicht gefunden") - - # WICHTIG: Workflow-Status immer auf "running" setzen, unabhängig vom vorherigen Status - # So stellen wir sicher, dass der Workflow nach einer Benutzereingabe korrekt fortgesetzt wird - workflow["status"] = "running" - workflow["last_activity"] = datetime.now().isoformat() - self._add_log(workflow, "Workflow nach Benutzereingabe fortgesetzt", "info") - except WorkflowNotFoundError as e: - logger.error(f"Workflow nicht gefunden: {str(e)}") - return { - "workflow_id": workflow_id, - "status": "error", - "error": f"Workflow nicht gefunden: {workflow_id}" - } - except WorkflowError as e: - logger.error(f"Workflow-Fehler: {str(e)}") - return { - "workflow_id": workflow_id, - "status": "error", - "error": str(e) - } - - logger.debug(f"Workflow initialisiert: {workflow_id}, Status: {workflow['status']}") - - try: - # 4.2 Message-Initialisierung - # Letztes Message-Objekt abschließen (falls vorhanden) - if "messages" in workflow and workflow["messages"]: - self._finalize_last_message(workflow) - - # Neues Message-Objekt erstellen - new_message = self._create_message(workflow_id, message.get("role", "user")) - new_message["content"] = message.get("content", "") - - # Workflow-ID zum Message-Objekt hinzufügen für bessere Fehlerbehandlung - new_message["workflow_id"] = workflow_id - - # Log the message creation - logger.info(f"Created new message with ID {new_message['id']} and content: {new_message['content'][:50]}...") - - # 4.3 Dateivorbereitung - if files and len(files) > 0: - # Add detailed logging - logger.info(f"Processing {len(files)} files for message {new_message['id']}") - for f in files: - logger.info(f"Processing file: {f.get('name', 'unknown')} (ID: {f.get('id', 'unknown')})") - - # Dateikontexte vorbereiten - enthält nur Metadaten - file_contexts = prepare_file_contexts(files) - self._add_log(workflow, f"{len(files)} Dateien werden verarbeitet", "info") - - # Dateiinhalte lesen und zum Message-Objekt hinzufügen 
- # LucyDOM-Interface wird für Dateizugriffe genutzt - file_contents = await read_file_contents( - file_contexts, - self.lucydom_interface, - workflow_id, - self._add_log, - self.ai_service - ) - logger.debug(f"Dateien geladen für Workflow {workflow_id}: {file_contents.keys()}") - - for file_id, content in file_contents.items(): - file_metadata = next((f for f in files if f.get('id') == file_id), {}) - file_data = { - "id": file_id, - "name": file_metadata.get('name', next((f.get('name', 'unnamed_file') for f in file_contexts if f.get('id') == file_id), 'unnamed_file')), - "content_type": file_metadata.get('content_type', next((f.get('content_type') for f in file_contexts if f.get('id') == file_id), None)), - "type": file_metadata.get('type', next((f.get('type') for f in file_contexts if f.get('id') == file_id), "unknown")), - "content": content, - "size": file_metadata.get('size') - } - logger.info(f"Adding file {file_data['name']} (ID: {file_id}) to message {new_message['id']}") - try: - # Add file to message and check document count before and after - doc_count_before = len(new_message.get("documents", [])) - new_message = add_file_to_message(new_message, file_data) - doc_count_after = len(new_message.get("documents", [])) - - if doc_count_after > doc_count_before: - logger.info(f"File successfully added to message. Document count: {doc_count_after}") - else: - logger.warning(f"File may not have been added to message properly. Document count unchanged: {doc_count_before}") - except Exception as e: - logger.error(f"Error adding file to message: {str(e)}") - self._add_log(workflow, f"Fehler beim Hinzufügen der Datei {file_data['name']}: {str(e)}", "error") - - # Message zum Workflow hinzufügen - if "messages" not in workflow: - workflow["messages"] = [] - - # Log the message document count before adding to workflow - logger.info(f"Adding message with {len(new_message.get('documents', []))} documents to workflow {workflow_id}") - workflow["messages"].append(new_message) - - # Immediately save workflow to persist file attachments - self._save_workflow(workflow) - logger.info(f"Saved workflow state after adding message with {len(new_message.get('documents', []))} documents") - - # 4.5 Moderator-Entscheidung (mit OpenAI API) - self._add_log(workflow, "Moderator analysiert die Anfrage und wählt passende Agenten aus", "info") - - # 4.4 Agent-Initialisierung - agents = registry.initialize_agents_for_workflow() - - # Moderator-Entscheidung abfragen (nur System-Agenten) - system_agent_tasks = await self._decide_agent_tasks(workflow, new_message, agents) - - # Speichere den aktuellen Zwischenstand - self._save_workflow(workflow) - - # Nach Agenten-Entscheidung - self._add_log(workflow, f"Moderator hat die Entscheidung getroffen: {len(system_agent_tasks)} System-Agenten ausgewählt", "info") - logger.debug(f"Agent-Tasks für Workflow {workflow_id}: {[task['agent_id'] for task in system_agent_tasks]}") - - for task in system_agent_tasks: - self._add_log(workflow, f"Agent {task['agent_id']} wurde ausgewählt mit Aufgabe: {task['prompt'][:50]}...", "info") - - # 4.6 Agent-Ausführung - - # 1. 
System-Agenten ausführen, falls vorhanden - agent_results = [] - last_result = None - - if system_agent_tasks: - self._add_log(workflow, f"{len(system_agent_tasks)} System-Agenten werden ausgeführt", "info") - - for task in system_agent_tasks: - agent_id = task["agent_id"] - agent_prompt = task["prompt"] - expected_format = task.get("expected_format") - - if agent_id == "moderator": # moderator answered directly in variable agent_prompt - agent_result = { - "agent_id": agent_id, - "agent_name": "moderator", - "content": agent_prompt, - "agent_type": "system", - "result_format": "Text" # Moderator liefert Text - } - agent_results.append(agent_result) - else: - # Wenn ein vorheriges Ergebnis existiert, in den Prompt einbinden - if last_result: - agent_prompt = f"{agent_prompt}\n\nVorheriges Ergebnis: {last_result}" - - self._add_log(workflow, f"Agent {agent_id} wird ausgeführt", "info") - - # Agenten ausführen mit erwartetem Format - agent_result = await self._execute_agent(workflow, agent_id, agent_prompt, expected_format) - - if agent_result: - agent_results.append(agent_result) - last_result = agent_result.get("content", "") - - # Zusätzlicher Log-Eintrag für das Frontend - self._add_log(workflow, f"Agent {agent_id} hat seine Aufgabe abgeschlossen", "success") - - # 2. Immer den User-Agent aufrufen, mit einem generischen - # Prompt basierend auf den Ergebnissen der System-Agenten - - # Erstelle einen benutzerfreundlichen Prompt basierend auf den System-Agent-Ergebnissen - if agent_results: - # Wenn System-Agenten ausgeführt wurden, fasse ihre Ergebnisse zusammen - summary = await self._create_summary(agent_results) - user_prompt = f"Die Agenten haben ihre Aufgaben abgeschlossen. Hier ist eine Zusammenfassung der Ergebnisse:\n\n{summary}\n\nBenötigen Sie weitere Informationen oder haben Sie Fragen dazu?" - else: - # Wenn keine System-Agenten ausgeführt wurden - user_prompt = "Ich habe Ihre Anfrage geprüft. Wie kann ich Ihnen konkret weiterhelfen?" - - # 3. 
User-Agent-Nachricht erstellen und zum Workflow hinzufügen - workflow["status"] = "completed" # Workflow is complete, ready for new prompt - - user_message = { - "role": "assistant", - "content": f"[Moderator zu User Agent] {user_prompt}", - "agent_type": "moderator", - "agent_id": "moderator", - "agent_name": "Moderator", - "workflow_complete": True # Signal completion instead of waiting - } - # Nachricht zum Workflow hinzufügen - workflow["messages"].append(user_message) - - # Log-Eintrag - self._add_log(workflow, f"Workflow wartet auf Benutzereingabe: {user_prompt[:50]}...", "info") - - # Workflow speichern - self._save_workflow(workflow) - - # Fertig - Backend wartet jetzt auf nächsten API-Call vom Frontend - return { - "workflow_id": workflow_id, - "status": "completed", - "messages": workflow.get("messages", []) - } - - except Exception as e: - # Fehlerbehandlung - workflow["status"] = "failed" - self._add_log(workflow, f"Fehler bei der Workflow-Ausführung: {str(e)}", "error") - self._save_workflow(workflow) - logger.error(f"Fehler bei der Workflow-Ausführung: {str(e)}", exc_info=True) - - return { - "workflow_id": workflow_id, - "status": "failed", - "error": str(e) - } - - - def _load_active_workflows(self): - """Lädt aktive Workflows aus der Datenbank""" - try: - if not self.lucydom_interface: - return - - # Aktive Workflows für den aktuellen Benutzer abrufen - user_workflows = self.lucydom_interface.get_workflows_by_user(self.user_id) - active_workflows = [wf for wf in user_workflows if wf.get("status") in ["running", "completed"]] - - # Aktive Workflows in den Speicher laden - for workflow_base in active_workflows: - workflow_id = workflow_base.get("id") - if not workflow_id: - continue - - # Vollständigen Workflow-Zustand laden - workflow = self.lucydom_interface.load_workflow_state(workflow_id) - if workflow: - self.workflows[workflow_id] = workflow - logger.info(f"Aktiven Workflow {workflow_id} aus Datenbank geladen") - except Exception as e: - logger.error(f"Fehler beim Laden der aktiven Workflows: {str(e)}") - - def _save_workflow(self, workflow: Dict[str, Any]) -> None: - """ - Speichert den Workflow in der Datenbank und als Datei. - - Args: - workflow: Das zu speichernde Workflow-Objekt - """ - workflow_id = workflow.get("id") - - # In der Datenbank speichern, falls verfügbar - if self.lucydom_interface: - try: - success = self.lucydom_interface.save_workflow_state(workflow) - if success: - logger.debug(f"Workflow {workflow_id} in Datenbank gespeichert") - else: - logger.warning(f"Workflow {workflow_id} konnte nicht in Datenbank gespeichert werden") - except Exception as e: - logger.error(f"Fehler beim Speichern des Workflows {workflow_id} in Datenbank: {str(e)}") - - # Als Datei speichern (Backup/Fallback) - workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json") - - try: - with open(workflow_path, 'w', encoding='utf-8') as f: - json.dump(workflow, f, indent=2, ensure_ascii=False) - logger.debug(f"Workflow {workflow_id} als Datei gespeichert: {workflow_path}") - except Exception as e: - logger.error(f"Fehler beim Speichern des Workflows {workflow_id} als Datei: {str(e)}") - - async def load_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]: - """ - Lädt einen Workflow aus der Datenbank oder Datei. 
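A hedged driver sketch showing how execute_workflow above is meant to be called. EchoAI is an invented stand-in for the AI service, and a real run additionally needs the surrounding gwserver environment (config.ini, LucyDOM interface, agent registry):

import asyncio
from modules.agentservice_workflow_manager import WorkflowManager

class EchoAI:
    """Placeholder AI service: always lets the moderator answer directly (FORMAT1)."""
    async def call_api(self, messages):
        return '[{"agent_id": "moderator", "prompt": "Direkte Antwort."}]'

async def main():
    wm = WorkflowManager(mandate_id=1, user_id=1, ai_service=EchoAI())

    # (A) New workflow: no workflow_id given
    result = await wm.execute_workflow({"role": "user", "content": "Analysiere die Datei"})
    print(result["workflow_id"], result["status"])

    # (B) Continue the same workflow with a user reply
    result = await wm.execute_workflow(
        {"role": "user", "content": "Ja, bitte mehr Details"},
        workflow_id=result["workflow_id"],
        is_user_input=True,
    )

asyncio.run(main())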
- - Args: - workflow_id: ID des Workflows - - Returns: - Das geladene Workflow-Objekt oder None, wenn der Workflow nicht existiert - """ - # Prüfen, ob der Workflow bereits im Speicher ist - if workflow_id in self.workflows: - return self.workflows[workflow_id] - - # Versuche, den Workflow aus der Datenbank zu laden - if self.lucydom_interface: - try: - workflow = self.lucydom_interface.load_workflow_state(workflow_id) - if workflow: - # Workflow im Speicher cachen - self.workflows[workflow_id] = workflow - logger.info(f"Workflow {workflow_id} aus Datenbank geladen") - return workflow - except Exception as e: - logger.error(f"Fehler beim Laden des Workflows {workflow_id} aus Datenbank: {str(e)}") - - # Versuche, den Workflow aus der Datei zu laden - workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json") - - try: - if os.path.exists(workflow_path): - with open(workflow_path, 'r', encoding='utf-8') as f: - workflow = json.load(f) - - # Workflow im Speicher cachen - self.workflows[workflow_id] = workflow - - # Optional: In Datenbank speichern, falls verfügbar - if self.lucydom_interface: - try: - self.lucydom_interface.save_workflow_state(workflow) - logger.info(f"Workflow {workflow_id} in Datenbank gespeichert nach Laden aus Datei") - except Exception as e: - logger.warning(f"Fehler beim Speichern des Workflows {workflow_id} in Datenbank nach Laden aus Datei: {str(e)}") - - logger.info(f"Workflow {workflow_id} aus Datei geladen: {workflow_path}") - return workflow - else: - logger.warning(f"Workflow {workflow_id} nicht gefunden: {workflow_path}") - raise WorkflowNotFoundError(f"Workflow {workflow_id} nicht gefunden") - except WorkflowNotFoundError: - raise - except Exception as e: - logger.error(f"Fehler beim Laden des Workflows {workflow_id} aus Datei: {str(e)}") - raise WorkflowError(f"Fehler beim Laden des Workflows: {str(e)}") - - async def list_workflows(self, mandate_id: int = None, user_id: int = None) -> List[Dict[str, Any]]: - """ - Listet alle verfügbaren Workflows auf. 
- - Args: - mandate_id: Optionale Mandanten-ID für die Filterung - user_id: Optionale Benutzer-ID für die Filterung - - Returns: - Liste von Workflow-Zusammenfassungen - """ - workflows = [] - - # Aus Datenbank laden, falls verfügbar - if self.lucydom_interface: - try: - # Alle Workflows des Benutzers abrufen - if user_id is not None: - user_workflows = self.lucydom_interface.get_workflows_by_user(user_id) - else: - user_workflows = self.lucydom_interface.get_all_workflows() - - # Nach Mandanten filtern, falls angegeben - if mandate_id is not None: - user_workflows = [wf for wf in user_workflows if wf.get("mandate_id") == mandate_id] - - # Workflow-Zusammenfassungen erstellen - for workflow in user_workflows: - summary = { - "id": workflow.get("id"), - "name": workflow.get("name", f"Workflow {workflow.get('id')}"), - "status": workflow.get("status"), - "started_at": workflow.get("started_at"), - "last_activity": workflow.get("last_activity"), - "completed_at": workflow.get("completed_at") - } - - # Nachrichtenanzahl hinzufügen, falls verfügbar - messages = self.lucydom_interface.get_workflow_messages(workflow.get("id")) - if messages: - summary["message_count"] = len(messages) - - workflows.append(summary) - - logger.info(f"Workflows aus Datenbank geladen: {len(workflows)}") - - # Nach letzter Aktivität sortieren (neueste zuerst) - return sorted(workflows, key=lambda w: w.get("last_activity", ""), reverse=True) - - except Exception as e: - logger.error(f"Fehler beim Abrufen der Workflows aus Datenbank: {str(e)}") - - # Aus Dateien laden, wenn keine Datenbank verfügbar oder ein Fehler aufgetreten ist - try: - for filename in os.listdir(self.results_dir): - if filename.startswith("workflow_") and filename.endswith(".json"): - workflow_path = os.path.join(self.results_dir, filename) - - try: - with open(workflow_path, 'r', encoding='utf-8') as f: - workflow = json.load(f) - - # Prüfen, ob Mandanten- und Benutzer-ID übereinstimmen - if mandate_id is not None and workflow.get("mandate_id") != mandate_id: - continue - - if user_id is not None and workflow.get("user_id") != user_id: - continue - - # Workflow-Zusammenfassung erstellen - summary = { - "id": workflow.get("id"), - "name": workflow.get("name", f"Workflow {workflow.get('id')}"), - "status": workflow.get("status"), - "started_at": workflow.get("started_at"), - "last_activity": workflow.get("last_activity"), - "message_count": len(workflow.get("messages", [])) - } - - workflows.append(summary) - except Exception as e: - logger.error(f"Fehler beim Laden der Workflow-Datei {filename}: {str(e)}") - - logger.info(f"Workflows aus Dateien geladen: {len(workflows)}") - - # Nach letzter Aktivität sortieren (neueste zuerst) - return sorted(workflows, key=lambda w: w.get("last_activity", ""), reverse=True) - - except Exception as e: - logger.error(f"Fehler beim Auflisten der Workflows: {str(e)}") - return [] - - async def delete_workflow(self, workflow_id: str) -> bool: - """ - Löscht einen Workflow. 
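For orientation, one entry of the list that list_workflows above returns looks roughly like this (values invented; the database branch additionally fills completed_at):

summary = {
    "id": "wf_1a2b3c",
    "name": "Analysiere die Datei",
    "status": "completed",
    "started_at": "2025-04-14T20:05:33",
    "last_activity": "2025-04-14T20:07:10",
    "message_count": 4,
}
print(summary["id"], summary["status"])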
- - Args: - workflow_id: ID des Workflows - - Returns: - True bei Erfolg, False wenn der Workflow nicht existiert - """ - # Aus dem Speicher entfernen - if workflow_id in self.workflows: - del self.workflows[workflow_id] - - # Aus der Datenbank löschen - if self.lucydom_interface: - try: - db_success = self.lucydom_interface.delete_workflow(workflow_id) - logger.info(f"Workflow {workflow_id} aus Datenbank gelöscht: {db_success}") - except Exception as e: - logger.error(f"Fehler beim Löschen des Workflows {workflow_id} aus Datenbank: {str(e)}") - - # Datei löschen - workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json") - - try: - if os.path.exists(workflow_path): - os.remove(workflow_path) - logger.info(f"Workflow {workflow_id} aus Datei gelöscht: {workflow_path}") - return True - else: - logger.warning(f"Workflow {workflow_id} nicht gefunden: {workflow_path}") - return False - except Exception as e: - logger.error(f"Fehler beim Löschen der Workflow-Datei {workflow_id}: {str(e)}") - return False - - def _initialize_workflow(self, workflow_id: str) -> Dict[str, Any]: - """ - Initialisiert einen neuen Workflow und speichert ihn in der Datenbank. - - Args: - workflow_id: ID des Workflows - - Returns: - Das initialisierte Workflow-Objekt - """ - current_time = datetime.now().isoformat() - - # Vollständiges Workflow-Objekt gemäß dem Datenmodell erstellen - workflow = { - "id": workflow_id, - "name": f"Workflow {workflow_id}", - "mandate_id": self.mandate_id, - "user_id": self.user_id, - "status": "running", - "started_at": current_time, - "last_activity": current_time, - "current_round": 1, - - # Vollständige Statistik-Struktur gemäß DataStats-Modell - "data_stats": { - "total_processing_time": 0.0, - "total_token_count": 0, - "total_bytes_sent": 0, - "total_bytes_received": 0 - }, - - # Leere Arrays für Nachrichten und Logs - "messages": [], - "logs": [] - } - - print("DEBUG Init workflow") - # Log-Eintrag für den Start des Workflows - self._add_log(workflow, "Workflow gestartet", "info") - - # Workflow in Datenbank speichern - if self.lucydom_interface: - try: - # Direktes Speichern des vollständigen Workflow-Objekts - self.lucydom_interface.save_workflow_state(workflow) - logger.info(f"Workflow {workflow_id} in Datenbank erstellt") - except Exception as e: - logger.error(f"Fehler beim Erstellen des Workflows {workflow_id} in Datenbank: {str(e)}") - - # Workflow im Speicher cachen - self.workflows[workflow_id] = workflow - - return workflow - - - async def stop_workflow(self, workflow_id: str) -> bool: - """ - Stoppt einen laufenden Workflow. 
- - Args: - workflow_id: ID des zu stoppenden Workflows - - Returns: - True bei Erfolg, False wenn der Workflow nicht existiert oder bereits beendet wurde - """ - try: - workflow = self.workflows.get(workflow_id) - - if not workflow: - # Versuche den Workflow zu laden - workflow = await self.load_workflow(workflow_id) - if not workflow: - return False - - # Wenn der Workflow nicht im Status 'running' oder 'completed' ist, beenden - if workflow.get("status") not in ["running", "completed"]: - return False - - # Status auf 'stopped' setzen - workflow["status"] = "stopped" - workflow["last_activity"] = datetime.now().isoformat() - - self._add_log(workflow, "Workflow wurde manuell gestoppt", "info") - - # Workflow speichern - self._save_workflow(workflow) - - return True - except Exception as e: - logger.error(f"Fehler beim Stoppen des Workflows {workflow_id}: {str(e)}") - return False - - async def _decide_agent_tasks(self, workflow: Dict[str, Any], message: Dict[str, Any], agents: Dict[str, Dict[str, Any]]) -> List[Dict[str, Any]]: - """ - Entscheidet anhand der Nachricht und Agentenprofile, welche System-Agenten für welche Aufgaben - eingesetzt werden sollen. Der User-Agent wird später immer separat aufgerufen, daher nicht hier berücksichtigt. - - Args: - message: Das zu verarbeitende Message-Objekt - agents: Verfügbare Agenten mit ihren Profilen - - Returns: - Liste mit Aufgaben für System-Agenten (agent_id, prompt) - """ - workflow_id = message.get("workflow_id", "unknown") - - try: - # Nur System-Agenten berücksichtigen, User-Agent ausfiltern - system_agents = {agent_id: agent for agent_id, agent in agents.items() - if agent.get('type') != 'user'} - - # Wenn keine System-Agenten vorhanden sind, leere Liste zurückgeben - if not system_agents: - self._add_log(workflow_id, "Keine System-Agenten verfügbar", "info") - return [] - - # Erstelle einen Prompt für den OpenAI-Call - agent_descriptions = [] - for agent_id, agent in system_agents.items(): - # Informationen zum Antwortformat hinzufügen - result_format = agent.get('result_format', 'Text') - agent_descriptions.append( - f"ID: {agent_id}, Name: {agent['name']}, Typ: {agent['type']}, " - f"Beschreibung: {agent['description']}, Fähigkeiten: {agent['capabilities']}, " - f"Antwortformat: {result_format}" - ) - - agent_description_text = "\n".join(agent_descriptions) - - # Prüfen, ob bereits ausgeführte Agenten im Kontext vorhanden sind - previous_agent_results = [] - if "messages" in workflow: # Verwende workflow statt context - for prev_message in workflow.get("messages", []): - if prev_message.get("agent_type") and prev_message.get("agent_type") != "user": - previous_agent_results.append({ - "agent_id": prev_message.get("agent_id", "unknown"), - "agent_type": prev_message.get("agent_type", "unknown"), - "result_format": prev_message.get("result_format", "Text"), - "sequence_no": prev_message.get("sequence_no", 0) - }) - - previous_results_text = "" - if previous_agent_results: - previous_results_text = "VORHERIGE AGENTEN-ERGEBNISSE:\n" - for result in previous_agent_results: - previous_results_text += ( - f"Agent: {result['agent_id']}, Typ: {result['agent_type']}, " - f"Antwortformat: {result['result_format']}, Sequenz: {result['sequence_no']}\n" - ) - - # Nachrichteninhalt extrahieren - content = message.get("content", "") - - # Dateien aus der Nachricht extrahieren - files = extract_files_from_message(message) - file_descriptions = [] - - for file in files: - file_desc = f"Name: {file.get('name', '')}, Typ: {file.get('content_type', 
'')}" - # Check if content exists and is not too large to include - if 'content' in file and isinstance(file.get('content'), str) and len(file.get('content', '')) <= 5000: - file_desc += f", Inhalt: {file.get('content', '')[:200]}..." - file_descriptions.append(file_desc) - - file_description_text = "\n".join(file_descriptions) if file_descriptions else "Keine Dateien" - - # Add log for the agent selection process - self._add_log(workflow_id, "Moderator analysiert die Anfrage und entscheidet über System-Agenten...", "info") - - # Prompt für den OpenAI-Call erstellen - decision_prompt = f""" - Du bist der Workflow-Manager, der entscheidet, welche System-Agenten für eine Anfrage eingesetzt werden sollen. - - VERFÜGBARE SYSTEM-AGENTEN: - {agent_description_text} - - {previous_results_text} - - BENUTZERANFRAGE: - {content} - - DATEIEN: - {file_description_text} - - ANWEISUNGEN: - 1. Analysiere die Benutzeranfrage und die Dateien - 2. Entscheide, welche System-Agenten benötigt werden - 3. Berücksichtige die Antwortformate der Agenten bei der Auswahl - 4. Wenn Du in der Lage bist, die BENUTZERANFRAGE direkt selbst zu beantworten, so kannst Du direkt die Antwort senden, wähle dazu FORMAT1. Andernfalls wähle die passenden System-Agenten (mindestens einen) aus und definiere für jeden seine spezifische Aufgabe, die Angaben gemäss FORMAT2. - 5. Gib das Ergebnis als JSON-Array von Objekten zurück - 6. Berücksichtige bei deiner Entscheidung den Kontext und Verlauf der Konversation - 7. Wenn möglich, wähle Agenten so, dass das Ausgabeformat eines Agenten zum erwarteten Eingabeformat des nächsten Agenten passt - - Antwortformat FORMAT1: - [ - {{"agent_id": "moderator", "prompt": "Die direkte Beantwortung der BENUTZERANFRAGE"}}, - ] - - Antwortformat FORMAT2: - [ - {{"agent_id": "agent_id_1", "prompt": "Aufgabenbeschreibung für Agent 1", "expected_format": "Name des erwarteten Ausgabeformats"}}, - {{"agent_id": "agent_id_2", "prompt": "Aufgabenbeschreibung für Agent 2", "expected_format": "Name des erwarteten Ausgabeformats"}} - ] - - WICHTIG: - - Füge keine weiteren Erklärungen hinzu, antworte nur mit dem JSON-Array - """ - - # OpenAI-Call durchführen - content = await self.ai_service.call_api([{"role": "user", "content": decision_prompt}]) - - # Versuche, JSON zu parsen - import json - import re - - # Suche nach JSON-Objekten in der Antwort - json_match = re.search(r'\[\s*{.*}\s*\]', content, re.DOTALL) - - # Auch leere Arrays erkennen - if not json_match and "[]" in content: - # Leeres Array erkannt - keine System-Agenten ausgewählt - self._add_log(workflow_id, "Moderator hat entschieden, keine System-Agenten zu verwenden", "info") - return [] - - if json_match: - json_str = json_match.group(0) - try: - agent_tasks = json.loads(json_str) - - # Validiere die Struktur und filtere User-Agent heraus (falls irrtümlich enthalten) - valid_tasks = [] - for task in agent_tasks: - if "agent_id" not in task or "prompt" not in task: - self._add_log(workflow_id, f"Ungültiges Task-Format ignoriert: {task}", "warning") - continue - - # Prüfe, ob der Agent existiert und kein User-Agent ist - if task["agent_id"] not in system_agents: - self._add_log(workflow_id, f"Agent '{task['agent_id']}' liefert eine direkte Antwort", "info") - - # Füge expected_format hinzu, falls vorhanden - if "expected_format" in task: - # Logge das erwartete Format - self._add_log(workflow_id, f"Agent '{task['agent_id']}' erwartet Format: {task['expected_format']}", "info") - else: - # Default expected_format basierend auf dem Agent-Typ setzen 
- agent_info = system_agents.get(task["agent_id"], {}) - task["expected_format"] = agent_info.get("result_format", "Text") - - valid_tasks.append(task) - - # Logge die Anzahl der ausgewählten Agenten - if valid_tasks: - self._add_log(workflow_id, f"Moderator hat {len(valid_tasks)} System-Agenten ausgewählt", "info") - else: - self._add_log(workflow_id, "Moderator hat keine passenden System-Agenten gefunden", "info") - - logger.debug(f"Ausgewählte System-Agenten-Tasks: {valid_tasks}") - return valid_tasks - - except json.JSONDecodeError as json_error: - self._add_log(workflow_id, f"Fehler beim Parsen des JSON: {str(json_error)}", "error") - logger.error(f"JSON Parse-Fehler: {str(json_error)}") - logger.error(f"Problematischer JSON-String: {json_str}") - - # Keine Agenten zurückgeben bei Parsing-Fehler - return [] - else: - # Kein JSON gefunden - self._add_log(workflow_id, "Moderator konnte keine Agenten-Auswahl treffen", "warning") - logger.warning("Kein gültiges JSON in der Moderator-Antwort gefunden") - return [] - - except Exception as e: - # Bei Fehlern keine Agenten zurückgeben, mit Logging - self._add_log(workflow_id, f"Fehler bei der Agent-Auswahl: {str(e)}", "error") - logger.error(f"Fehler bei der Agent-Auswahl: {str(e)}", exc_info=True) - return [] - - - async def _execute_agent(self, workflow: Dict[str, Any], agent_id: str, prompt: str, expected_format: str = None) -> Optional[Dict[str, Any]]: - """ - Führt einen Agenten mit einem spezifischen Prompt aus. - - Args: - workflow: Das Workflow-Objekt - agent_id: ID des auszuführenden Agenten - prompt: Prompt für den Agenten - expected_format: Erwartetes Format der Antwort (optional) - - Returns: - Das Ergebnis des Agenten oder None bei Fehlern - """ - try: - # Agenten-Instanz holen - registry = AgentRegistry.get_instance() - agent = registry.get_agent(agent_id) - - if not agent: - self._add_log(workflow, f"Agent '{agent_id}' nicht gefunden", "error") - return None - - # Message-Objekt für den Agenten erstellen - agent_message = { - "role": "user", - "content": prompt, - "workflow_id": workflow["id"] - } - - # Kontext mit erwartetem Format erstellen - context = {"expected_format": expected_format} if expected_format else {} - - # Agenten ausführen - self._add_log(workflow, f"Agent '{agent_id}' wird ausgeführt", "info") - result = await agent.process_message(agent_message, context) - - # Prüfen, ob das Ergebnis das erwartete Format hat - result_format = result.get("result_format") - if expected_format and result_format and expected_format != result_format: - self._add_log( - workflow, - f"Warnung: Agent '{agent_id}' hat Format '{result_format}' geliefert, aber '{expected_format}' wurde erwartet", - "warning" - ) - - # Agenten-Antwort als neue Nachricht zum Workflow hinzufügen - agent_response_message = self._create_message(workflow["id"], "assistant") - agent_response_message["content"] = result.get("content", "") - agent_response_message["agent_type"] = agent.type - agent_response_message["agent_id"] = agent_id - agent_response_message["agent_name"] = agent.name - agent_response_message["result_format"] = result.get("result_format", agent.result_format) - - # Nachricht zum Workflow hinzufügen - workflow["messages"].append(agent_response_message) - - - # Nachricht abschließen und in der Datenbank speichern - self._finalize_last_message(workflow) - - # Workflow-Zustand speichern - self._save_workflow(workflow) - - # Ergebnis formatieren und zurückgeben - agent_result = { - "agent_id": agent_id, - "agent_name": agent.name, - "content": 
result.get("content", ""), - "agent_type": agent.type - } - - self._add_log(workflow, f"Agent '{agent_id}' hat geantwortet", "info") - - return agent_result - - except Exception as e: - self._add_log(workflow, f"Fehler bei der Ausführung von Agent '{agent_id}': {str(e)}", "error") - return None - - - async def _create_summary(self, agent_results: List[Dict[str, Any]]) -> str: - """ - Erstellt eine Zusammenfassung der Agentenergebnisse. - - Args: - agent_results: Liste der Agentenergebnisse - - Returns: - Zusammenfassung als Text - """ - if not agent_results: - return "Keine Agentenergebnisse verfügbar." - - # Kombiniere die Ergebnisse in einen Kontext - context = "" - for result in agent_results: - agent_name = result.get("agent_name", "Unbekannter Agent") - content = result.get("content", "") - - context += f"--- {agent_name} ---\n{content}\n\n" - - # Prompt für die Zusammenfassung - summary_prompt = f""" - Erstelle eine aussagekräftige Zusammenfassung der folgenden Agentenergebnisse. - Organisiere die Informationen strukturiert und vermeide Redundanzen. - Behalte alle wichtigen Erkenntnisse und Empfehlungen bei. - - {context} - """ - - # OpenAI-Call für die Zusammenfassung - try: - summary = await self.ai_service.call_api([{"role": "user", "content": summary_prompt}]) - return summary - except Exception as e: - logger.error(f"Fehler bei der Erstellung der Zusammenfassung: {str(e)}") - return "Fehler bei der Erstellung der Zusammenfassung. Bitte die individuellen Agentenergebnisse beachten." - - def _add_log(self, workflow: Dict[str, Any], message: str, log_type: str, agent_id: Optional[str] = None, agent_name: Optional[str] = None) -> None: - """ - Fügt einen Log-Eintrag zum Workflow hinzu und speichert ihn in der Datenbank. - """ - # First, check if workflow is a string (ID) instead of dictionary - if isinstance(workflow, str): - # Try to load the workflow by ID - workflow_id = workflow - workflow = self.workflows.get(workflow_id) - if not workflow: - # Just log to the logger and return - logger.info(f"Log (couldn't add to workflow {workflow_id}): {log_type} - {message}") - return - # Check if workflow is a dictionary - if not isinstance(workflow, dict): - logger.error(f"Invalid workflow type: {type(workflow)}. 
Expected dictionary.") - # Just log to the logger and return - logger.info(f"Log (couldn't add to workflow): {log_type} - {message}") - return - - # Continue with the rest of the function if workflow is a dictionary - - log_entry = { - "id": f"log_{uuid.uuid4()}", - "message": message, - "type": log_type, - "timestamp": datetime.now().isoformat(), - "agent_id": agent_id, - "agent_name": agent_name - } - - # Log-Eintrag zum Workflow hinzufügen - if "logs" not in workflow: - workflow["logs"] = [] - - workflow["logs"].append(log_entry) - - # Letzte Aktivität aktualisieren - workflow["last_activity"] = log_entry["timestamp"] - # Log-Eintrag in Datenbank speichern, falls verfügbar - if self.lucydom_interface: - try: - # Workflow-ID zum Log-Eintrag hinzufügen - log_data = log_entry.copy() - log_data["workflow_id"] = workflow["id"] - - self.lucydom_interface.create_workflow_log(log_data) - logger.debug(f"Log-Eintrag für Workflow {workflow['id']} in Datenbank gespeichert") - except Exception as e: - logger.error(f"Fehler beim Speichern des Log-Eintrags für Workflow {workflow['id']} in Datenbank: {str(e)}") - logger.info(f"Workflow {workflow['id']}: {message}") - - - def _create_message(self, workflow_id: str, role: str = "system", parent_message_id: str = None) -> Dict[str, Any]: - """ - Erstellt ein neues Message-Objekt und speichert es in der Datenbank. - - Args: - workflow_id: ID des Workflows - role: Rolle der Nachricht ('system', 'user', 'assistant') - parent_message_id: ID der Elternnachricht (optional) - - Returns: - Das erstellte Message-Objekt - """ - workflow = self.workflows.get(workflow_id) - - # Sequence-Nummer bestimmen - sequence_no = 1 - if workflow and workflow.get("messages"): - sequence_no = len(workflow["messages"]) + 1 - - # Aktuelle Zeit - current_time = datetime.now().isoformat() - - # Ensure a unique ID for the message - message_id = f"msg_{uuid.uuid4()}" - - # Message-Objekt erstellen - message = { - "id": message_id, - "workflow_id": workflow_id, - "parent_message_id": parent_message_id, - "started_at": current_time, - "finished_at": None, - "sequence_no": sequence_no, - - "status": "pending", - "role": role, - - "data_stats": { - "processing_time": 0.0, - "token_count": 0, - "bytes_sent": 0, - "bytes_received": 0 - }, - - "documents": [], # Initialize empty documents array - "content": None, - "agent_type": None - } - - # In Datenbank speichern, falls verfügbar - if self.lucydom_interface: - try: - # Include all fields in the database version - message_data = { - "id": message_id, - "workflow_id": workflow_id, - "sequence_no": sequence_no, - "role": role, - "content": None, - "agent_type": None, - "created_at": current_time, - # IMPORTANT: Include documents field - "documents": [] - } - - # Log the message creation - logger.debug(f"Creating new message in database: {message_data}") - - result = self.lucydom_interface.create_workflow_message(message_data) - if result: - logger.debug(f"Nachricht für Workflow {workflow_id} in Datenbank erstellt mit ID: {message_id}") - else: - logger.warning(f"Fehler beim Erstellen der Nachricht für Workflow {workflow_id} in Datenbank") - except Exception as e: - logger.error(f"Fehler beim Erstellen der Nachricht für Workflow {workflow_id} in Datenbank: {str(e)}") - - return message - - - def _finalize_last_message(self, workflow: Dict[str, Any]) -> None: - """ - Schließt die letzte Nachricht im Workflow ab und aktualisiert sie in der Datenbank. 
- - Args: - workflow: Das Workflow-Objekt - """ - if not workflow.get("messages"): - return - - last_message = workflow["messages"][-1] - if last_message.get("finished_at") is None: - last_message["finished_at"] = datetime.now().isoformat() - last_message["status"] = "completed" - - # In Datenbank aktualisieren, falls verfügbar - if self.lucydom_interface: - try: - message_id = last_message.get("id") - if not message_id: - logger.warning(f"Keine ID für letzte Nachricht in Workflow {workflow['id']} gefunden") - return - - # Only extract fields that are expected in the database model - # Make sure all required fields have values with proper defaults - message_data = { - "id": message_id, - "workflow_id": workflow.get("id", ""), - "sequence_no": last_message.get("sequence_no", 0), - "role": last_message.get("role", "unknown"), - "content": last_message.get("content", ""), - "agent_type": last_message.get("agent_type", ""), - "created_at": last_message.get("started_at", datetime.now().isoformat()), - # IMPORTANT: Include the documents array - "documents": last_message.get("documents", []) - } - - # Log the message data for debugging - logger.debug(f"Updating message in database with data: {message_data}") - - # Nachricht in Datenbank aktualisieren - self.lucydom_interface.update_workflow_message(message_id, message_data) - logger.debug(f"Nachricht {message_id} für Workflow {workflow['id']} in Datenbank aktualisiert (mit Dokumenten)") - except Exception as e: - logger.error(f"Fehler beim Aktualisieren der Nachricht für Workflow {workflow['id']} in Datenbank: {str(e)}") - - - def get_workflow_status(self, workflow_id: str) -> Optional[Dict[str, Any]]: - """ - Gibt den Status eines Workflows zurück. - - Args: - workflow_id: ID des Workflows - - Returns: - Dictionary mit Status-Informationen oder None, wenn der Workflow nicht existiert - """ - # Aus dem Speicher abrufen - workflow = self.workflows.get(workflow_id) - - # Falls nicht im Speicher, aus der Datenbank oder Datei laden - if not workflow: - # Aus Datenbank laden, falls verfügbar - if self.lucydom_interface: - try: - workflow_data = self.lucydom_interface.get_workflow(workflow_id) - if workflow_data: - workflow = workflow_data - except Exception as e: - logger.error(f"Fehler beim Laden des Workflow-Status aus Datenbank: {str(e)}") - - # Falls nicht in der Datenbank, aus Datei laden - if not workflow: - try: - workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json") - if os.path.exists(workflow_path): - with open(workflow_path, 'r', encoding='utf-8') as f: - workflow = json.load(f) - except Exception as e: - logger.error(f"Fehler beim Laden des Workflow-Status aus Datei: {str(e)}") - return None - - if not workflow: - return None - - # Status-Informationen extrahieren - status_info = { - "id": workflow.get("id"), - "name": workflow.get("name", f"Workflow {workflow_id}"), - "status": workflow.get("status"), - "progress": 1.0 if workflow.get("status") in ["completed", "failed", "stopped"] else 0.5, - "started_at": workflow.get("started_at"), - "last_activity": workflow.get("last_activity"), - "workflow_complete": workflow.get("status") == "completed", # Add this instead - "current_round": workflow.get("current_round", 1), - "data_stats": workflow.get("data_stats", { - "total_processing_time": 0.0, - "total_token_count": 0, - "total_bytes_sent": 0, - "total_bytes_received": 0 - }) - } - - return status_info - - def get_workflow_logs(self, workflow_id: str) -> Optional[List[Dict[str, Any]]]: - """ - Gibt die Logs eines 
Workflows zurück. - - Args: - workflow_id: ID des Workflows - - Returns: - Liste der Logs oder None, wenn der Workflow nicht existiert - """ - # Aus dem Speicher abrufen - workflow = self.workflows.get(workflow_id) - - # Falls nicht im Speicher, aus der Datenbank laden - if not workflow and self.lucydom_interface: - try: - logs = self.lucydom_interface.get_workflow_logs(workflow_id) - return logs - except Exception as e: - logger.error(f"Fehler beim Laden der Workflow-Logs aus Datenbank: {str(e)}") - - # Falls nicht in der Datenbank oder kein Interface verfügbar, aus Datei laden - if not workflow: - try: - workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json") - if os.path.exists(workflow_path): - with open(workflow_path, 'r', encoding='utf-8') as f: - workflow = json.load(f) - except Exception as e: - logger.error(f"Fehler beim Laden der Workflow-Logs aus Datei: {str(e)}") - return None - - return workflow.get("logs", []) if workflow else None - - def get_workflow_messages(self, workflow_id: str) -> Optional[List[Dict[str, Any]]]: - """ - Gibt die Nachrichten eines Workflows zurück. - - Args: - workflow_id: ID des Workflows - - Returns: - Liste der Nachrichten oder None, wenn der Workflow nicht existiert - """ - # Aus dem Speicher abrufen - workflow = self.workflows.get(workflow_id) - - # Falls nicht im Speicher, aus der Datenbank laden - if not workflow and self.lucydom_interface: - try: - messages = self.lucydom_interface.get_workflow_messages(workflow_id) - return messages - except Exception as e: - logger.error(f"Fehler beim Laden der Workflow-Nachrichten aus Datenbank: {str(e)}") - - # Falls nicht in der Datenbank oder kein Interface verfügbar, aus Datei laden - if not workflow: - try: - workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json") - if os.path.exists(workflow_path): - with open(workflow_path, 'r', encoding='utf-8') as f: - workflow = json.load(f) - except Exception as e: - logger.error(f"Fehler beim Laden der Workflow-Nachrichten aus Datei: {str(e)}") - return None - - return workflow.get("messages", []) if workflow else None - -# Anpassen der Factory-Funktion für den WorkflowManager -def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service = None): - """ - Gibt eine WorkflowManager-Instanz für den angegebenen Kontext zurück. - Wiederverwendet bestehende Instanzen. 
- - Args: - mandate_id: ID des Mandanten - user_id: ID des Benutzers - ai_service: Service für KI-Anfragen - - Returns: - Eine WorkflowManager-Instanz - """ - from modules.lucydom_interface import get_lucydom_interface - - context_key = f"{mandate_id}_{user_id}" - - # LucyDOM-Interface für Datenbankzugriffe - lucydom_interface = get_lucydom_interface(mandate_id, user_id) - - if context_key not in _workflow_managers: - _workflow_managers[context_key] = WorkflowManager( - mandate_id, - user_id, - ai_service, - lucydom_interface - ) - - # Aktualisiere die Services, falls sie geändert wurden - if ai_service is not None: - _workflow_managers[context_key].ai_service = ai_service - - return _workflow_managers[context_key] - - -# Singleton-Factory für WorkflowManager-Instanzen pro Kontext -_workflow_managers = {} \ No newline at end of file diff --git a/gwserver/_old_bk_modules/gateway_interface.py b/gwserver/_old_bk_modules/gateway_interface.py deleted file mode 100644 index 7caf809f..00000000 --- a/gwserver/_old_bk_modules/gateway_interface.py +++ /dev/null @@ -1,469 +0,0 @@ -import os -import logging -from typing import Dict, Any, List, Optional, Union -import importlib -from passlib.context import CryptContext - -from connectors.connector_db_json import DatabaseConnector - -logger = logging.getLogger(__name__) - - -# Password-Hashing -pwd_context = CryptContext(schemes=["argon2"], deprecated="auto") - - -class GatewayInterface: - """ - Interface zum Gateway-System. - Verwaltet Benutzer und Mandanten. - """ - - def __init__(self, mandate_id: int = None, user_id: int = None): - """ - Initialisiert das Gateway-Interface mit optionalem Mandanten- und Benutzerkontext. - - Args: - mandate_id: ID des aktuellen Mandanten (optional) - user_id: ID des aktuellen Benutzers (optional) - """ - # Bei der Initialisierung kann der Kontext leer sein - self.mandate_id = mandate_id - self.user_id = user_id - - # Datenverzeichnis - self.data_folder = "_database_gateway" - os.makedirs(self.data_folder, exist_ok=True) - logger.info("db for data_gateway attached") - - # Datenmodell-Modul importieren - try: - self.model_module = importlib.import_module("modules.gateway_model") - logger.info("gateway_model erfolgreich importiert") - except ImportError as e: - logger.error(f"Fehler beim Importieren von gateway_model: {e}") - raise - - # Konnektor erstellen - logger.info(f"API getting connector {mandate_id} {user_id}") - self.db = DatabaseConnector( - db_folder=self.data_folder, - mandate_id=self.mandate_id if self.mandate_id is not None else 0, - user_id=self.user_id if self.user_id is not None else 0 - ) - - # Datenbank initialisieren, falls nötig - self._initialize_database() - - def _initialize_database(self): - """ - Initialisiert die Datenbank mit minimalen Objekten, - falls sie noch nicht existiert. 
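A usage sketch for the factory above: manager instances are cached per (mandate_id, user_id) context key, so repeated calls reuse one instance. Running this assumes the gwserver modules and their LucyDOM dependency are importable:

from modules.agentservice_workflow_manager import get_workflow_manager

wm_a = get_workflow_manager(mandate_id=1, user_id=7)
wm_b = get_workflow_manager(mandate_id=1, user_id=7)
assert wm_a is wm_b            # same context key "1_7" -> cached instance

wm_c = get_workflow_manager(mandate_id=2, user_id=7)
assert wm_c is not wm_a        # different context -> separate manager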
- """ - - # Prüfe, ob Mandanten existieren - # Erstelle den Root-Mandanten, falls nötig - existing_mandate_id = self.get_initial_id("mandates") - mandates = self.db.get_recordset("mandates") - if existing_mandate_id is None or not mandates: - logger.info("Erstelle Root-Mandant") - root_mandate = { - "name": "Root", - "language": "de" - } - created_mandate = self.db.record_create("mandates", root_mandate) - logger.info(f"Root-Mandant wurde erstellt mit ID {created_mandate['id']}") - - # Aktualisiere den Mandanten-Kontext - self.mandate_id = created_mandate['id'] - self.user_id = created_mandate['user_id'] - - # Konnektor mit korrektem Kontext neu erstellen - self.db = DatabaseConnector( - db_folder=self.data_folder, - mandate_id=self.mandate_id, - user_id=self.user_id - ) - - # Prüfe, ob Benutzer existieren - # Erstelle den Admin-Benutzer, falls nötig - existing_user_id = self.get_initial_id("users") - users = self.db.get_recordset("users") - if existing_user_id is None or not users: - logger.info("Erstelle Admin-Benutzer") - admin_user = { - "mandate_id": self.mandate_id, - "username": "admin", - "email": "admin@example.com", - "full_name": "Administrator", - "disabled": False, - "language": "de", - "privilege": "sysadmin", # SysAdmin-Berechtigung - "hashed_password": self._get_password_hash("admin") # In der Produktion ein sicheres Passwort verwenden! - } - created_user = self.db.record_create("users", admin_user) - logger.info(f"Admin-Benutzer wurde erstellt mit ID {created_user['id']}") - - # Aktualisiere den Benutzer-Kontext - self.user_id = created_user['id'] - - # Konnektor mit korrektem Kontext neu erstellen - self.db = DatabaseConnector( - db_folder=self.data_folder, - mandate_id=self.mandate_id, - user_id=self.user_id - ) - - def get_initial_id(self, table: str) -> Optional[int]: - """ - Gibt die initiale ID für eine Tabelle zurück. 
- - Args: - table: Name der Tabelle - - Returns: - Die initiale ID oder None, wenn nicht vorhanden - """ - return self.db.get_initial_id(table) - - def _get_password_hash(self, password: str) -> str: - """Erstellt einen Hash für ein Passwort""" - return pwd_context.hash(password) - - def _verify_password(self, plain_password: str, hashed_password: str) -> bool: - """Überprüft, ob das Passwort zum Hash passt""" - return pwd_context.verify(plain_password, hashed_password) - - def _get_current_timestamp(self) -> str: - """Gibt den aktuellen Zeitstempel im ISO-Format zurück""" - from datetime import datetime - return datetime.now().isoformat() - - # Mandanten-Methoden - - def get_all_mandates(self) -> List[Dict[str, Any]]: - """Gibt alle Mandanten zurück""" - return self.db.get_recordset("mandates") - - def get_mandate(self, mandate_id: int) -> Optional[Dict[str, Any]]: - """Gibt einen Mandanten anhand seiner ID zurück""" - mandates = self.db.get_recordset("mandates", record_filter={"id": mandate_id}) - if mandates: - return mandates[0] - return None - - def create_mandate(self, name: str, language: str = "de") -> Dict[str, Any]: - """Erstellt einen neuen Mandanten""" - mandate_data = { - "name": name, - "language": language - } - - return self.db.record_create("mandates", mandate_data) - - def update_mandate(self, mandate_id: int, mandate_data: Dict[str, Any]) -> Dict[str, Any]: - """ - Aktualisiert einen bestehenden Mandanten - - Args: - mandate_id: Die ID des zu aktualisierenden Mandanten - mandate_data: Die zu aktualisierenden Mandantendaten - - Returns: - Dict[str, Any]: Die aktualisierten Mandantendaten - - Raises: - ValueError: Wenn der Mandant nicht gefunden wurde - """ - # Prüfe, ob der Mandant existiert - mandate = self.get_mandate(mandate_id) - if not mandate: - raise ValueError(f"Mandant mit ID {mandate_id} nicht gefunden") - - # Aktualisiere den Mandanten - updated_mandate = self.db.record_modify("mandates", mandate_id, mandate_data) - - return updated_mandate - - def delete_mandate(self, mandate_id: int) -> bool: - """ - Löscht einen Mandanten und alle damit verbundenen Benutzer und Daten - - Args: - mandate_id: Die ID des zu löschenden Mandanten - - Returns: - bool: True, wenn der Mandant erfolgreich gelöscht wurde, sonst False - """ - # Prüfe, ob der Mandant existiert - mandate = self.get_mandate(mandate_id) - if not mandate: - return False - - # Prüfe, ob es der initiale Mandant ist - initial_mandate_id = self.get_initial_id("mandates") - if initial_mandate_id is not None and mandate_id == initial_mandate_id: - logger.warning(f"Versuch, den Root-Mandanten zu löschen, wurde verhindert") - return False - - # Finde alle Benutzer des Mandanten - users = self.get_users_by_mandate(mandate_id) - - # Lösche alle Benutzer des Mandanten und ihre zugehörigen Daten - for user in users: - self.delete_user(user["id"]) - - # Lösche den Mandanten - success = self.db.record_delete("mandates", mandate_id) - - if success: - logger.info(f"Mandant mit ID {mandate_id} wurde erfolgreich gelöscht") - else: - logger.error(f"Fehler beim Löschen des Mandanten mit ID {mandate_id}") - - return success - - # Benutzer-Methoden - - def get_all_users(self) -> List[Dict[str, Any]]: - """Gibt alle Benutzer zurück""" - users = self.db.get_recordset("users") - # Entferne die Passwort-Hashes aus der Rückgabe - for user in users: - if "hashed_password" in user: - del user["hashed_password"] - return users - - def get_users_by_mandate(self, mandate_id: int) -> List[Dict[str, Any]]: - """ - Gibt alle Benutzer eines 
bestimmten Mandanten zurück - - Args: - mandate_id: Die ID des Mandanten - - Returns: - List[Dict[str, Any]]: Liste der Benutzer des Mandanten - """ - users = self.db.get_recordset("users", record_filter={"mandate_id": mandate_id}) - # Entferne die Passwort-Hashes aus der Rückgabe - for user in users: - if "hashed_password" in user: - del user["hashed_password"] - return users - - def get_user_by_username(self, username: str) -> Optional[Dict[str, Any]]: - """Gibt einen Benutzer anhand seines Benutzernamens zurück""" - users = self.db.get_recordset("users") - for user in users: - if user.get("username") == username: - return user - return None - - def get_user(self, user_id: int) -> Optional[Dict[str, Any]]: - """Gibt einen Benutzer anhand seiner ID zurück""" - users = self.db.get_recordset("users", record_filter={"id": user_id}) - if users: - user = users[0] - # Entferne das Passwort-Hash aus der Rückgabe für die API - if "hashed_password" in user: - user_copy = user.copy() - del user_copy["hashed_password"] - return user_copy - return user - return None - - def create_user(self, username: str, password: str, email: str = None, - full_name: str = None, language: str = "de", mandate_id: int = None, - disabled: bool = False, privilege: str = "user") -> Dict[str, Any]: - """ - Erstellt einen neuen Benutzer - - Args: - username: Der Benutzername - password: Das Passwort - email: Die E-Mail-Adresse (optional) - full_name: Der vollständige Name (optional) - language: Die bevorzugte Sprache (Standard: "de") - mandate_id: Die ID des Mandanten (optional) - disabled: Ob der Benutzer deaktiviert ist (Standard: False) - privilege: Die Berechtigungsstufe (Standard: "user") - - Returns: - Dict[str, Any]: Die erstellten Benutzerdaten - - Raises: - ValueError: Wenn der Benutzername bereits existiert - """ - # Prüfe, ob der Benutzername bereits existiert - existing_user = self.get_user_by_username(username) - if existing_user: - raise ValueError(f"Benutzer '{username}' existiert bereits") - - # Verwende den übergebenen mandate_id oder den aktuellen Kontext - user_mandate_id = mandate_id if mandate_id is not None else self.mandate_id - - user_data = { - "mandate_id": user_mandate_id, - "username": username, - "email": email, - "full_name": full_name, - "disabled": disabled, - "language": language, - "privilege": privilege, - "hashed_password": self._get_password_hash(password) - } - - created_user = self.db.record_create("users", user_data) - - # Entferne das Passwort-Hash aus der Rückgabe - if "hashed_password" in created_user: - del created_user["hashed_password"] - - return created_user - - def authenticate_user(self, username: str, password: str) -> Optional[Dict[str, Any]]: - """ - Authentifiziert einen Benutzer anhand von Benutzername und Passwort - - Args: - username: Der Benutzername - password: Das Passwort - - Returns: - Optional[Dict[str, Any]]: Die Benutzerdaten oder None, wenn die Authentifizierung fehlschlägt - """ - user = self.get_user_by_username(username) - - if not user: - return None - - if not self._verify_password(password, user.get("hashed_password", "")): - return None - - # Prüfe, ob der Benutzer deaktiviert ist - if user.get("disabled", False): - return None - - # Erstelle eine Kopie ohne Passwort-Hash - authenticated_user = {**user} - if "hashed_password" in authenticated_user: - del authenticated_user["hashed_password"] - - return authenticated_user - - def update_user(self, user_id: int, user_data: Dict[str, Any]) -> Dict[str, Any]: - """ - Aktualisiert einen Benutzer - - Args: 
- user_id: Die ID des zu aktualisierenden Benutzers - user_data: Die zu aktualisierenden Benutzerdaten - - Returns: - Dict[str, Any]: Die aktualisierten Benutzerdaten - - Raises: - ValueError: Wenn der Benutzer nicht gefunden wurde - """ - # Hole den aktuellen Benutzer mit Hash-Passwort (direkt aus der DB) - users = self.db.get_recordset("users", record_filter={"id": user_id}) - if not users: - raise ValueError(f"Benutzer mit ID {user_id} nicht gefunden") - - user = users[0] - - # Wenn das Passwort geändert werden soll, hashe es - if "password" in user_data: - user_data["hashed_password"] = self._get_password_hash(user_data["password"]) - del user_data["password"] - - # Aktualisiere den Benutzer - updated_user = self.db.record_modify("users", user_id, user_data) - - # Entferne das Passwort-Hash aus der Rückgabe - if "hashed_password" in updated_user: - del updated_user["hashed_password"] - - return updated_user - - def disable_user(self, user_id: int) -> Dict[str, Any]: - """Deaktiviert einen Benutzer""" - return self.update_user(user_id, {"disabled": True}) - - def enable_user(self, user_id: int) -> Dict[str, Any]: - """Aktiviert einen Benutzer""" - return self.update_user(user_id, {"disabled": False}) - - def _delete_user_referenced_data(self, user_id: int) -> None: - """ - Löscht alle Daten, die mit einem Benutzer verbunden sind - - Args: - user_id: Die ID des Benutzers - """ - # Hier werden alle Tabellen durchsucht und alle Einträge gelöscht, - # die auf diesen Benutzer verweisen - - # Attribute des Benutzers löschen - try: - attributes = self.db.get_recordset("attributes", record_filter={"user_id": user_id}) - for attribute in attributes: - self.db.record_delete("attributes", attribute["id"]) - except Exception as e: - logger.error(f"Fehler beim Löschen der Attribute für Benutzer {user_id}: {e}") - - # Weitere Tabellen, die auf den Benutzer verweisen könnten - # (Je nach Datenbankstruktur der Anwendung) - - logger.info(f"Alle referenzierten Daten für Benutzer {user_id} wurden gelöscht") - - def delete_user(self, user_id: int) -> bool: - """ - Löscht einen Benutzer und alle damit verbundenen Daten - - Args: - user_id: Die ID des zu löschenden Benutzers - - Returns: - bool: True, wenn der Benutzer erfolgreich gelöscht wurde, sonst False - """ - # Prüfe, ob der Benutzer existiert - users = self.db.get_recordset("users", record_filter={"id": user_id}) - if not users: - return False - - # Prüfe, ob es der initiale Benutzer ist - initial_user_id = self.get_initial_id("users") - if initial_user_id is not None and user_id == initial_user_id: - logger.warning("Versuch, den Root-Admin zu löschen, wurde verhindert") - return False - - # Lösche alle mit dem Benutzer verbundenen Daten - self._delete_user_referenced_data(user_id) - - # Lösche den Benutzer - success = self.db.record_delete("users", user_id) - - if success: - logger.info(f"Benutzer mit ID {user_id} wurde erfolgreich gelöscht") - else: - logger.error(f"Fehler beim Löschen des Benutzers mit ID {user_id}") - - return success - - -# Singleton-Factory für GatewayInterface-Instanzen pro Kontext -_gateway_interfaces = {} - -def get_gateway_interface(mandate_id: int = None, user_id: int = None) -> GatewayInterface: - """ - Gibt eine GatewayInterface-Instanz für den angegebenen Kontext zurück. - Wiederverwendet bestehende Instanzen. 
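The `get_gateway_interface` factory that follows repeats a pattern used throughout this patch (also for WorkflowManager and LucyDOMInterface): one cached instance per `mandate_id`/`user_id` context, held in a module-level dict. A hypothetical shared helper, not part of the patch, showing the shape:

    from typing import Callable, Dict, TypeVar

    T = TypeVar("T")
    _instances: Dict[str, object] = {}

    def per_context(factory: Callable[[int, int], T], mandate_id: int, user_id: int) -> T:
        # One instance per (mandate_id, user_id) pair, keyed by factory name.
        key = f"{factory.__name__}_{mandate_id}_{user_id}"
        if key not in _instances:
            _instances[key] = factory(mandate_id, user_id)
        return _instances[key]

Note that the cache is process-local and unsynchronized; two concurrent first calls for the same context can construct two instances.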
- """ - context_key = f"{mandate_id}_{user_id}" - if context_key not in _gateway_interfaces: - _gateway_interfaces[context_key] = GatewayInterface(mandate_id, user_id) - return _gateway_interfaces[context_key] - -# Init -get_gateway_interface() \ No newline at end of file diff --git a/gwserver/_old_bk_modules/gateway_model.py b/gwserver/_old_bk_modules/gateway_model.py deleted file mode 100644 index 5a476fcb..00000000 --- a/gwserver/_old_bk_modules/gateway_model.py +++ /dev/null @@ -1,94 +0,0 @@ -from pydantic import BaseModel, Field -from typing import List, Dict, Any, Optional -from datetime import datetime - - -class Label(BaseModel): - """Label für ein Attribut oder eine Klasse mit Unterstützung für mehrere Sprachen""" - default: str - translations: Dict[str, str] = {} - - def get_label(self, language: str = None): - """Gibt das Label in der angegebenen Sprache zurück, oder den Standardwert wenn nicht verfügbar""" - if language and language in self.translations: - return self.translations[language] - return self.default - - -class Mandate(BaseModel): - """Datenmodell für einen Mandanten""" - id: int = Field(description="Eindeutige ID des Mandanten") - name: str = Field(description="Name des Mandanten") - language: str = Field(description="Standardsprache des Mandanten") - - label: Label = Field( - default=Label(default="Mandant", translations={"en": "Mandate", "fr": "Mandat"}), - description="Label für die Klasse" - ) - - # Labels für Attribute - field_labels: Dict[str, Label] = { - "id": Label(default="ID", translations={}), - "name": Label(default="Name des Mandanten", translations={"en": "Mandate name", "fr": "Nom du mandat"}), - "language": Label(default="Sprache", translations={"en": "Language", "fr": "Langue"}) - } - -class User(BaseModel): - """Datenmodell für einen Benutzer""" - id: int = Field(description="Eindeutige ID des Benutzers") - mandate_id: int = Field(description="ID des zugehörigen Mandanten") - username: str = Field(description="Benutzername für die Anmeldung") - email: Optional[str] = Field(None, description="E-Mail-Adresse des Benutzers") - full_name: Optional[str] = Field(None, description="Vollständiger Name des Benutzers") - language: str = Field(description="Bevorzugte Sprache des Benutzers") - disabled: Optional[bool] = Field(False, description="Gibt an, ob der Benutzer deaktiviert ist") - privilege: str = Field(description="Berechtigungsstufe") #sysadmin,admin,user - - label: Label = Field( - default=Label(default="Benutzer", translations={"en": "User", "fr": "Utilisateur"}), - description="Label für die Klasse" - ) - - # Labels für Attribute - field_labels: Dict[str, Label] = { - "id": Label(default="ID", translations={}), - "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), - "username": Label(default="Benutzername", translations={"en": "Username", "fr": "Nom d'utilisateur"}), - "email": Label(default="E-Mail", translations={"en": "Email", "fr": "E-mail"}), - "full_name": Label(default="Vollständiger Name", translations={"en": "Full name", "fr": "Nom complet"}), - "language": Label(default="Sprache", translations={"en": "Language", "fr": "Langue"}), - "disabled": Label(default="Deaktiviert", translations={"en": "Disabled", "fr": "Désactivé"}), - "privilege": Label(default="Berechtigungsstufe", translations={"en": "Access level", "fr": "Niveau d'accès"}), - } - - -class UserInDB(User): - """Erweiterte Benutzerklasse mit Passwort-Hash""" - hashed_password: str = Field(description="Hash des 
Benutzerpassworts") - - label: Label = Field( - default=Label(default="Benutzer Zugriff", translations={"en": "User Access", "fr": "Accès de l'utilisateur"}), - description="Label für die Klasse" - ) - - # Zusätzliches Label für das Passwort-Feld - field_labels: Dict[str, Label] = { - "hashed_password": Label(default="Passwort-Hash", translations={"en": "Password hash", "fr": "Hachage de mot de passe"}) - } - - -class Token(BaseModel): - """Datenmodell für ein Authentifizierungstoken""" - access_token: str = Field(description="Das ausgestellte Zugriffstoken") - token_type: str = Field(description="Typ des Tokens (meist 'bearer')") - - label: Label = Field( - default=Label(default="Token", translations={"en": "Token", "fr": "Jeton"}), - description="Label für die Klasse" - ) - - # Labels für Attribute - field_labels: Dict[str, Label] = { - "access_token": Label(default="Zugriffstoken", translations={"en": "Access token", "fr": "Jeton d'accès"}), - "token_type": Label(default="Token-Typ", translations={"en": "Token type", "fr": "Type de jeton"}) - } \ No newline at end of file diff --git a/gwserver/_old_bk_modules/lucydom_interface.py b/gwserver/_old_bk_modules/lucydom_interface.py deleted file mode 100644 index 4d164cfc..00000000 --- a/gwserver/_old_bk_modules/lucydom_interface.py +++ /dev/null @@ -1,1265 +0,0 @@ -import os -import logging -import uuid -import shutil -from datetime import datetime, timedelta -import mimetypes -from typing import Dict, Any, List, Optional, Union, BinaryIO, Tuple -import importlib -import asyncio -import hashlib -from pathlib import Path - -from connectors.connector_db_json import DatabaseConnector - -logger = logging.getLogger(__name__) - -# Custom exceptions for file handling -class FileError(Exception): - """Base class for file handling exceptions.""" - pass - -class FileNotFoundError(FileError): - """Exception raised when a file is not found.""" - pass - -class FileStorageError(FileError): - """Exception raised when there's an error storing a file.""" - pass - -class FilePermissionError(FileError): - """Exception raised when there's a permission issue with a file.""" - pass - -class FileDeletionError(FileError): - """Exception raised when there's an error deleting a file.""" - pass - - -class LucyDOMInterface: - """ - Interface zur LucyDOM-Datenbank. - Verwendet den JSON-Konnektor für den Datenzugriff. - """ - - def __init__(self, mandate_id: int, user_id: int): - """ - Initialisiert das LucyDOM-Interface mit Mandanten- und Benutzerkontext. 
- - Args: - mandate_id: ID des aktuellen Mandanten - user_id: ID des aktuellen Benutzers - """ - self.mandate_id = mandate_id - self.user_id = user_id - - # Load configuration from config.ini - import configload - config = configload.load_config() - - # Datenverzeichnis - self.data_folder = "_database_lucydom" - os.makedirs(self.data_folder, exist_ok=True) - - # Upload und temp Verzeichnisse aus config.ini lesen - self.upload_dir = config.get('Module_AgentserviceInterface', 'UPLOAD_DIR', fallback='./_uploads') - self.temp_dir = os.path.join(self.upload_dir, "temp") - os.makedirs(self.upload_dir, exist_ok=True) - os.makedirs(self.temp_dir, exist_ok=True) - - # Datenmodell-Modul importieren - try: - self.model_module = importlib.import_module("modules.lucydom_model") - logger.info("lucydom_model erfolgreich importiert") - except ImportError as e: - logger.error(f"Fehler beim Importieren von lucydom_model: {e}") - raise - - # Konnektor erstellen - self.db = DatabaseConnector( - db_folder=self.data_folder, - mandate_id=mandate_id, - user_id=user_id - ) - - # Datenbank initialisieren, falls nötig - self._initialize_database() - - # Schedule periodic cleanup of temporary files - self._schedule_temp_file_cleanup() - - - - def _schedule_temp_file_cleanup(self): - """Schedule periodic cleanup of temporary files""" - try: - loop = asyncio.get_event_loop() - loop.create_task(self._periodic_temp_file_cleanup()) - except RuntimeError: - # If no event loop is available, log a warning - logger.warning("Kein Event-Loop verfügbar für temporäre Datei-Bereinigung") - - async def _periodic_temp_file_cleanup(self): - """Periodically clean up temporary files""" - while True: - try: - self.cleanup_temp_files() - # Run cleanup every 24 hours - await asyncio.sleep(24 * 60 * 60) - except Exception as e: - logger.error(f"Fehler bei der periodischen Bereinigung temporärer Dateien: {str(e)}") - # If there's an error, wait 1 hour before trying again - await asyncio.sleep(60 * 60) - - def cleanup_temp_files(self, max_age_hours: int = 24): - """ - Clean up temporary files older than the specified age - - Args: - max_age_hours: Maximum age of temporary files in hours - """ - try: - now = datetime.now() - count = 0 - - for item in os.listdir(self.temp_dir): - item_path = os.path.join(self.temp_dir, item) - if os.path.isfile(item_path): - # Check file age - file_time = datetime.fromtimestamp(os.path.getmtime(item_path)) - if now - file_time > timedelta(hours=max_age_hours): - try: - os.remove(item_path) - count += 1 - except Exception as e: - logger.warning(f"Konnte temporäre Datei nicht löschen: {item_path} - {str(e)}") - - logger.info(f"{count} temporäre Dateien bereinigt") - except Exception as e: - logger.error(f"Fehler bei der Bereinigung temporärer Dateien: {str(e)}") - - def cleanup_orphaned_files(self): - """ - Clean up orphaned files that exist on disk but don't have records in the database - """ - try: - # Get all file records from the database - all_files = self.get_all_files() - db_file_paths = {file.get("path") for file in all_files if file.get("path")} - - # Scan the upload directory - count = 0 - for root, _, files in os.walk(self.upload_dir): - # Skip the temp directory - if os.path.normpath(root) == os.path.normpath(self.temp_dir): - continue - - for file in files: - file_path = os.path.join(root, file) - # If the file isn't in the database, delete it - if file_path not in db_file_paths: - try: - os.remove(file_path) - count += 1 - except Exception as e: - logger.warning(f"Konnte verwaiste Datei nicht 
löschen: {file_path} - {str(e)}") - - logger.info(f"{count} verwaiste Dateien bereinigt") - except Exception as e: - logger.error(f"Fehler bei der Bereinigung verwaister Dateien: {str(e)}") - - def _initialize_database(self): - """ - Initialisiert die Datenbank mit minimalen Objekten für den angemeldeten Benutzer im Mandanten, falls sie noch nicht existiert. - Ohne gültigen Benutzer keine Initialisierung. - Erstellt für jede im Datenmodell definierte Tabelle einen initialen Datensatz. - """ - effective_mandate_id = self.mandate_id - effective_user_id = self.user_id - if effective_mandate_id is None or effective_user_id is None: - #data available - return - - # Initialisierung von Standard-Prompts für verschiedene Bereiche - prompts = self.db.get_recordset("prompts") - if not prompts: - logger.info("Erstelle Standard-Prompts") - - # Standard-Prompts definieren - standard_prompts = [ - { - "mandate_id": effective_mandate_id, - "user_id": effective_user_id, - "content": "Recherchiere die aktuellen Markttrends und Entwicklungen im Bereich [THEMA]. Sammle Informationen zu führenden Unternehmen, innovativen Produkten oder Dienstleistungen und aktuellen Herausforderungen. Präsentiere die Ergebnisse in einer strukturierten Übersicht mit relevanten Daten und Quellen.", - "name": "Web Research: Marktforschung" - }, - { - "mandate_id": effective_mandate_id, - "user_id": effective_user_id, - "content": "Analysiere den beigefügten Datensatz zu [THEMA] und identifiziere die wichtigsten Trends, Muster und Auffälligkeiten. Führe statistische Berechnungen durch, um deine Erkenntnisse zu untermauern. Stelle die Ergebnisse in einer klar strukturierten Analyse dar und ziehe relevante Schlussfolgerungen.", - "name": "Analyse: Datenanalyse" - }, - { - "mandate_id": effective_mandate_id, - "user_id": effective_user_id, - "content": "Erstelle ein detailliertes Protokoll unserer Besprechung zum Thema [THEMA]. Erfasse alle besprochenen Punkte, getroffenen Entscheidungen und vereinbarten Maßnahmen. Strukturiere das Protokoll übersichtlich mit Tagesordnungspunkten, Teilnehmerliste und klaren Verantwortlichkeiten für die Follow-up-Aktionen.", - "name": "Protokoll: Besprechungsprotokoll" - }, - { - "mandate_id": effective_mandate_id, - "user_id": effective_user_id, - "content": "Entwickle ein UI/UX-Designkonzept für [ANWENDUNG/WEBSITE]. Berücksichtige die Zielgruppe, Hauptfunktionen und die Markenidentität. Beschreibe die visuelle Gestaltung, Navigation, Interaktionsmuster und Informationsarchitektur. Erläutere, wie das Design die Benutzerfreundlichkeit und das Nutzererlebnis optimiert.", - "name": "Design: UI/UX Design" - } - ] - - # Prompts erstellen - for prompt_data in standard_prompts: - created_prompt = self.db.record_create("prompts", prompt_data) - logger.info(f"Prompt '{prompt_data.get('name', 'Standard')}' wurde erstellt mit ID {created_prompt['id']}") - - # File utilities - Moved from agentservice_filehandling - def get_mime_type(self, file_path: str) -> str: - """ - Bestimmt den MIME-Typ einer Datei. 
- - Args: - file_path: Pfad zur Datei - - Returns: - Der erkannte MIME-Typ - """ - # Versuche, den MIME-Typ über den Dateipfad zu erkennen - mime_type, _ = mimetypes.guess_type(file_path) - - # Wenn kein MIME-Typ erkannt wurde, versuche es über die Dateiendung - if not mime_type: - ext = os.path.splitext(file_path)[1].lower()[1:] - mime_type = self.get_mime_type_from_extension(ext) - - return mime_type - - def get_mime_type_from_extension(self, extension: str) -> str: - """ - Bestimmt den MIME-Typ basierend auf der Dateiendung. - - Args: - extension: Die Dateiendung ohne Punkt - - Returns: - Der entsprechende MIME-Typ - """ - extension_to_mime = { - "pdf": "application/pdf", - "docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "doc": "application/msword", - "xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", - "xls": "application/vnd.ms-excel", - "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", - "ppt": "application/vnd.ms-powerpoint", - "csv": "text/csv", - "txt": "text/plain", - "json": "application/json", - "xml": "application/xml", - "html": "text/html", - "htm": "text/html", - "jpg": "image/jpeg", - "jpeg": "image/jpeg", - "png": "image/png", - "gif": "image/gif", - "webp": "image/webp", - "svg": "image/svg+xml", - "py": "text/x-python", - "js": "application/javascript", - "css": "text/css" - } - - return extension_to_mime.get(extension.lower(), "application/octet-stream") - - def determine_file_type(self, file_path: str) -> str: - """ - Bestimmt den Typ einer Datei basierend auf dem MIME-Typ. - - Args: - file_path: Pfad zur Datei - - Returns: - Art der Datei: "image", "document" oder "file" - """ - mime_type = self.get_mime_type(file_path) - - # Bildtypen - if mime_type.startswith("image/"): - return "image" - - # Dokumenttypen - document_types = [ - "application/pdf", - "application/vnd.openxmlformats-officedocument.wordprocessingml.document", # docx - "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", # xlsx - "application/vnd.openxmlformats-officedocument.presentationml.presentation", # pptx - "application/vnd.ms-excel", - "application/vnd.ms-powerpoint", - "application/msword", - "text/csv", - "text/plain", - "application/json", - "application/xml", - "text/html", - "text/x-python", - "application/javascript", - "text/css" - ] - - if any(mime_type.startswith(dt) for dt in document_types) or mime_type in document_types: - return "document" - - # Fallback für unbekannte Typen - return "file" - - def calculate_file_hash(self, file_content: bytes) -> str: - """ - Calculate SHA-256 hash of file content for deduplication - - Args: - file_content: Binary content of the file - - Returns: - SHA-256 hash as a hexadecimal string - """ - return hashlib.sha256(file_content).hexdigest() - - def _get_current_timestamp(self) -> str: - """Gibt den aktuellen Zeitstempel im ISO-Format zurück""" - return datetime.now().isoformat() - - def get_initial_id(self, table: str) -> Optional[int]: - """ - Gibt die initiale ID für eine Tabelle zurück. 
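Together, the two lookups above reduce every stored file to one of three coarse types. A usage sketch with hypothetical file names (expected results follow from the extension map):

    iface = get_lucydom_interface(1, 7)
    iface.get_mime_type("report.xlsx")        # spreadsheet MIME type via the extension map
    iface.determine_file_type("report.xlsx")  # 'document'
    iface.determine_file_type("logo.png")     # 'image'
    iface.determine_file_type("data.bin")     # 'file' (fallback for unknown types)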
- - Args: - table: Name der Tabelle - - Returns: - Die initiale ID oder None, wenn nicht vorhanden - """ - return self.db.get_initial_id(table) - - - # Datei-Methoden - - def get_all_files(self) -> List[Dict[str, Any]]: - """Gibt alle Dateien des aktuellen Mandanten zurück""" - return self.db.get_recordset("files") - - def get_file(self, file_id: int) -> Optional[Dict[str, Any]]: - """Gibt eine Datei anhand ihrer ID zurück""" - files = self.db.get_recordset("files", record_filter={"id": file_id}) - if files: - return files[0] - return None - - def create_file(self, name: str, file_type: str, content_type: str = None, - size: int = None, path: str = None, file_hash: str = None) -> Dict[str, Any]: - """Erstellt einen neuen Dateieintrag""" - file_data = { - "mandate_id": self.mandate_id, - "user_id": self.user_id, - "name": name, - "type": file_type, - "content_type": content_type, - "size": size, - "path": path, - "hash": file_hash, - "upload_date": self._get_current_timestamp() - } - - return self.db.record_create("files", file_data) - - def update_file(self, file_id: int, update_data: Dict[str, Any]) -> Dict[str, Any]: - """ - Aktualisiert eine vorhandene Datei - - Args: - file_id: ID der zu aktualisierenden Datei - update_data: Dictionary mit zu aktualisierenden Feldern - - Returns: - Das aktualisierte Datei-Objekt - """ - # Prüfen, ob die Datei existiert - file = self.get_file(file_id) - if not file: - raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden") - - # Datei aktualisieren - return self.db.record_modify("files", file_id, update_data) - - def check_for_duplicate_file(self, file_hash: str) -> Optional[Dict[str, Any]]: - """ - Check if a file with the same hash already exists - - Args: - file_hash: SHA-256 hash of the file content - - Returns: - File record if a duplicate exists, None otherwise - """ - files = self.db.get_recordset("files", record_filter={"hash": file_hash}) - if files: - return files[0] - return None - - def save_uploaded_file(self, file_content: bytes, file_name: str) -> Dict[str, Any]: - """ - Speichert eine hochgeladene Datei und erstellt einen Datenbankeintrag. 
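Stripped of logging, filename sanitization and error handling, the storage path below reduces to hash-based deduplication. A condensed sketch, assuming `iface` is a LucyDOMInterface:

    import hashlib
    import os
    import uuid

    def store(iface, content: bytes, name: str) -> dict:
        digest = hashlib.sha256(content).hexdigest()
        duplicate = iface.check_for_duplicate_file(digest)
        if duplicate:
            return duplicate                  # identical bytes already stored once
        target_dir = os.path.join(iface.upload_dir, str(iface.mandate_id))
        os.makedirs(target_dir, exist_ok=True)
        path = os.path.join(target_dir, f"file_{uuid.uuid4()}_{name}")
        with open(path, "wb") as handle:
            handle.write(content)
        return iface.create_file(
            name=name,
            file_type=iface.determine_file_type(path),
            content_type=iface.get_mime_type(path),
            size=len(content),
            path=path,
            file_hash=digest,
        )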
- - Args: - file_content: Binärdaten der Datei - file_name: Name der Datei - - Returns: - Dictionary mit Metadaten der gespeicherten Datei - """ - try: - # Debug: Log the start of the file upload process - logger.info(f"Starting upload process for file: {file_name}") - logger.info(f"Upload directory: {self.upload_dir}, Mandate ID: {self.mandate_id}") - - # Debug: Check if file_content is valid bytes - if not isinstance(file_content, bytes): - logger.error(f"Invalid file_content type: {type(file_content)}") - raise ValueError(f"file_content must be bytes, got {type(file_content)}") - - # Calculate file hash for deduplication - file_hash = self.calculate_file_hash(file_content) - logger.debug(f"Calculated file hash: {file_hash}") - - # Check for duplicate - existing_file = self.check_for_duplicate_file(file_hash) - if existing_file: - # Simply return the existing file metadata - logger.info(f"Duplikat gefunden für {file_name}: {existing_file['id']}") - return existing_file - - # Generiere eindeutige ID - file_id = f"file_{uuid.uuid4()}" - logger.debug(f"Generated file ID: {file_id}") - - # Sanitize filename - safe_filename = Path(file_name).name # Get only the filename part - logger.debug(f"Sanitized filename: {safe_filename}") - - # Create parent directories if needed - mandate_upload_dir = os.path.join(self.upload_dir, str(self.mandate_id)) - logger.debug(f"Mandate upload directory: {mandate_upload_dir}") - - # Debug: Check if mandate upload directory exists - if not os.path.exists(mandate_upload_dir): - logger.info(f"Creating mandate upload directory: {mandate_upload_dir}") - - os.makedirs(mandate_upload_dir, exist_ok=True) - - # Dateipfad erstellen mit Mandant als Unterverzeichnis - file_path = os.path.join(mandate_upload_dir, f"{file_id}_{safe_filename}") - logger.debug(f"Full file path: {file_path}") - - # Datei speichern - logger.info(f"Writing file content to: {file_path}") - with open(file_path, "wb") as f: - f.write(file_content) - - # Verify file was created - if not os.path.exists(file_path): - logger.error(f"File was not created at path: {file_path}") - raise FileStorageError(f"File could not be created at {file_path}") - else: - logger.info(f"File successfully saved to: {file_path}") - - # Dateigröße bestimmen - file_size = len(file_content) - - # MIME-Typ und Dateityp bestimmen - mime_type = self.get_mime_type(file_path) - file_type = self.determine_file_type(file_path) - - # Erstelle Metadaten - file_meta = { - "id": file_id, - "name": file_name, - "path": file_path, - "size": file_size, - "type": file_type, - "content_type": mime_type, - "hash": file_hash, - "upload_date": datetime.now().isoformat(), - "mandate_id": self.mandate_id, - "user_id": self.user_id - } - - logger.debug(f"File metadata: {file_meta}") - - # Speichere in der Datenbank - logger.info(f"Saving file metadata to database for file: {file_id}") - db_file = self.create_file( - name=file_name, - file_type=file_type, - content_type=mime_type, - size=file_size, - path=file_path, - file_hash=file_hash - ) - - # Debug: Verify database record was created - if not db_file: - logger.warning(f"Database record for file {file_id} was not created properly") - else: - logger.info(f"Database record created for file {file_id}") - - # Wenn Datenbank-ID vorhanden ist, übernehme sie - if db_file and "id" in db_file: - file_meta["id"] = db_file["id"] - - logger.info(f"File upload process completed for: {file_name}") - return file_meta - - except Exception as e: - # If an error occurs, clean up any partial file - if 'file_path' 
in locals() and os.path.exists(file_path): - try: - logger.warning(f"Cleaning up partial file: {file_path}") - os.remove(file_path) - except Exception as cleanup_error: - logger.error(f"Error cleaning up partial file: {cleanup_error}") - - logger.error(f"Error in save_uploaded_file for {file_name}: {str(e)}", exc_info=True) - raise FileStorageError(f"Fehler beim Speichern der Datei: {str(e)}") - - - - async def read_file_content(self, file_id: str) -> Optional[bytes]: - """ - Reads the content of a file by ID - - Args: - file_id: ID of the file - - Returns: - File content as bytes or None if not found - """ - try: - # Get file metadata - file = self.get_file(file_id) - - if not file or "path" not in file: - raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden") - - file_path = file["path"] - - # Check if file exists - if not os.path.exists(file_path): - raise FileNotFoundError(f"Datei nicht gefunden: {file_path}") - - # Read file content - with open(file_path, "rb") as f: - content = f.read() - - return content - except FileNotFoundError as e: - # Re-raise FileNotFoundError as is - raise - except Exception as e: - logger.error(f"Fehler beim Lesen der Datei {file_id}: {str(e)}") - raise FileError(f"Fehler beim Lesen der Datei: {str(e)}") - - def download_file(self, file_id: str) -> Optional[Dict[str, Any]]: - """ - Gibt eine Datei zum Download zurück. - - Args: - file_id: ID der Datei - - Returns: - Dictionary mit Dateidaten und -metadaten oder None, wenn nicht gefunden - """ - try: - # Suche die Datei in der Datenbank - file = self.get_file(file_id) - - if not file or "path" not in file: - raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden") - - file_path = file["path"] - - # Prüfe, ob die Datei existiert - if not os.path.exists(file_path): - raise FileNotFoundError(f"Datei nicht gefunden: {file_path}") - - # Lese die Datei - with open(file_path, "rb") as f: - file_content = f.read() - - return { - "id": file_id, - "name": file.get("name", os.path.basename(file_path)), - "content_type": file.get("content_type", self.get_mime_type(file_path)), - "size": file.get("size", len(file_content)), - "path": file_path, - "content": file_content - } - except FileNotFoundError as e: - # Re-raise FileNotFoundError as is - raise - except Exception as e: - logger.error(f"Fehler beim Herunterladen der Datei {file_id}: {str(e)}") - raise FileError(f"Fehler beim Herunterladen der Datei: {str(e)}") - - def delete_file(self, file_id: str) -> bool: - """ - Löscht eine Datei aus der Datenbank und dem Dateisystem. 
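Because identical content is stored only once, deletion has to check for other records sharing the same hash before unlinking the bytes. The core of the method below, as a sketch:

    import os

    def delete_dedup_aware(iface, file_id) -> bool:
        record = iface.get_file(file_id)
        if record is None:
            return False
        siblings = []
        if record.get("hash"):
            siblings = [f for f in iface.db.get_recordset(
                            "files", record_filter={"hash": record["hash"]})
                        if f.get("id") != file_id]
        deleted = iface.db.record_delete("files", file_id)
        path = record.get("path")
        if deleted and not siblings and path and os.path.exists(path):
            os.remove(path)                   # last reference: remove the bytes too
        return deleted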
- - Args: - file_id: ID der Datei - - Returns: - True bei Erfolg, False bei Fehler - """ - try: - # Suche die Datei in der Datenbank - file = self.get_file(file_id) - - if not file: - raise FileNotFoundError(f"Datei mit ID {file_id} nicht gefunden") - - # Prüfe, ob die Datei zum aktuellen Mandanten gehört - if file.get("mandate_id") != self.mandate_id: - raise FilePermissionError(f"Keine Berechtigung zum Löschen der Datei {file_id}") - - # Speichere den Dateipfad - file_path = file.get("path") - - # Check for other references to this file (by hash) - file_hash = file.get("hash") - if file_hash: - other_references = [f for f in self.db.get_recordset("files", record_filter={"hash": file_hash}) - if f.get("id") != file_id] - - # If other files reference this content, only delete the database entry - if other_references: - logger.info(f"Andere Referenzen auf den Dateiinhalt gefunden, nur DB-Eintrag wird gelöscht: {file_id}") - return self.db.record_delete("files", file_id) - - # Lösche den Datenbankeintrag - db_success = self.db.record_delete("files", file_id) - - # Wenn der Datenbankeintrag erfolgreich gelöscht wurde und ein Dateipfad vorhanden ist, - # lösche auch die Datei - if db_success and file_path and os.path.exists(file_path): - try: - os.remove(file_path) - return True - except Exception as e: - logger.error(f"Fehler beim physischen Löschen der Datei {file_path}: {str(e)}") - # Datenbankdatei wurde gelöscht, physische Datei nicht - trotzdem Erfolg melden - return True - - return db_success - except FileNotFoundError as e: - # Pass through FileNotFoundError - raise - except FilePermissionError as e: - # Pass through FilePermissionError - raise - except Exception as e: - logger.error(f"Fehler beim Löschen der Datei {file_id}: {str(e)}") - raise FileDeletionError(f"Fehler beim Löschen der Datei: {str(e)}") - - - # Prompt-Methoden - - def get_all_prompts(self) -> List[Dict[str, Any]]: - """Gibt alle Prompts des aktuellen Mandanten zurück""" - return self.db.get_recordset("prompts") - - def get_prompt(self, prompt_id: int) -> Optional[Dict[str, Any]]: - """Gibt einen Prompt anhand seiner ID zurück""" - prompts = self.db.get_recordset("prompts", record_filter={"id": prompt_id}) - if prompts: - return prompts[0] - return None - - def create_prompt(self, content: str, name: str) -> Dict[str, Any]: - """Erstellt einen neuen Prompt""" - prompt_data = { - "mandate_id": self.mandate_id, - "user_id": self.user_id, - "content": content, - "name": name, - "created_at": self._get_current_timestamp() - } - - return self.db.record_create("prompts", prompt_data) - - def update_prompt(self, prompt_id: int, content: str = None, name: str = None) -> Dict[str, Any]: - """ - Aktualisiert einen vorhandenen Prompt - - Args: - prompt_id: ID des zu aktualisierenden Prompts - content: Neuer Inhalt des Prompts - - Returns: - Das aktualisierte Prompt-Objekt - """ - # Prüfen, ob der Prompt existiert - prompt = self.get_prompt(prompt_id) - if not prompt: - return None - - # Daten für die Aktualisierung vorbereiten - prompt_data = {} - - if content is not None: - prompt_data["content"] = content - if name is not None: - prompt_data["name"] = name - - # Prompt aktualisieren - return self.db.record_modify("prompts", prompt_id, prompt_data) - - def delete_prompt(self, prompt_id: int) -> bool: - """ - Löscht einen Prompt aus der Datenbank - - Args: - prompt_id: ID des zu löschenden Prompts - - Returns: - True, wenn der Prompt erfolgreich gelöscht wurde, sonst False - """ - return self.db.record_delete("prompts", 
prompt_id) - - - # Workflow Methoden - - def get_all_workflows(self) -> List[Dict[str, Any]]: - """Gibt alle Workflows des aktuellen Mandanten zurück""" - return self.db.get_recordset("workflows") - - def get_workflows_by_user(self, user_id: int) -> List[Dict[str, Any]]: - """Gibt alle Workflows eines Benutzers zurück""" - return self.db.get_recordset("workflows", record_filter={"user_id": user_id}) - - def get_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]: - """Gibt einen Workflow anhand seiner ID zurück""" - workflows = self.db.get_recordset("workflows", record_filter={"id": workflow_id}) - if workflows: - return workflows[0] - return None - - def create_workflow(self, workflow_data: Dict[str, Any]) -> Dict[str, Any]: - """Erstellt einen neuen Workflow in der Datenbank""" - # Stellen Sie sicher, dass mandate_id und user_id gesetzt sind - if "mandate_id" not in workflow_data: - workflow_data["mandate_id"] = self.mandate_id - - if "user_id" not in workflow_data: - workflow_data["user_id"] = self.user_id - - # Zeitstempel setzen, falls nicht vorhanden - current_time = self._get_current_timestamp() - if "started_at" not in workflow_data: - workflow_data["started_at"] = current_time - - if "last_activity" not in workflow_data: - workflow_data["last_activity"] = current_time - - return self.db.record_create("workflows", workflow_data) - - def update_workflow(self, workflow_id: str, workflow_data: Dict[str, Any]) -> Dict[str, Any]: - """ - Aktualisiert einen vorhandenen Workflow. - - Args: - workflow_id: ID des zu aktualisierenden Workflows - workflow_data: Neue Daten für den Workflow - - Returns: - Das aktualisierte Workflow-Objekt - """ - # Prüfen, ob der Workflow existiert - workflow = self.get_workflow(workflow_id) - if not workflow: - return None - - # Aktualisierungszeit setzen - workflow_data["last_activity"] = self._get_current_timestamp() - - # Workflow aktualisieren - return self.db.record_modify("workflows", workflow_id, workflow_data) - - def delete_workflow(self, workflow_id: str) -> bool: - """ - Löscht einen Workflow aus der Datenbank. 
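`create_workflow` above fills in context and timestamps with repeated `if ... not in` checks; `dict.setdefault` expresses the same defaulting more compactly (equivalent sketch, hypothetical helper):

    from datetime import datetime

    def with_defaults(workflow_data: dict, mandate_id: int, user_id: int) -> dict:
        now = datetime.now().isoformat()
        workflow_data.setdefault("mandate_id", mandate_id)
        workflow_data.setdefault("user_id", user_id)
        workflow_data.setdefault("started_at", now)
        workflow_data.setdefault("last_activity", now)
        return workflow_data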
- - Args: - workflow_id: ID des zu löschenden Workflows - - Returns: - True bei Erfolg, False wenn der Workflow nicht existiert - """ - # Prüfen, ob der Workflow existiert - workflow = self.get_workflow(workflow_id) - if not workflow: - return False - - # Prüfen, ob der Benutzer der Eigentümer ist oder Admin-Rechte hat - if workflow.get("user_id") != self.user_id: - # Hier könnte eine Prüfung auf Admin-Rechte erfolgen - return False - - # Workflow löschen - return self.db.record_delete("workflows", workflow_id) - - def get_workflow_messages(self, workflow_id: str) -> List[Dict[str, Any]]: - """Gibt alle Nachrichten eines Workflows zurück""" - return self.db.get_recordset("workflow_messages", record_filter={"workflow_id": workflow_id}) - - def create_workflow_message(self, message_data: Dict[str, Any]) -> Dict[str, Any]: - """Erstellt eine neue Nachricht für einen Workflow - - Args: - message_data: Die Nachrichtendaten - - Returns: - Die erstellte Nachricht oder None bei Fehler - """ - try: - # Check if required fields are present - required_fields = ["id", "workflow_id"] - for field in required_fields: - if field not in message_data: - logger.error(f"Pflichtfeld '{field}' fehlt in message_data") - raise ValueError(f"Pflichtfeld '{field}' fehlt in den Nachrichtendaten") - - # Validate that ID is not None - if message_data["id"] is None: - message_data["id"] = f"msg_{uuid.uuid4()}" - logger.warning(f"Automatisch generierte ID für Workflow-Nachricht: {message_data['id']}") - - # Stellen Sie sicher, dass die benötigten Felder vorhanden sind - if "created_at" not in message_data: - message_data["created_at"] = self._get_current_timestamp() - - # Debug-Log für die zu erstellenden Daten - logger.debug(f"Erstelle Workflow-Nachricht mit Daten: {message_data}") - - return self.db.record_create("workflow_messages", message_data) - except Exception as e: - logger.error(f"Fehler beim Erstellen der Workflow-Nachricht: {str(e)}") - # Return None instead of raising to avoid cascading failures - return None - - def update_workflow_message(self, message_id: str, message_data: Dict[str, Any]) -> Dict[str, Any]: - """ - Aktualisiert eine bestehende Workflow-Nachricht in der Datenbank - with improved document handling. 
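The "improved document handling" referred to here is mostly a size guard: text contents are clipped to a preview before the message hits the JSON store. A sketch of the loop in the method body below:

    PREVIEW_LIMIT = 1000   # same cap as in the method body

    def clip_documents(documents: list) -> list:
        clipped = []
        for doc in documents:
            doc_copy = doc.copy()             # shallow copy, as in the original
            for content in doc_copy.get("contents", []):
                text = content.get("text")
                if content.get("type") == "text" and text and len(text) > PREVIEW_LIMIT:
                    content["text"] = text[:PREVIEW_LIMIT] + "... [truncated]"
            clipped.append(doc_copy)
        return clipped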
- - Args: - message_id: ID der Nachricht - message_data: Zu aktualisierende Daten - - Returns: - Das aktualisierte Nachrichtenobjekt oder None bei Fehler - """ - try: - # Print debug info - print(f"Updating message {message_id} in database") - - # Ensure message_id is provided - if not message_id: - logger.error("No message_id provided for update_workflow_message") - raise ValueError("message_id cannot be empty") - - # Check if message exists in database - messages = self.db.get_recordset("workflow_messages", record_filter={"id": message_id}) - if not messages: - logger.warning(f"Message with ID {message_id} does not exist in database") - - # If message doesn't exist but we have workflow_id, create it - if "workflow_id" in message_data: - logger.info(f"Creating new message with ID {message_id} for workflow {message_data.get('workflow_id')}") - return self.db.record_create("workflow_messages", message_data) - else: - logger.error(f"Workflow ID missing for new message {message_id}") - return None - - # Ensure documents array is handled properly - if "documents" in message_data: - logger.info(f"Message {message_id} has {len(message_data['documents'])} documents") - - # Make sure we're not storing huge content in the database - # For each document, ensure content size is reasonable - documents_to_store = [] - for doc in message_data["documents"]: - doc_copy = doc.copy() - - # Process contents array if it exists - if "contents" in doc_copy: - # Ensure contents is not too large - limit text size - for content in doc_copy["contents"]: - if content.get("type") == "text" and "text" in content: - text = content["text"] - if len(text) > 1000: # Limit text preview to 1000 chars - content["text"] = text[:1000] + "... [truncated]" - - documents_to_store.append(doc_copy) - - # Replace with the processed documents - message_data["documents"] = documents_to_store - - # Log the update data size for debugging - update_data_size = len(str(message_data)) - logger.debug(f"Update data size: {update_data_size} bytes") - - # Ensure ID is in the dataset - if 'id' not in message_data: - message_data['id'] = message_id - - # Update the message - updated_message = self.db.record_modify("workflow_messages", message_id, message_data) - if updated_message: - logger.info(f"Message {message_id} updated successfully") - else: - logger.warning(f"Failed to update message {message_id}") - - return updated_message - except Exception as e: - logger.error(f"Error updating message {message_id}: {str(e)}", exc_info=True) - # Re-raise with full information - raise ValueError(f"Error updating message {message_id}: {str(e)}") - - - - def get_workflow_logs(self, workflow_id: str) -> List[Dict[str, Any]]: - """Gibt alle Log-Einträge eines Workflows zurück""" - return self.db.get_recordset("workflow_logs", record_filter={"workflow_id": workflow_id}) - - def create_workflow_log(self, log_data: Dict[str, Any]) -> Dict[str, Any]: - """Erstellt einen neuen Log-Eintrag für einen Workflow""" - # Stellen Sie sicher, dass die benötigten Felder vorhanden sind - if "timestamp" not in log_data: - log_data["timestamp"] = self._get_current_timestamp() - - return self.db.record_create("workflow_logs", log_data) - - def save_workflow_state(self, workflow: Dict[str, Any], save_messages: bool = True, save_logs: bool = True) -> bool: - """ - Speichert den kompletten Zustand eines Workflows in der Datenbank. - Dies umfasst den Workflow selbst, Nachrichten und Logs. 
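Saving is an upsert against three tables (workflows, workflow_messages, workflow_logs). The message branch of the method below reduces to this sketch, assuming `iface` is a LucyDOMInterface:

    def upsert_messages(iface, workflow: dict) -> None:
        wf_id = workflow["id"]
        existing = {m["id"] for m in iface.get_workflow_messages(wf_id)}
        for msg in workflow.get("messages", []):
            if not msg.get("id"):
                continue                      # messages without an id are skipped
            row = {
                "id": msg["id"],
                "workflow_id": wf_id,
                "sequence_no": msg.get("sequence_no", 0),
                "role": msg.get("role", "unknown"),
                "content": msg.get("content"),
                "documents": msg.get("documents", []),   # persists file attachments
            }
            if msg["id"] in existing:
                iface.db.record_modify("workflow_messages", msg["id"], row)
            else:
                iface.db.record_create("workflow_messages", row)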
- - Args: - workflow: Das vollständige Workflow-Objekt - save_messages: Flag, ob Nachrichten gespeichert werden sollen - save_logs: Flag, ob Logs gespeichert werden sollen - - Returns: - True bei Erfolg, False bei Fehler - """ - try: - workflow_id = workflow.get("id") - if not workflow_id: - return False - - # Extrahiere nur die für die Datenbank relevanten Workflow-Felder - workflow_db_data = { - "id": workflow_id, - "mandate_id": workflow.get("mandate_id", self.mandate_id), - "user_id": workflow.get("user_id", self.user_id), - "name": workflow.get("name", f"Workflow {workflow_id}"), - "status": workflow.get("status", "unknown"), - "started_at": workflow.get("started_at", self._get_current_timestamp()), - "last_activity": workflow.get("last_activity", self._get_current_timestamp()), - "completed_at": workflow.get("completed_at"), - "data_stats": workflow.get("data_stats", {}) - } - - # Prüfen, ob der Workflow bereits existiert - existing_workflow = self.get_workflow(workflow_id) - if existing_workflow: - self.update_workflow(workflow_id, workflow_db_data) - else: - self.create_workflow(workflow_db_data) - - - # Nachrichten speichern - if save_messages and "messages" in workflow: - # Bestehende Nachrichten abrufen - existing_messages = {msg["id"]: msg for msg in self.get_workflow_messages(workflow_id)} - - for message in workflow["messages"]: - message_id = message.get("id") - if not message_id: - continue - - # Nur relevante Daten für die Datenbank extrahieren - message_data = { - "id": message_id, - "workflow_id": workflow_id, - "sequence_no": message.get("sequence_no", 0), - "role": message.get("role", "unknown"), - "content": message.get("content"), - "agent_type": message.get("agent_type"), - "created_at": message.get("started_at", self._get_current_timestamp()), - # IMPORTANT: Include documents field to persist file attachments - "documents": message.get("documents", []) - } - - # Debug logging for documents - doc_count = len(message.get("documents", [])) - if doc_count > 0: - logger.info(f"Message {message_id} has {doc_count} documents to save") - - # Nachricht erstellen oder aktualisieren - if message_id in existing_messages: - self.db.record_modify("workflow_messages", message_id, message_data) - else: - self.db.record_create("workflow_messages", message_data) - - # Logs speichern - if save_logs and "logs" in workflow: - # Bestehende Logs abrufen - existing_logs = {log["id"]: log for log in self.get_workflow_logs(workflow_id)} - - for log in workflow["logs"]: - log_id = log.get("id") - if not log_id: - continue - - # Nur relevante Daten für die Datenbank extrahieren - log_data = { - "id": log_id, - "workflow_id": workflow_id, - "message": log.get("message", ""), - "type": log.get("type", "info"), - "timestamp": log.get("timestamp", self._get_current_timestamp()), - "agent_id": log.get("agent_id"), - "agent_name": log.get("agent_name") - } - - # Log erstellen oder aktualisieren - if log_id in existing_logs: - self.db.record_modify("workflow_logs", log_id, log_data) - else: - self.db.record_create("workflow_logs", log_data) - - return True - except Exception as e: - logger.error(f"Fehler beim Speichern des Workflow-Zustands: {str(e)}") - return False - - - def load_workflow_state(self, workflow_id: str) -> Optional[Dict[str, Any]]: - """ - Lädt den kompletten Zustand eines Workflows aus der Datenbank. - Dies umfasst den Workflow selbst, Nachrichten und Logs. 
- - Args: - workflow_id: ID des zu ladenden Workflows - - Returns: - Das vollständige Workflow-Objekt oder None bei Fehler - """ - try: - # Basis-Workflow laden - workflow = self.get_workflow(workflow_id) - if not workflow: - return None - - # Log the workflow base retrieval - logger.debug(f"Loaded base workflow {workflow_id} from database") - - # Nachrichten laden - messages = self.get_workflow_messages(workflow_id) - # Nach Sequenznummer sortieren - messages.sort(key=lambda x: x.get("sequence_no", 0)) - - # Debug log for messages and document counts - message_count = len(messages) - logger.debug(f"Loaded {message_count} messages for workflow {workflow_id}") - - # Log document counts for each message - for msg in messages: - doc_count = len(msg.get("documents", [])) - if doc_count > 0: - logger.info(f"Message {msg.get('id')} has {doc_count} documents loaded from database") - # Log document details for debugging - for i, doc in enumerate(msg.get("documents", [])): - source = doc.get("source", {}) - logger.debug(f"Document {i+1}: {source.get('name', 'unnamed')} (ID: {source.get('id', 'unknown')})") - - # Logs laden - logs = self.get_workflow_logs(workflow_id) - # Nach Zeitstempel sortieren - logs.sort(key=lambda x: x.get("timestamp", "")) - - # Vollständiges Workflow-Objekt zusammenbauen - complete_workflow = workflow.copy() - complete_workflow["messages"] = messages - complete_workflow["logs"] = logs - - return complete_workflow - except Exception as e: - logger.error(f"Fehler beim Laden des Workflow-Zustands: {str(e)}") - return None - - - # DELETE Workflow message elements - - def delete_workflow_message(self, workflow_id: str, message_id: str) -> bool: - """ - Löscht eine Nachricht aus einem Workflow in der Datenbank. - - Args: - workflow_id: ID des zugehörigen Workflows - message_id: ID der zu löschenden Nachricht - - Returns: - True bei Erfolg, False bei Fehler - """ - try: - # Prüfen, ob die Nachricht existiert - messages = self.get_workflow_messages(workflow_id) - message = next((m for m in messages if m.get("id") == message_id), None) - - if not message: - logger.warning(f"Nachricht {message_id} für Workflow {workflow_id} nicht gefunden") - return False - - # Nachricht aus der Datenbank löschen - return self.db.record_delete("workflow_messages", message_id) - except Exception as e: - logger.error(f"Fehler beim Löschen der Nachricht {message_id}: {str(e)}") - return False - - def delete_file_from_message(self, workflow_id: str, message_id: str, file_id: str) -> bool: - """ - Entfernt eine Dateireferenz aus einer Nachricht. - Die Datei selbst wird nicht gelöscht, nur die Referenz in der Nachricht. - Enhanced version with improved file matching. 
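The "improved file matching" below accepts exact or substring hits on either the document id or its source id. Condensed into a predicate (sketch):

    def doc_matches(doc: dict, file_id: str) -> bool:
        doc_id = doc.get("id")
        source_id = (doc.get("source") or {}).get("id")
        for candidate in (doc_id, source_id):
            if candidate == file_id:
                return True
            if isinstance(candidate, str) and file_id in candidate:
                return True
        return False

    # keep everything that does not match, then write the list back:
    # updated_documents = [d for d in documents if not doc_matches(d, file_id)]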
- - Args: - workflow_id: ID des zugehörigen Workflows - message_id: ID der Nachricht - file_id: ID der zu entfernenden Datei - - Returns: - True bei Erfolg, False bei Fehler - """ - try: - # Log operation - logger.info(f"Removing file {file_id} from message {message_id} in workflow {workflow_id}") - - # Get all workflow messages - all_messages = self.get_workflow_messages(workflow_id) - logger.debug(f"Workflow {workflow_id} has {len(all_messages)} messages") - - # Try different approaches to find the message - message = None - - # Exact match - message = next((m for m in all_messages if m.get("id") == message_id), None) - - # Case-insensitive match - if not message and isinstance(message_id, str): - message = next((m for m in all_messages - if isinstance(m.get("id"), str) and m.get("id").lower() == message_id.lower()), None) - - # Partial match (starts with) - if not message and isinstance(message_id, str): - message = next((m for m in all_messages - if isinstance(m.get("id"), str) and m.get("id").startswith(message_id)), None) - - if not message: - logger.warning(f"Message {message_id} not found in workflow {workflow_id}") - return False - - # Log the found message - logger.info(f"Found message: {message.get('id')}") - - # Check if message has documents - if "documents" not in message or not message["documents"]: - logger.warning(f"No documents in message {message_id}") - return False - - # Log existing documents - documents = message.get("documents", []) - logger.debug(f"Message has {len(documents)} documents") - for i, doc in enumerate(documents): - doc_id = doc.get("id", "unknown") - source = doc.get("source", {}) - source_id = source.get("id", "unknown") - logger.debug(f"Document {i}: doc_id={doc_id}, source_id={source_id}") - - # Create a new list of documents without the one to delete - updated_documents = [] - removed = False - - for doc in documents: - doc_id = doc.get("id") - source = doc.get("source", {}) - source_id = source.get("id") - - # Flexible matching approach - should_remove = ( - (doc_id == file_id) or - (source_id == file_id) or - (isinstance(doc_id, str) and file_id in doc_id) or - (isinstance(source_id, str) and file_id in source_id) - ) - - if should_remove: - removed = True - logger.info(f"Found file to remove: doc_id={doc_id}, source_id={source_id}") - else: - updated_documents.append(doc) - - if not removed: - logger.warning(f"No matching file {file_id} found in message {message_id}") - return False - - # Update message with modified documents array - message_update = { - "documents": updated_documents - } - - # Apply the update directly to the database - updated = self.db.record_modify("workflow_messages", message["id"], message_update) - - if updated: - logger.info(f"Successfully removed file {file_id} from message {message_id}") - return True - else: - logger.warning(f"Failed to update message {message_id} in database") - return False - - except Exception as e: - logger.error(f"Error removing file {file_id} from message {message_id}: {str(e)}") - return False - - -# Singleton-Factory für LucyDOMInterface-Instanzen pro Kontext -_lucydom_interfaces = {} - -def get_lucydom_interface(mandate_id: int = 0, user_id: int = 0) -> LucyDOMInterface: - """ - Gibt eine LucyDOMInterface-Instanz für den angegebenen Kontext zurück. - Wiederverwendet bestehende Instanzen. 
- """ - context_key = f"{mandate_id}_{user_id}" - if context_key not in _lucydom_interfaces: - _lucydom_interfaces[context_key] = LucyDOMInterface(mandate_id, user_id) - return _lucydom_interfaces[context_key] - -# Init -get_lucydom_interface() diff --git a/gwserver/_old_bk_modules/lucydom_model.py b/gwserver/_old_bk_modules/lucydom_model.py deleted file mode 100644 index cdb056b4..00000000 --- a/gwserver/_old_bk_modules/lucydom_model.py +++ /dev/null @@ -1,149 +0,0 @@ -from pydantic import BaseModel, Field -from typing import List, Dict, Any, Optional -from datetime import datetime - - -class Label(BaseModel): - """Label für ein Attribut oder eine Klasse mit Unterstützung für mehrere Sprachen""" - default: str - translations: Dict[str, str] = {} - - def get_label(self, language: str = None): - """Gibt das Label in der angegebenen Sprache zurück, oder den Standardwert wenn nicht verfügbar""" - if language and language in self.translations: - return self.translations[language] - return self.default - - -class FileItem(BaseModel): - """Datenmodell für ein Datenobjekt""" - id: int = Field(description="Eindeutige ID des Datenobjekts") - mandate_id: int = Field(description="ID des zugehörigen Mandanten") - user_id: int = Field(description="ID des Erstellers") - name: str = Field(description="Name des Datenobjekts") - type: str = Field(description="Typ des Datenobjekts ('document', 'image', etc.)") - size: Optional[str] = Field(None, description="Größe des Datenobjekts") - upload_date: Optional[str] = Field(None, description="Datum des Hochladens") - content_type: Optional[str] = Field(None, description="Content-Type des Datenobjekts") - path: Optional[str] = Field(None, description="Pfad zum Datenobjekt") - - label: Label = Field( - default=Label(default="Datenobjekt", translations={"en": "Data Object", "fr": "Objet de données"}), - description="Label für die Klasse" - ) - - # Labels für Attribute - field_labels: Dict[str, Label] = { - "id": Label(default="ID", translations={}), - "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), - "user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}), - "name": Label(default="Name", translations={"en": "Name", "fr": "Nom"}), - "type": Label(default="Typ", translations={"en": "Type", "fr": "Type"}), - "size": Label(default="Größe", translations={"en": "Size", "fr": "Taille"}), - "upload_date": Label(default="Upload-Datum", translations={"en": "Upload date", "fr": "Date de téléchargement"}), - "content_type": Label(default="Content-Type", translations={"en": "Content type", "fr": "Type de contenu"}), - "path": Label(default="Pfad", translations={"en": "Path", "fr": "Chemin"}) - } - - -class Prompt(BaseModel): - """Datenmodell für einen Prompt""" - id: int = Field(description="Eindeutige ID des Prompts") - mandate_id: int = Field(description="ID des zugehörigen Mandanten") - user_id: int = Field(description="ID des Erstellers") - content: str = Field(description="Inhalt des Prompts") - name: str = Field(description="Anzeigename des Prompts") - - label: Label = Field( - default=Label(default="Prompt", translations={"en": "Prompt", "fr": "Invite"}), - description="Label für die Klasse" - ) - - # Labels für Attribute - field_labels: Dict[str, Label] = { - "id": Label(default="ID", translations={}), - "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}), - "user_id": Label(default="Benutzer-ID", translations={"en": "User ID", 
"fr": "ID d'utilisateur"}), - "content": Label(default="Inhalt", translations={"en": "Content", "fr": "Contenu"}), - "name": Label(default="Name", translations={"en": "Label", "fr": "Nom"}), - } - - -# Neue Workflow-Modellklassen - -class DocumentSource(BaseModel): - """Quelle eines Dokuments im Workflow""" - type: str = Field(description="Typ der Quelle ('prompt', 'file', 'clipboard')") - path: Optional[str] = Field(None, description="Speicherpfad (für Dateien)") - name: str = Field(description="Anzeigename der Datei") - size: Optional[int] = Field(None, description="Größe in Bytes") - lines: Optional[int] = Field(None, description="Zeilenanzahl (für Textdateien)") - content_type: Optional[str] = Field(None, description="MIME-Typ") - upload_date: Optional[str] = Field(None, description="Uploaddatum") - -class DocumentContent(BaseModel): - """Inhalt eines Dokuments im Workflow""" - label: Optional[str] = Field(None, description="Optionale Bezeichnung") - type: str = Field(description="Typ des Inhalts ('text', 'image', 'chart', etc.)") - text: Optional[str] = Field(None, description="Textinhalt") - is_extracted: Optional[bool] = Field(False, description="Flag, ob aus Originaldatei extrahiert") - -class Document(BaseModel): - """Dokument im Workflow (inkl. Prompt und referenzierte Dateien)""" - id: str = Field(description="Eindeutige ID des Dokuments") - source: DocumentSource = Field(description="Quellmetadaten") - contents: List[DocumentContent] = Field(description="Dokumentinhalte") - -class DataStats(BaseModel): - """Statistiken für Performance und Datennutzung""" - processing_time: Optional[float] = Field(None, description="Verarbeitungszeit in Sekunden") - token_count: Optional[int] = Field(None, description="Token-Anzahl (für KI-Modelle)") - bytes_sent: Optional[int] = Field(None, description="Gesendete Bytes") - bytes_received: Optional[int] = Field(None, description="Empfangene Bytes") - -class Message(BaseModel): - """Nachrichtenobjekt im Workflow""" - id: str = Field(description="Eindeutige ID der Nachricht") - workflow_id: str = Field(description="Referenz zum übergeordneten Workflow") - parent_message_id: Optional[str] = Field(None, description="Referenz zur beantworteten Nachricht") - started_at: str = Field(description="Zeitstempel für Nachrichtenerstellung") - finished_at: Optional[str] = Field(None, description="Zeitstempel für Nachrichtenabschluss") - sequence_no: int = Field(description="Sequenznummer für Sortierung") - - status: str = Field(description="Status der Nachricht ('pending', 'processing', 'completed', 'failed')") - role: str = Field(description="Rolle des Absenders ('system', 'user', 'assistant')") - - data_stats: Optional[DataStats] = Field(None, description="Statistiken") - documents: Optional[List[Document]] = Field(None, description="Dokumente in dieser Nachricht") - content: Optional[str] = Field(None, description="Textinhalt der Nachricht") - agent_type: Optional[str] = Field(None, description="Typ des verwendeten Agenten") - -class Workflow(BaseModel): - """Workflow-Objekt für Multi-Agent-System""" - id: str = Field(description="Eindeutige ID des Workflows") - name: Optional[str] = Field(None, description="Name des Workflows") - mandate_id: int = Field(description="ID des Mandanten") - user_id: int = Field(description="ID des Benutzers") - status: str = Field(description="Status des Workflows ('running', 'failed', 'stopped')") - started_at: str = Field(description="Startzeitpunkt") - last_activity: str = Field(description="Zeitpunkt der letzten 
Aktivität") - current_round: int = Field(description="Aktuelle Runde") - waiting_for_user: bool = Field(False, description="Flag, ob auf Benutzereingabe gewartet wird") - - data_stats: Optional[Dict[str, Any]] = Field(None, description="Gesamt-Statistiken") - messages: List[Message] = Field(default=[], description="Nachrichtenverlauf") - logs: List[Dict[str, Any]] = Field(default=[], description="Protokolleinträge") - -# Anfragemodelle für die API - -class WorkflowCreateRequest(BaseModel): - """Anfrage zur Erstellung eines neuen Workflows""" - name: Optional[str] = Field(None, description="Name des Workflows") - prompt: str = Field(description="Zu verwendender Prompt") - files: List[int] = Field(default=[], description="Liste von Datei-IDs") - -class UserInputRequest(BaseModel): - """Anfrage für Benutzereingabe an einen laufenden Workflow""" - message: str = Field(description="Nachricht des Benutzers") - additional_files: List[int] = Field(default=[], description="Liste zusätzlicher Datei-IDs") - diff --git a/gwserver/app.py b/gwserver/app.py index 68318ec0..6cef5efa 100644 --- a/gwserver/app.py +++ b/gwserver/app.py @@ -31,7 +31,8 @@ import modules.lucydom_interface as lucydom_model # Konfiguration des Loggers logging.basicConfig( level=logging.DEBUG, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + #format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', handlers=[logging.StreamHandler()] ) logger = logging.getLogger(__name__) diff --git a/gwserver/modules/agentservice_agent_analyst.py b/gwserver/modules/agentservice_agent_analyst.py index 11796d13..5d58fd29 100644 --- a/gwserver/modules/agentservice_agent_analyst.py +++ b/gwserver/modules/agentservice_agent_analyst.py @@ -1,209 +1,1657 @@ """ Datenanalyst-Agent für die Analyse und Interpretation von Daten. -Angepasst für das refaktorisierte Core-Modul. +Angepasst für das refaktorisierte Core-Modul mit AgentCommunicationProtocol. """ import logging import traceback -from typing import List, Dict, Any, Optional, Union -from datetime import datetime +import json +import re import uuid +import io +import base64 +from typing import List, Dict, Any, Optional, Union, Tuple +from datetime import datetime +import pandas as pd +import numpy as np +import matplotlib.pyplot as plt +import seaborn as sns +import plotly.express as px +import plotly.graph_objects as go from modules.agentservice_base import BaseAgent from connectors.connector_aichat_openai import ChatService from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils, FileUtils +from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol logger = logging.getLogger(__name__) class AnalystAgent(BaseAgent): - """Agent für die Analyse und Interpretation von Daten""" + """Agent for data analysis and interpretation""" def __init__(self): - """Initialisiert den Datenanalyst-Agenten""" + """Initialize the data analyst agent""" super().__init__() - self.id = "analyst" - self.name = "Datenanalyst" - self.type = "analyzer" - self.description = "Analysiert und interpretiert Daten" + self.id = "analyst_agent" + self.name = "Data Analyst" + self.type = "analyst" + self.description = "Analyzes and interprets data" self.capabilities = "data_analysis,pattern_recognition,statistics,visualization,data_interpretation" - self.instructions = """ - Du bist der Datenanalyseagent. Deine Aufgabe: - 1. Vorliegende Daten untersuchen und interpretieren - 2. 
Erkenntnisse aus Informationen gewinnen - 3. Trends identifizieren und Zusammenhänge prüfen - 4. Daten visualisieren und Konzepte erklären - 5. Datenqualität bewerten und Handlungsempfehlungen geben - """ self.result_format = "AnalysisReport" - # Chat-Service initialisieren - self.chat_service = ChatService() + # Document capabilities + self.supports_documents = True + self.document_capabilities = ["read", "analyze", "extract"] + self.required_context = ["data_source", "analysis_objectives"] - # Utility-Klassen initialisieren + # Initialize protocol + self.protocol = AgentCommunicationProtocol() + + # Initialize utilities self.message_utils = MessageUtils() self.file_utils = FileUtils() - + + # Setup visualization defaults + self.plt_style = 'seaborn-v0_8-whitegrid' + self.default_figsize = (10, 6) + self.chart_dpi = 100 + plt.style.use(self.plt_style) + def get_agent_info(self) -> Dict[str, Any]: """Get agent information for agent registry""" - return { - "id": self.id, - "type": self.type, - "name": self.name, - "description": self.description, - "capabilities": self.capabilities, - "result_format": self.result_format - } + info = super().get_agent_info() + info.update({ + "metadata": { + "supported_formats": ["csv", "xlsx", "json", "text"], + "analysis_types": ["statistical", "trend", "comparative", "predictive", "clustering", "general"], + "visualization_types": ["bar", "line", "scatter", "histogram", "box", "heatmap", "pie"] + } + }) + return info - def get_prompt(self, message_context: Dict[str, Any]) -> str: + async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: """ - Generiert einen angepassten Prompt für den Datenanalysten. + Process a message and perform data analysis. Args: - message_context: Kontext der Nachricht + message: Input message + context: Optional context Returns: - Formatierter Prompt für den Datenanalysten + Analysis response """ - # Basis-Prompt - prompt = f""" - Du bist {self.name}, ein {self.type} Agent. + # Extract workflow_id from context or message + workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown") - {self.description} - - Fähigkeiten: {self.capabilities} - - {self.instructions} - - Analysiere die vorliegenden Daten. Präsentiere klar strukturierte Ergebnisse - mit einer Zusammenfassung, Detailanalyse und Handlungsempfehlungen. - """ - - # Dateitypspezifische Anweisungen hinzufügen (verkürzt) - document_types = self._get_document_types(message_context) - - if "csv" in document_types or "excel" in document_types: - prompt += "\nTABELLENDATEN: Identifiziere wichtige Spalten, Korrelationen und Trends." - - if "pdf" in document_types or "doc" in document_types: - prompt += "\nTEXTDATEN: Extrahiere zentrale Fakten und Schlüsselthemen." - - if "image" in document_types: - prompt += "\nBILDDATEN: Beschreibe und interpretiere dargestellte Informationen." - - return prompt.strip() - - def _get_document_types(self, message_context: Dict[str, Any]) -> List[str]: - """ - Extrahiert die Dateitypen aus dem Nachrichtenkontext. 
- - Args: - message_context: Kontext der Nachricht - - Returns: - Liste der Dateitypen - """ - document_types = [] - - # Versuche Dokumente aus dem Kontext zu extrahieren - documents = message_context.get("documents", []) - - for doc in documents: - source = doc.get("source", {}) - name = source.get("name", "").lower() - content_type = source.get("content_type", "").lower() - - # Dateityp aus Namen oder Content-Type ableiten - if name.endswith(".csv") or "csv" in content_type: - document_types.append("csv") - elif name.endswith((".xls", ".xlsx")) or "excel" in content_type or "spreadsheet" in content_type: - document_types.append("excel") - elif name.endswith(".pdf") or "pdf" in content_type: - document_types.append("pdf") - elif name.endswith((".doc", ".docx")) or "word" in content_type: - document_types.append("doc") - elif name.endswith((".jpg", ".jpeg", ".png", ".gif")) or "image" in content_type: - document_types.append("image") - - return document_types - - async def process_message(self, message: Dict[str, Any], - workflow: Dict[str, Any], - context: Dict[str, Any] = None, - log_func=None) -> Dict[str, Any]: - """ - Verarbeitet eine Nachricht und führt eine Datenanalyse durch. - - Args: - message: Die zu verarbeitende Nachricht - workflow: Der aktuelle Workflow - context: Zusätzlicher Kontext - log_func: Funktion für Workflow-Logging - - Returns: - Die generierte Antwort mit der Datenanalyse - """ - # Initialize logging - workflow_id = workflow.get("id", "unknown") + # Get or create logging_utils + log_func = context.get("log_func") if context else None logging_utils = LoggingUtils(workflow_id, log_func) - logging_utils.info(f"AnalystAgent startet Datenanalyse", "agents") - # Create response message - response = self.message_utils.create_message(workflow_id, role="assistant") - response["agent_type"] = self.type - response["agent_name"] = self.name - response["parent_message_id"] = message.get("id") + # Create status update using protocol + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Starting data analysis", + sender_id=self.id, + status="in_progress", + progress=0.0, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) + + # Create response structure + response = { + "role": "assistant", + "content": "", + "agent_id": self.id, + "agent_type": self.type, + "agent_name": self.name, + "result_format": self.result_format, + "workflow_id": workflow_id, + "documents": [] + } try: - # Prepare message context for generating the prompt - message_context = {"documents": context.get("documents", [])} if context else {} + # Extract task from message + task = message.get("content", "") - # Generate appropriate prompt based on the context - prompt = self.get_prompt(message_context) - logging_utils.info(f"Datenanalyse mit spezifischem Prompt gestartet", "agents") + # Process any attached documents and extract data + document_context = "" + data_frames = {} - # Prepare messages for the API - messages = [ - {"role": "system", "content": prompt}, - {"role": "user", "content": message.get("content", "")} - ] + if message.get("documents"): + logging_utils.info("Processing documents for analysis", "execution") + document_context, data_frames = await self._process_and_extract_data(message) + + # Update progress + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Documents processed, performing analysis", + sender_id=self.id, + status="in_progress", + 
progress=0.4, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) - # Add context messages if available - if context and "history" in context: - for history_item in context["history"]: - messages.append({ - "role": history_item.get("role", "user"), - "content": history_item.get("content", "") - }) + # If we don't have any data frames but expected to analyze data, report this issue + if not data_frames and any(term in task.lower() for term in ["analyze", "data", "csv", "excel", "file"]): + if message.get("documents"): + logging_utils.warning("No processable data found in the provided documents", "execution") + analysis_content = "## Data Analysis Report\n\nI couldn't find any processable data in the provided documents. Please ensure you've attached CSV, Excel, or other data files in a format I can analyze." + else: + logging_utils.warning("No documents provided for data analysis", "execution") + analysis_content = "## Data Analysis Report\n\nNo data documents were provided for analysis. Please attach CSV, Excel, or other data files for me to analyze." + + response["content"] = analysis_content + return response - # Call the API - logging_utils.info("Rufe AI-Service für die Analyse auf", "agents") - response_content = await self.chat_service.call_api(messages) - logging_utils.info("Analyse abgeschlossen", "agents") + # Determine analysis type and perform analysis + analysis_type = self._determine_analysis_type(task) + logging_utils.info(f"Performing {analysis_type} analysis", "execution") + + # Create enhanced prompt with document context + enhanced_prompt = self._create_enhanced_prompt(message, document_context, context) + + # Generate visualization documents if data is available + visualization_documents = [] + if data_frames: + logging_utils.info(f"Generating visualizations for {len(data_frames)} data sets", "execution") + visualization_documents = self._generate_visualizations(data_frames, analysis_type, workflow_id, task) + + # Add visualizations to response documents + response["documents"].extend(visualization_documents) + + # Update progress + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Visualizations created, finalizing analysis", + sender_id=self.id, + status="in_progress", + progress=0.7, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) + + # Generate analysis with included data insights if we have data frames + analysis_content = "" + if data_frames: + # Extract data insights to include in the analysis + data_insights = self._extract_data_insights(data_frames) + + # Add insights to the prompt + enhanced_prompt += f"\n\n=== DATA INSIGHTS ===\n{data_insights}" + + # Generate analysis with data insights + analysis_content = await self._generate_analysis(enhanced_prompt, analysis_type) + + # Include references to the visualization documents + if visualization_documents: + viz_references = "\n\n## Visualizations\n\n" + viz_references += "The following visualizations have been created to help understand the data:\n\n" + + for i, doc in enumerate(visualization_documents, 1): + doc_source = doc.get("source", {}) + doc_name = doc_source.get("name", f"Visualization {i}") + viz_references += f"{i}. 
**{doc_name}** - Available as an attached document\n" + + analysis_content += viz_references + else: + # Generate analysis based just on text if no data frames + analysis_content = await self._generate_analysis(enhanced_prompt, analysis_type) + + # Final progress update + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Analysis completed", + sender_id=self.id, + status="completed", + progress=1.0, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) # Set the content in the response - response["content"] = response_content + response["content"] = analysis_content - # Finalize the message - self.message_utils.finalize_message(response) - response["result_format"] = self.result_format + # Finish by sending result message to protocol if needed + if context and context.get("require_protocol_message"): + result_message = self.send_analysis_result( + analysis_content=analysis_content, + sender_id=self.id, + receiver_id=context.get("receiver_id", "workflow"), + task_id=context.get("task_id", f"analysis_{uuid.uuid4()}"), + analysis_data={ + "analysis_type": analysis_type, + "visualization_count": len(visualization_documents), + "data_frame_count": len(data_frames) + }, + context_id=workflow_id + ) + # Just log the message creation, don't need to return it + logging_utils.info(f"Created protocol result message: {result_message.id}", "execution") return response except Exception as e: - error_msg = f"Fehler bei der Datenanalyse: {str(e)}" + error_msg = f"Error during data analysis: {str(e)}" logging_utils.error(error_msg, "error") - # Create error response - response["content"] = f"## Fehler bei der Datenanalyse\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```" - self.message_utils.finalize_message(response) + # Create error response using protocol + error_message = self.protocol.create_error_message( + error_description=error_msg, + sender_id=self.id, + error_type="analysis", + error_details={"traceback": traceback.format_exc()}, + context_id=workflow_id + ) + + # Set error content in the response + response["content"] = f"## Error during data analysis\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```" + response["status"] = "error" return response + + async def _process_and_extract_data(self, message: Dict[str, Any]) -> Tuple[str, Dict[str, pd.DataFrame]]: + """ + Process documents and extract structured data. + + Args: + message: Input message with documents + + Returns: + Tuple of (document_context, data_frames_dict) + """ + document_context = "" + data_frames = {} + + if not message.get("documents"): + return document_context, data_frames + + # Extract document text (this will be our context) + if self.document_handler: + document_context = self.document_handler.merge_document_contents(message) + else: + document_context = self._extract_document_text(message) + + # Identify and process data files (CSV, Excel, etc.) 
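+        # Each document is assumed to follow the workflow document schema used
+        # elsewhere in this module, for example:
+        #   {"source": {"id": 1, "name": "data.csv", "content_type": "text/csv"},
+        #    "contents": [{"type": "text", "text": "..."}]}
+        # Files are dispatched on extension/MIME type; unrecognized types are
+        # skipped rather than treated as errors.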
+ for document in message.get("documents", []): + source = document.get("source", {}) + filename = source.get("name", "") + file_id = source.get("id", "") + content_type = source.get("content_type", "") + + # Skip if not a recognizable data file + if not self._is_data_file(filename, content_type): + continue + + try: + # Try to get file content through document handler first + file_content = None + if self.document_handler: + file_content = await self.document_handler.get_file_content(file_id) + + # Process based on file type + if filename.lower().endswith('.csv'): + df = self._process_csv(file_content, filename) + if df is not None: + data_frames[filename] = df + + elif filename.lower().endswith(('.xlsx', '.xls')): + dfs = self._process_excel(file_content, filename) + for sheet_name, df in dfs.items(): + data_frames[f"{filename}::{sheet_name}"] = df + + elif filename.lower().endswith('.json'): + df = self._process_json(file_content, filename) + if df is not None: + data_frames[filename] = df + + except Exception as e: + logger.error(f"Error processing file {filename}: {str(e)}") + + return document_context, data_frames + + def _is_data_file(self, filename: str, content_type: str) -> bool: + """Check if a file is a processable data file""" + if filename.lower().endswith(('.csv', '.xlsx', '.xls', '.json')): + return True + + if content_type in ['text/csv', 'application/vnd.ms-excel', + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet', + 'application/json']: + return True + + return False + + def _process_csv(self, file_content: bytes, filename: str) -> Optional[pd.DataFrame]: + """Process CSV file content into a pandas DataFrame""" + if file_content is None: + return None + + try: + # Try various encodings + for encoding in ['utf-8', 'latin1', 'cp1252']: + try: + # Use StringIO to create a file-like object + text_content = file_content.decode(encoding) + df = pd.read_csv(io.StringIO(text_content)) + + # Basic preprocessing + df = self._preprocess_dataframe(df) + return df + except UnicodeDecodeError: + continue + except Exception as e: + logger.error(f"Error processing CSV with {encoding} encoding: {str(e)}") + + # If all encodings fail, try one more time with errors='replace' + text_content = file_content.decode('utf-8', errors='replace') + df = pd.read_csv(io.StringIO(text_content)) + df = self._preprocess_dataframe(df) + return df + + except Exception as e: + logger.error(f"Failed to process CSV file {filename}: {str(e)}") + return None + + def _process_excel(self, file_content: bytes, filename: str) -> Dict[str, pd.DataFrame]: + """Process Excel file content into pandas DataFrames""" + result = {} + + if file_content is None: + return result + + try: + # Use BytesIO to create a file-like object + excel_file = io.BytesIO(file_content) + + # Try to read with pandas + excel_data = pd.ExcelFile(excel_file) + + # Process each sheet + for sheet_name in excel_data.sheet_names: + df = pd.read_excel(excel_file, sheet_name=sheet_name) + + # Basic preprocessing + df = self._preprocess_dataframe(df) + + # Only include if there's actual data + if not df.empty: + result[sheet_name] = df + + return result + + except Exception as e: + logger.error(f"Failed to process Excel file {filename}: {str(e)}") + return result + + def _process_json(self, file_content: bytes, filename: str) -> Optional[pd.DataFrame]: + """Process JSON file content into a pandas DataFrame""" + if file_content is None: + return None + + try: + # Decode and parse JSON + json_content = file_content.decode('utf-8') + 
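+            # Shapes accepted by the normalization below (a sketch):
+            #   [{"a": 1}, {"a": 2}]          -> DataFrame with column "a"
+            #   {"rows": [{"a": 1}, ...]}     -> first non-empty list value is used
+            #   {"a": 1, "b": 2}              -> single-row DataFrame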
data = json.loads(json_content)
+
+            # Handle different JSON structures
+            if isinstance(data, list):
+                # List of records
+                df = pd.DataFrame(data)
+            elif isinstance(data, dict):
+                # Try to find a suitable data structure in the dict
+                if any(isinstance(v, list) for v in data.values()):
+                    # Find the first list value to use as data
+                    for key, value in data.items():
+                        if isinstance(value, list) and len(value) > 0:
+                            df = pd.DataFrame(value)
+                            break
+                    else:
+                        # No suitable list found
+                        return None
+                else:
+                    # Convert flat dict to a single-row DataFrame
+                    df = pd.DataFrame([data])
+            else:
+                # Unsupported structure
+                return None
+
+            # Basic preprocessing
+            df = self._preprocess_dataframe(df)
+            return df
+
+        except Exception as e:
+            logger.error(f"Failed to process JSON file {filename}: {str(e)}")
+            return None
+
+    def _preprocess_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
+        """Perform basic preprocessing on a DataFrame"""
+        if df.empty:
+            return df
+
+        # Remove completely empty rows and columns
+        df = df.dropna(how='all')
+        df = df.dropna(axis=1, how='all')
+
+        # Try to convert string columns to numeric where appropriate
+        for col in df.columns:
+            # Skip if already numeric
+            if pd.api.types.is_numeric_dtype(df[col]):
+                continue
+
+            # Skip if mostly non-numeric strings
+            if df[col].dtype == 'object':
+                # Check if more than 80% of non-NA values could be numeric
+                non_na_values = df[col].dropna()
+                if len(non_na_values) == 0:
+                    continue
+
+                # Try to convert to numeric and count successes
+                numeric_count = pd.to_numeric(non_na_values, errors='coerce').notna().sum()
+                if numeric_count / len(non_na_values) > 0.8:
+                    # More than 80% can be converted to numeric, so convert the column
+                    df[col] = pd.to_numeric(df[col], errors='coerce')
+
+        # Try to parse date columns
+        for col in df.columns:
+            # Skip if not object dtype
+            if df[col].dtype != 'object':
+                continue
+
+            # Check if column name suggests a date
+            if any(date_term in col.lower() for date_term in ['date', 'time', 'day', 'month', 'year']):
+                try:
+                    # Parse into a temporary series; converting in place and casting
+                    # back with astype('object') would lose the original string values
+                    parsed = pd.to_datetime(df[col], errors='coerce')
+                    # Only keep the conversion if at least 80% succeeded
+                    if parsed.notna().mean() >= 0.8:
+                        df[col] = parsed
+                except Exception:
+                    pass
+
+        return df
+
+    def _extract_document_text(self, message: Dict[str, Any]) -> str:
+        """
+        Extract text from documents (fallback method).
+
+        Args:
+            message: Input message with documents
+
+        Returns:
+            Extracted text
+        """
+        text_content = ""
+        for document in message.get("documents", []):
+            source = document.get("source", {})
+            name = source.get("name", "unnamed")
+
+            text_content += f"\n\n--- {name} ---\n"
+
+            for content in document.get("contents", []):
+                if content.get("type") == "text":
+                    text_content += content.get("text", "")
+
+        return text_content
+
+    def _determine_analysis_type(self, task: str) -> str:
+        """
+        Determine the type of analysis based on the task.
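+        Keyword matching is case-insensitive and the first matching category
+        wins; 'general' is the fallback when no analysis keywords are present.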
+ + Args: + task: The analysis task + + Returns: + Analysis type + """ + task_lower = task.lower() + + # Check for statistical analysis + if any(term in task_lower for term in ["statistics", "statistical", "mean", "median", "variance"]): + return "statistical" + + # Check for trend analysis + elif any(term in task_lower for term in ["trend", "pattern", "time series", "historical"]): + return "trend" + + # Check for comparative analysis + elif any(term in task_lower for term in ["compare", "comparison", "versus", "vs", "difference"]): + return "comparative" + + # Check for predictive analysis + elif any(term in task_lower for term in ["predict", "forecast", "future", "projection"]): + return "predictive" + + # Check for clustering or categorization + elif any(term in task_lower for term in ["cluster", "segment", "categorize", "classify"]): + return "clustering" + + # Default to general analysis + else: + return "general" + + def _extract_data_insights(self, data_frames: Dict[str, pd.DataFrame]) -> str: + """ + Extract basic insights from data frames. + + Args: + data_frames: Dictionary of data frames + + Returns: + Extracted insights as text + """ + insights = [] + + for name, df in data_frames.items(): + if df.empty: + continue + + insight = f"Dataset: {name}\n" + insight += f"Shape: {df.shape[0]} rows, {df.shape[1]} columns\n" + insight += f"Columns: {', '.join(df.columns.tolist())}\n" + + # Basic statistics for numeric columns + numeric_cols = df.select_dtypes(include=['number']).columns + if len(numeric_cols) > 0: + insight += "Numeric column statistics:\n" + for col in numeric_cols[:5]: # Limit to first 5 columns + stats = df[col].describe() + insight += f" {col}: min={stats['min']:.2f}, max={stats['max']:.2f}, mean={stats['mean']:.2f}, median={df[col].median():.2f}\n" + + if len(numeric_cols) > 5: + insight += f" ... and {len(numeric_cols) - 5} more numeric columns\n" + + # Date range for datetime columns + date_cols = df.select_dtypes(include=['datetime']).columns + if len(date_cols) > 0: + insight += "Date range:\n" + for col in date_cols: + if df[col].notna().any(): + min_date = df[col].min() + max_date = df[col].max() + insight += f" {col}: {min_date} to {max_date}\n" + + # Categorical column value counts + cat_cols = df.select_dtypes(include=['object', 'category']).columns + if len(cat_cols) > 0: + insight += "Categorical columns:\n" + for col in cat_cols[:3]: # Limit to first 3 columns + # Get top 3 values + top_values = df[col].value_counts().head(3) + vals_str = ", ".join([f"{val} ({count})" for val, count in top_values.items()]) + insight += f" {col}: {df[col].nunique()} unique values. Top values: {vals_str}\n" + + if len(cat_cols) > 3: + insight += f" ... and {len(cat_cols) - 3} more categorical columns\n" + + # Missing values + missing = df.isna().sum() + if missing.sum() > 0: + cols_with_missing = missing[missing > 0] + insight += "Missing values:\n" + for col, count in cols_with_missing.items(): + pct = 100 * count / len(df) + insight += f" {col}: {count} missing values ({pct:.1f}%)\n" + + insights.append(insight) + + return "\n\n".join(insights) + + def _generate_visualizations(self, data_frames: Dict[str, pd.DataFrame], analysis_type: str, + workflow_id: str, task: str) -> List[Dict[str, Any]]: + """ + Generate appropriate visualizations based on data and analysis type. 
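+        One figure set is produced per DataFrame; empty and single-row frames
+        are skipped.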
+ + Args: + data_frames: Dictionary of DataFrames to visualize + analysis_type: Type of analysis being performed + workflow_id: Workflow ID + task: Original task description + + Returns: + List of visualization document objects + """ + documents = [] + + for name, df in data_frames.items(): + if df.empty or df.shape[0] < 2: + continue # Skip empty or single-row DataFrames + + # Generate different visualizations based on the analysis type + if analysis_type == "statistical": + viz_docs = self._create_statistical_visualizations(df, name, workflow_id) + documents.extend(viz_docs) + + elif analysis_type == "trend": + viz_docs = self._create_trend_visualizations(df, name, workflow_id) + documents.extend(viz_docs) + + elif analysis_type == "comparative": + viz_docs = self._create_comparative_visualizations(df, name, workflow_id) + documents.extend(viz_docs) + + elif analysis_type == "predictive": + viz_docs = self._create_predictive_visualizations(df, name, workflow_id) + documents.extend(viz_docs) + + elif analysis_type == "clustering": + viz_docs = self._create_clustering_visualizations(df, name, workflow_id) + documents.extend(viz_docs) + + else: # general analysis + viz_docs = self._create_general_visualizations(df, name, workflow_id) + documents.extend(viz_docs) + + return documents + + def _create_statistical_visualizations(self, df: pd.DataFrame, name: str, workflow_id: str) -> List[Dict[str, Any]]: + """Create statistical visualizations for a DataFrame""" + documents = [] + + # 1. Distribution/Histogram plots for numeric columns + numeric_cols = df.select_dtypes(include=['number']).columns[:5] # Limit to first 5 + if len(numeric_cols) > 0: + plt.figure(figsize=(12, 8)) + + for i, col in enumerate(numeric_cols, 1): + plt.subplot(len(numeric_cols), 1, i) + sns.histplot(df[col].dropna(), kde=True) + plt.title(f'Distribution of {col}') + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_stat_dist_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Statistical Distributions - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + # 2. Box plots for numeric columns + if len(numeric_cols) > 0: + plt.figure(figsize=(12, 8)) + sns.boxplot(data=df[numeric_cols]) + plt.title(f'Box Plots of Numeric Variables in {name}') + plt.xticks(rotation=45) + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_stat_box_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Box Plots - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + # 3. 
Correlation heatmap for numeric columns + if len(numeric_cols) >= 2: + plt.figure(figsize=(10, 8)) + corr = df[numeric_cols].corr() + sns.heatmap(corr, annot=True, cmap='coolwarm', center=0) + plt.title(f'Correlation Heatmap - {name}') + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_stat_corr_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Correlation Heatmap - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + return documents + + def _create_trend_visualizations(self, df: pd.DataFrame, name: str, workflow_id: str) -> List[Dict[str, Any]]: + """Create trend visualizations for a DataFrame""" + documents = [] + + # Check for date/time columns + date_cols = df.select_dtypes(include=['datetime']).columns + + # If we have date columns, create time series plots + if len(date_cols) > 0: + date_col = date_cols[0] # Use the first date column + + # Find numeric columns to plot against the date + numeric_cols = df.select_dtypes(include=['number']).columns[:3] # Limit to first 3 + + if len(numeric_cols) > 0: + plt.figure(figsize=(12, 8)) + + for i, col in enumerate(numeric_cols, 1): + plt.subplot(len(numeric_cols), 1, i) + plt.plot(df[date_col], df[col]) + plt.title(f'Trend of {col} over time') + plt.xticks(rotation=45) + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_trend_time_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Time Series Trends - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + # If no date columns found, find another column that might represent sequence/order + else: + # Look for columns with sequential numbers + potential_sequence_cols = [] + for col in df.select_dtypes(include=['number']).columns: + values = df[col].dropna().values + if len(values) >= 5: + # Check if values are mostly sequential + diffs = np.diff(sorted(values)) + if np.all(diffs > 0) and np.std(diffs) / np.mean(diffs) < 0.5: + potential_sequence_cols.append(col) + + # Use first potential sequence column or first numeric column + numeric_cols = df.select_dtypes(include=['number']).columns + if len(potential_sequence_cols) > 0 and len(numeric_cols) > 1: + sequence_col = potential_sequence_cols[0] + # Find other numeric columns to plot against the sequence + plot_cols = [col for col in numeric_cols if col != sequence_col][:2] + + plt.figure(figsize=(12, 6)) + for col in plot_cols: + plt.plot(df[sequence_col], df[col], marker='o', label=col) + plt.title(f'Trend by {sequence_col} - {name}') + plt.legend() + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_trend_seq_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Sequential Trends - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + # Moving average visualization if we have enough data points + if len(df) > 10: + numeric_cols = 
df.select_dtypes(include=['number']).columns[:2] # Limit to first 2 + if len(numeric_cols) > 0: + plt.figure(figsize=(12, 6)) + + for col in numeric_cols: + # Sort data if we have a date column + if len(date_cols) > 0: + sorted_df = df.sort_values(by=date_cols[0]) + else: + sorted_df = df + + # Calculate moving average (window size 3) + values = sorted_df[col].values + window_size = min(3, len(values) - 1) + if window_size > 0: + moving_avg = np.convolve(values, np.ones(window_size)/window_size, mode='valid') + + # Plot original and moving average + plt.plot(values, label=f'{col} (Original)') + plt.plot(np.arange(window_size-1, len(values)), moving_avg, label=f'{col} (Moving Avg)') + + plt.title(f'Moving Average Trends - {name}') + plt.legend() + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_trend_mavg_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Moving Average Trends - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + return documents + + def _create_comparative_visualizations(self, df: pd.DataFrame, name: str, workflow_id: str) -> List[Dict[str, Any]]: + """Create comparative visualizations for a DataFrame""" + documents = [] + + # 1. Look for categorical columns to use for grouping + cat_cols = df.select_dtypes(include=['object', 'category']).columns + + if len(cat_cols) > 0: + # Use the first categorical column with reasonable number of unique values + groupby_col = None + for col in cat_cols: + unique_count = df[col].nunique() + if 2 <= unique_count <= 10: # Reasonable number of categories + groupby_col = col + break + + if groupby_col: + # Find numeric columns to compare across groups + numeric_cols = df.select_dtypes(include=['number']).columns[:3] # Limit to first 3 + + if len(numeric_cols) > 0: + # 1. Bar chart comparing means + plt.figure(figsize=(12, 6)) + mean_by_group = df.groupby(groupby_col)[numeric_cols].mean() + mean_by_group.plot(kind='bar') + plt.title(f'Mean Comparison by {groupby_col} - {name}') + plt.xticks(rotation=45) + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_comp_bar_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Mean Comparison by {groupby_col} - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + # 2. Box plots for comparing distributions + plt.figure(figsize=(12, 8)) + for i, col in enumerate(numeric_cols, 1): + plt.subplot(len(numeric_cols), 1, i) + sns.boxplot(x=groupby_col, y=col, data=df) + plt.title(f'Distribution of {col} by {groupby_col}') + plt.xticks(rotation=45) + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_comp_box_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Distribution Comparison - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + # 3. 
Scatter plot comparing two numeric variables
+        numeric_cols = df.select_dtypes(include=['number']).columns
+        if len(numeric_cols) >= 2:
+            plt.figure(figsize=(10, 8))
+            # Use first two numeric columns
+            x_col, y_col = numeric_cols[0], numeric_cols[1]
+
+            scatter = plt.scatter(df[x_col], df[y_col])
+            plt.title(f'Comparison of {x_col} vs {y_col} - {name}')
+            plt.xlabel(x_col)
+            plt.ylabel(y_col)
+
+            # Add color if we have a categorical column
+            if len(cat_cols) > 0:
+                groupby_col = cat_cols[0]
+                if df[groupby_col].nunique() <= 10:  # Reasonable number of categories
+                    # Discard the uncolored figure before drawing the colored one,
+                    # otherwise it stays open and leaks
+                    plt.close()
+                    plt.figure(figsize=(10, 8))
+                    scatter = plt.scatter(df[x_col], df[y_col], c=pd.factorize(df[groupby_col])[0], cmap='viridis')
+                    plt.title(f'Comparison of {x_col} vs {y_col} by {groupby_col} - {name}')
+                    plt.xlabel(x_col)
+                    plt.ylabel(y_col)
+                    legend1 = plt.legend(scatter.legend_elements()[0], df[groupby_col].unique(), title=groupby_col)
+                    plt.gca().add_artist(legend1)
+
+            plt.tight_layout()
+
+            # Save figure
+            img_data = self._get_figure_as_base64()
+            plt.close()
+
+            # Create document
+            doc_id = f"viz_comp_scatter_{uuid.uuid4()}"
+            doc = {
+                "id": doc_id,
+                "source": {
+                    "type": "generated",
+                    "id": doc_id,
+                    "name": f"Variable Comparison - {name}",
+                    "content_type": "image/png",
+                    "size": len(img_data)
+                },
+                "contents": [{
+                    "type": "image",
+                    "data": img_data,
+                    "format": "base64"
+                }]
+            }
+            documents.append(doc)
+
+        return documents
+
+    def _create_predictive_visualizations(self, df: pd.DataFrame, name: str, workflow_id: str) -> List[Dict[str, Any]]:
+        """Create predictive visualizations for a DataFrame"""
+        documents = []
+
+        # Check for date/time columns for time series prediction
+        date_cols = df.select_dtypes(include=['datetime']).columns
+
+        if len(date_cols) > 0:
+            date_col = date_cols[0]  # Use the first date column
+
+            # Sort by date
+            df_sorted = df.sort_values(by=date_col)
+
+            # Find numeric columns to predict
+            numeric_cols = df.select_dtypes(include=['number']).columns[:2]  # Limit to first 2
+
+            if len(numeric_cols) > 0:
+                plt.figure(figsize=(12, 8))
+
+                for i, col in enumerate(numeric_cols, 1):
+                    plt.subplot(len(numeric_cols), 1, i)
+
+                    # Get values and dates
+                    values = df_sorted[col].values
+                    dates = df_sorted[date_col].values
+
+                    # Need minimum number of points for meaningful prediction
+                    if len(values) >= 5:
+                        # Use basic linear regression for prediction
+                        # Convert dates to numeric values for regression; numpy
+                        # timedelta64 has no total_seconds(), so go through pandas
+                        date_nums = (pd.Series(dates) - dates[0]).dt.total_seconds().to_numpy()
+                        date_nums = date_nums / np.max(date_nums)  # Normalize
+
+                        # Remove NaNs
+                        mask = ~np.isnan(values)
+                        if np.sum(mask) >= 3:  # Need at least 3 points
+                            x = date_nums[mask].reshape(-1, 1)
+                            y = values[mask]
+
+                            # Fit linear regression
+                            from sklearn.linear_model import LinearRegression
+                            model = LinearRegression()
+                            model.fit(x, y)
+
+                            # Extend for prediction
+                            x_extended = np.linspace(0, 1.2, 100).reshape(-1, 1)
+                            y_extended = model.predict(x_extended)
+
+                            # Convert x_extended back to dates for plotting
+                            max_seconds = float((pd.Series(dates) - dates[0]).dt.total_seconds().max())
+                            future_seconds = x_extended.flatten() * max_seconds
+                            future_dates = [dates[0] + pd.Timedelta(seconds=s) for s in future_seconds]
+
+                            # Plot
+                            plt.plot(dates, values, 'o-', label='Actual')
+                            plt.plot(future_dates, y_extended, '--', label='Predicted')
+                            plt.axvline(x=dates[-1], color='r', linestyle=':', label='Current')
+
+                            plt.title(f'Prediction for {col}')
+                            plt.xticks(rotation=45)
+                            plt.legend()
+
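+                            # The normalized fit domain [0, 1.2] extrapolates
+                            # roughly 20% past the last observed date; the dotted
+                            # vertical line marks where history ends and the
+                            # projection begins.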
plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_pred_time_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Time Series Prediction - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + # Regression prediction (feature vs target) + numeric_cols = df.select_dtypes(include=['number']).columns + if len(numeric_cols) >= 2: + plt.figure(figsize=(10, 8)) + + # Use first two numeric columns as feature and target + x_col, y_col = numeric_cols[0], numeric_cols[1] + + # Remove NaNs + df_clean = df[[x_col, y_col]].dropna() + + if len(df_clean) >= 5: # Need minimum points for regression + x = df_clean[x_col].values.reshape(-1, 1) + y = df_clean[y_col].values + + # Fit linear regression + from sklearn.linear_model import LinearRegression + model = LinearRegression() + model.fit(x, y) + + # Generate predictions + x_range = np.linspace(df_clean[x_col].min(), df_clean[x_col].max() * 1.1, 100).reshape(-1, 1) + y_pred = model.predict(x_range) + + # Plot + plt.scatter(df_clean[x_col], df_clean[y_col], label='Data Points') + plt.plot(x_range, y_pred, 'r--', label=f'Predicted {y_col}') + plt.title(f'Regression Prediction: {y_col} based on {x_col} - {name}') + plt.xlabel(x_col) + plt.ylabel(y_col) + plt.legend() + + # Add regression equation + slope = model.coef_[0] + intercept = model.intercept_ + plt.text(0.05, 0.95, f'{y_col} = {slope:.2f} * {x_col} + {intercept:.2f}', + transform=plt.gca().transAxes, fontsize=10, + verticalalignment='top') + + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_pred_reg_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Regression Prediction - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + return documents + + def _create_clustering_visualizations(self, df: pd.DataFrame, name: str, workflow_id: str) -> List[Dict[str, Any]]: + """Create clustering visualizations for a DataFrame""" + documents = [] + + # Need numeric columns for clustering + numeric_cols = df.select_dtypes(include=['number']).columns + if len(numeric_cols) >= 2: + # Select two numeric columns for 2D visualization + cols = numeric_cols[:2] + + # Remove NaNs + df_clean = df[cols].dropna() + + if len(df_clean) >= 5: # Need minimum points for clustering + # Normalize data + from sklearn.preprocessing import StandardScaler + scaler = StandardScaler() + data_scaled = scaler.fit_transform(df_clean) + + # Apply K-means clustering + from sklearn.cluster import KMeans + # Determine number of clusters (2-5 based on data size) + n_clusters = min(max(2, len(df_clean) // 10), 5) + kmeans = KMeans(n_clusters=n_clusters, random_state=42) + clusters = kmeans.fit_predict(data_scaled) + + # Add cluster labels to DataFrame + df_clean['Cluster'] = clusters + + # Create scatter plot with clusters + plt.figure(figsize=(10, 8)) + + # Plot clusters + scatter = plt.scatter(df_clean[cols[0]], df_clean[cols[1]], c=df_clean['Cluster'], cmap='viridis') + + # Plot centroids + centroids = scaler.inverse_transform(kmeans.cluster_centers_) + plt.scatter(centroids[:, 0], centroids[:, 1], marker='X', s=200, 
c='red', label='Centroids')
+
+                plt.title(f'K-means Clustering ({n_clusters} clusters) - {name}')
+                plt.xlabel(cols[0])
+                plt.ylabel(cols[1])
+                # Draw the cluster legend first and pin it, so the later call for
+                # the centroid label does not replace it
+                legend1 = plt.legend(*scatter.legend_elements(), title="Clusters")
+                plt.gca().add_artist(legend1)
+                plt.legend(loc='lower right')
+                plt.tight_layout()
+
+                # Save figure
+                img_data = self._get_figure_as_base64()
+                plt.close()
+
+                # Create document
+                doc_id = f"viz_clust_kmeans_{uuid.uuid4()}"
+                doc = {
+                    "id": doc_id,
+                    "source": {
+                        "type": "generated",
+                        "id": doc_id,
+                        "name": f"K-means Clustering - {name}",
+                        "content_type": "image/png",
+                        "size": len(img_data)
+                    },
+                    "contents": [{
+                        "type": "image",
+                        "data": img_data,
+                        "format": "base64"
+                    }]
+                }
+                documents.append(doc)
+
+                # If we have more than 2 numeric columns, also create a PCA visualization
+                if len(numeric_cols) > 2:
+                    from sklearn.decomposition import PCA
+
+                    # Select more columns for PCA
+                    pca_cols = numeric_cols[:min(len(numeric_cols), 5)]
+
+                    # Remove NaNs
+                    df_pca = df[pca_cols].dropna()
+
+                    if len(df_pca) >= 5:
+                        # Normalize data
+                        pca_data = StandardScaler().fit_transform(df_pca)
+
+                        # Apply PCA to reduce to 2 dimensions
+                        pca = PCA(n_components=2)
+                        principal_components = pca.fit_transform(pca_data)
+
+                        # Create DataFrame with principal components
+                        pca_df = pd.DataFrame(data=principal_components, columns=['PC1', 'PC2'])
+
+                        # Apply clustering to PCA results
+                        clusters = KMeans(n_clusters=n_clusters, random_state=42).fit_predict(pca_df)
+                        pca_df['Cluster'] = clusters
+
+                        # Create scatter plot
+                        plt.figure(figsize=(10, 8))
+                        scatter = plt.scatter(pca_df['PC1'], pca_df['PC2'], c=pca_df['Cluster'], cmap='viridis')
+                        plt.title(f'PCA Clustering ({n_clusters} clusters) - {name}')
+                        plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.2%} variance)')
+                        plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.2%} variance)')
+                        plt.legend(*scatter.legend_elements(), title="Clusters")
+                        plt.tight_layout()
+
+                        # Save figure
+                        img_data = self._get_figure_as_base64()
+                        plt.close()
+
+                        # Create document
+                        doc_id = f"viz_clust_pca_{uuid.uuid4()}"
+                        doc = {
+                            "id": doc_id,
+                            "source": {
+                                "type": "generated",
+                                "id": doc_id,
+                                "name": f"PCA Clustering - {name}",
+                                "content_type": "image/png",
+                                "size": len(img_data)
+                            },
+                            "contents": [{
+                                "type": "image",
+                                "data": img_data,
+                                "format": "base64"
+                            }]
+                        }
+                        documents.append(doc)
+
+        return documents
+
+    def _create_general_visualizations(self, df: pd.DataFrame, name: str, workflow_id: str) -> List[Dict[str, Any]]:
+        """Create general purpose visualizations for a DataFrame"""
+        documents = []
+
+        # 1. Data overview: numeric summary
+        numeric_cols = df.select_dtypes(include=['number']).columns
+        if len(numeric_cols) > 0:
+            # Create a bar chart of means for numeric columns
+            plt.figure(figsize=(12, 6))
+            means = df[numeric_cols].mean().sort_values()
+            means.plot(kind='bar')
+            plt.title(f'Mean Values of Numeric Variables - {name}')
+            plt.xticks(rotation=45)
+            plt.tight_layout()
+
+            # Save figure
+            img_data = self._get_figure_as_base64()
+            plt.close()
+
+            # Create document
+            doc_id = f"viz_gen_means_{uuid.uuid4()}"
+            doc = {
+                "id": doc_id,
+                "source": {
+                    "type": "generated",
+                    "id": doc_id,
+                    "name": f"Numeric Variables Summary - {name}",
+                    "content_type": "image/png",
+                    "size": len(img_data)
+                },
+                "contents": [{
+                    "type": "image",
+                    "data": img_data,
+                    "format": "base64"
+                }]
+            }
+            documents.append(doc)
+
+        # 2.
Categorical data overview + cat_cols = df.select_dtypes(include=['object', 'category']).columns + if len(cat_cols) > 0: + # Select the first categorical column with reasonable cardinality + for col in cat_cols: + if df[col].nunique() <= 10: # Reasonable number of categories + plt.figure(figsize=(10, 6)) + value_counts = df[col].value_counts().sort_values(ascending=False) + value_counts.plot(kind='bar') + plt.title(f'Distribution of {col} - {name}') + plt.xticks(rotation=45) + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_gen_cat_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Categorical Distribution - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + break # Only use the first suitable column + + # 3. Correlation matrix if we have multiple numeric columns + if len(numeric_cols) >= 2: + plt.figure(figsize=(10, 8)) + corr = df[numeric_cols].corr() + sns.heatmap(corr, annot=True, cmap='coolwarm', center=0) + plt.title(f'Correlation Matrix - {name}') + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_gen_corr_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Correlation Matrix - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + # 4. If we have date columns, show time-based visualization + date_cols = df.select_dtypes(include=['datetime']).columns + if len(date_cols) > 0 and len(numeric_cols) > 0: + date_col = date_cols[0] # Use the first date column + num_col = numeric_cols[0] # Use the first numeric column + + plt.figure(figsize=(12, 6)) + plt.plot(df[date_col], df[num_col], marker='o') + plt.title(f'{num_col} over Time - {name}') + plt.xticks(rotation=45) + plt.tight_layout() + + # Save figure + img_data = self._get_figure_as_base64() + plt.close() + + # Create document + doc_id = f"viz_gen_time_{uuid.uuid4()}" + doc = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": f"Time Series Overview - {name}", + "content_type": "image/png", + "size": len(img_data) + }, + "contents": [{ + "type": "image", + "data": img_data, + "format": "base64" + }] + } + documents.append(doc) + + return documents + + def _get_figure_as_base64(self) -> str: + """Convert current matplotlib figure to base64 string""" + buffer = io.BytesIO() + plt.savefig(buffer, format='png', dpi=self.chart_dpi) + buffer.seek(0) + image_png = buffer.getvalue() + buffer.close() + + # Convert to base64 + image_base64 = base64.b64encode(image_png).decode('utf-8') + return image_base64 + + async def _generate_analysis(self, prompt: str, analysis_type: str) -> str: + """ + Generate analysis based on prompt and analysis type. + + Args: + prompt: The analysis prompt + analysis_type: Type of analysis + + Returns: + Generated analysis + """ + if not self.ai_service: + return f"## Data Analysis ({analysis_type})\n\nUnable to generate analysis: AI service not available." 
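+        # Prompt layering: the shared analyst system prompt is extended with
+        # analysis-type-specific guidance (_get_analysis_system_prompt), while
+        # the user message carries the task, document context, and any data
+        # insights extracted upstream.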
+ + # Create specialized prompt based on analysis type + system_prompt = self._get_analysis_system_prompt(analysis_type) + + # Enhance the prompt with analysis-specific instructions + enhanced_prompt = f""" + Generate a detailed {analysis_type} analysis based on the following: + + {prompt} + + Your analysis should include: + 1. A summary of the data + 2. Key findings and insights + 3. Supporting evidence and calculations + 4. Clear conclusions + 5. Recommendations where appropriate + + Format the analysis in Markdown with proper headings, lists, and tables. + """ + + try: + content = await self.ai_service.call_api([ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": enhanced_prompt} + ]) + + # Ensure there's a title at the top + if not content.strip().startswith("# "): + content = f"# {analysis_type.capitalize()} Analysis\n\n{content}" + + return content + except Exception as e: + return f"# {analysis_type.capitalize()} Analysis\n\nError generating analysis: {str(e)}" + + def _get_analysis_system_prompt(self, analysis_type: str) -> str: + """ + Get specialized system prompt for specific analysis type. + + Args: + analysis_type: Type of analysis + + Returns: + System prompt + """ + base_prompt = self._get_system_prompt() + + # Add analysis-specific instructions + if analysis_type == "statistical": + return f"{base_prompt}\n\nFocus on statistical measures including mean, median, mode, variance, and distribution. Identify outliers and unusual data points. Present key statistics in tables where appropriate." + + elif analysis_type == "trend": + return f"{base_prompt}\n\nFocus on identifying trends over time, seasonality, and patterns in the data. Look for long-term movements, cyclical patterns, and turning points. Consider rate of change and growth rates." + + elif analysis_type == "comparative": + return f"{base_prompt}\n\nFocus on comparing different groups, categories, or time periods. Highlight similarities and differences. Use comparative metrics and relative measures rather than just absolute values." + + elif analysis_type == "predictive": + return f"{base_prompt}\n\nFocus on extrapolating trends and patterns to make predictions about future values. Discuss confidence levels and potential factors that could influence outcomes. Be clear about assumptions." + + elif analysis_type == "clustering": + return f"{base_prompt}\n\nFocus on identifying natural groupings or segments within the data. Describe the characteristics of each cluster and what distinguishes them. Consider similarities within groups and differences between groups." + + else: + return base_prompt + + def _get_system_prompt(self) -> str: + """ + Get specialized system prompt for analyst agent. + + Returns: + System prompt + """ + return f""" + You are {self.name}, a specialized {self.type} agent focused on data analysis. + + {self.description} + + When analyzing data: + 1. First, identify the data structure and key variables + 2. Look for patterns, trends, and outliers + 3. Provide statistical insights and evidence-based conclusions + 4. Highlight any important findings clearly + 5. Suggest visualizations that would help understand the data + + For CSV data, interpret tables correctly and perform calculations accurately. + For textual data, extract key metrics and relationships. + + Respond in a clear, analytical style, and format your findings in a structured report. 
+ """ + + def send_analysis_result(self, analysis_content: str, sender_id: str, receiver_id: str, + task_id: str, analysis_data: Dict[str, Any] = None, + context_id: str = None) -> AgentMessage: + """ + Send analysis results using the protocol. + + Args: + analysis_content: Analysis content + sender_id: Sender ID + receiver_id: Receiver ID + task_id: Task ID + analysis_data: Additional analysis data + context_id: Context ID + + Returns: + Protocol message + """ + return self.protocol.create_result_message( + result_content=analysis_content, + sender_id=sender_id, + receiver_id=receiver_id, + task_id=task_id, + output_data=analysis_data, + result_format=self.result_format, + context_id=context_id + ) + + def send_error_message(self, error_description: str, sender_id: str, receiver_id: str = None, + error_details: Dict[str, Any] = None, context_id: str = None) -> AgentMessage: + """ + Send error message using the protocol. + + Args: + error_description: Error description + sender_id: Sender ID + receiver_id: Receiver ID + error_details: Error details + context_id: Context ID + + Returns: + Protocol message + """ + return self.protocol.create_error_message( + error_description=error_description, + sender_id=sender_id, + receiver_id=receiver_id, + error_type="analysis_error", + error_details=error_details, + context_id=context_id + ) + + def send_document_request_message(self, document_description: str, sender_id: str, receiver_id: str, + filters: Dict[str, Any] = None, context_id: str = None) -> AgentMessage: + """ + Send document request using the protocol. + + Args: + document_description: Document description + sender_id: Sender ID + receiver_id: Receiver ID + filters: Document filters + context_id: Context ID + + Returns: + Protocol message + """ + return self.protocol.create_document_request_message( + document_description=document_description, + sender_id=sender_id, + receiver_id=receiver_id, + filters=filters, + context_id=context_id + ) -# Singleton-Instanz +# Singleton instance _analyst_agent = None def get_analyst_agent(): - """Gibt eine Singleton-Instanz des Datenanalyst-Agenten zurück""" + """Returns a singleton instance of the data analyst agent""" global _analyst_agent if _analyst_agent is None: _analyst_agent = AnalystAgent() diff --git a/gwserver/modules/agentservice_agent_coder.py b/gwserver/modules/agentservice_agent_coder.py index 40b89d3c..e9be8096 100644 --- a/gwserver/modules/agentservice_agent_coder.py +++ b/gwserver/modules/agentservice_agent_coder.py @@ -1,475 +1,466 @@ """ -Erweiterter Coder-Agent für die Entwicklung und Ausführung von Python-Code. -Integriert direkten Code-Executor zur Vereinfachung des Ablaufs. +CoderAgent - A unified agent for developing and executing Python code. +Includes code execution capabilities previously in separate modules. 
""" import logging import json -import os -import asyncio import re import uuid +import traceback +import os import subprocess import tempfile -import traceback +import shutil import sys -import importlib.util -import inspect from datetime import datetime -from typing import List, Dict, Any, Optional, Tuple, Union +from typing import List, Dict, Any, Optional, Tuple from modules.agentservice_base import BaseAgent -from modules.lucydom_interface import get_lucydom_interface from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils from connectors.connector_aichat_openai import ChatService -from modules import agentservice_code_helpers +from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol logger = logging.getLogger(__name__) -class CodeExecutor: +class SimpleCodeExecutor: """ - Führt generierten Code in einer isolierten virtuellen Umgebung aus, - während Zugriff auf spezifische App-Module gewährt wird und - automatisch erforderliche Pakete installiert werden. + A simplified executor that runs Python code in isolated virtual environments. """ + # Class variable to store workflow environments for persistence + _workflow_environments = {} + def __init__(self, - app_modules: List[str] = None, - venv_path: Optional[str] = None, + workflow_id: str = None, timeout: int = 30, max_memory_mb: int = 512, - allowed_packages: List[str] = None, + requirements: List[str] = None, blocked_packages: List[str] = None): """ - Initialisiert den CodeExecutor. + Initialize the SimpleCodeExecutor. Args: - app_modules: Liste von Modulnamen, die dem generierten Code zur Verfügung stehen sollen - venv_path: Pfad zur virtuellen Umgebung. Falls None, wird eine temporäre erstellt - timeout: Maximale Ausführungszeit in Sekunden - max_memory_mb: Maximaler Arbeitsspeicher in MB - allowed_packages: Liste erlaubter Pakete (wenn None, werden alle erlaubt, außer blockierte) - blocked_packages: Liste blockierter Pakete (z.B. 
gefährliche oder ressourcenintensive) + workflow_id: Optional workflow ID for persistent environments + timeout: Maximum execution time in seconds + max_memory_mb: Maximum memory in MB + requirements: List of packages to install + blocked_packages: List of blocked packages """ - self.app_modules = app_modules or [] - self.venv_path = venv_path + self.workflow_id = workflow_id self.timeout = timeout self.max_memory_mb = max_memory_mb self.temp_dir = None - self.allowed_packages = allowed_packages - self.blocked_packages = blocked_packages or ["cryptography", "flask", "django", "tornado", "requests"] + self.requirements = requirements or [] + self.blocked_packages = blocked_packages or [ + "cryptography", "flask", "django", "tornado", # Security risks + "tensorflow", "torch", "pytorch", "scikit-learn", "sklearn" # Resource intensive; torch/sklearn are the names actually imported/installed + ] + self.is_persistent = workflow_id is not None + @classmethod + def get_workflow_environment(cls, workflow_id: str) -> Optional[str]: + """Get an existing workflow environment path if it exists.""" + return cls._workflow_environments.get(workflow_id) + + @classmethod + def set_workflow_environment(cls, workflow_id: str, env_path: str) -> None: + """Store a workflow environment path.""" + cls._workflow_environments[workflow_id] = env_path + def _create_venv(self) -> str: - """Erstellt eine virtuelle Umgebung und gibt den Pfad zurück.""" - if self.venv_path and os.path.exists(self.venv_path): - return self.venv_path - - # Temporäres Verzeichnis für die virtuelle Umgebung erstellen - self.temp_dir = tempfile.mkdtemp(prefix="ai_code_exec_") - venv_path = os.path.join(self.temp_dir, "venv") + """Creates a virtual environment and returns the path.""" + # Check for existing environment if using workflow_id + if self.workflow_id: + self.is_persistent = True + existing_env = self.get_workflow_environment(self.workflow_id) + if existing_env and os.path.exists(existing_env): + logger.info(f"Reusing existing virtual environment: {existing_env}") + self.temp_dir = os.path.dirname(existing_env) + return existing_env + else: + logger.info(f"Creating new environment for workflow {self.workflow_id}") + + # Create a new environment + venv_parent_dir = tempfile.mkdtemp(prefix="simple_exec_") + self.temp_dir = venv_parent_dir + venv_path = os.path.join(venv_parent_dir, "venv") try: - # Virtuelle Umgebung erstellen - logger.info(f"Erstelle virtuelle Umgebung in {venv_path}") + # Create virtual environment + logger.info(f"Creating new virtual environment in {venv_path}") subprocess.run([sys.executable, "-m", "venv", venv_path], check=True, capture_output=True) + + # Store the environment path if this is for a specific workflow + if self.workflow_id: + logger.info(f"Registering new persistent environment for workflow {self.workflow_id}") + self.set_workflow_environment(self.workflow_id, venv_path) + return venv_path except subprocess.CalledProcessError as e: - logger.error(f"Fehler beim Erstellen der virtuellen Umgebung: {e}") - raise RuntimeError(f"Konnte venv nicht erstellen: {e}") + logger.error(f"Error creating virtual environment: {e}") + raise RuntimeError(f"Could not create venv: {e}") def _get_pip_executable(self, venv_path: str) -> str: - """Ermittelt den Pfad zum pip-Executable in der virtuellen Umgebung.""" + """Gets the path to the pip executable in the virtual environment.""" if os.name == 'nt': # Windows return os.path.join(venv_path, "Scripts", "pip.exe") else: # Unix/Linux return os.path.join(venv_path, "bin", "pip") def _get_python_executable(self, venv_path: str) -> str: -
"""Ermittelt den Pfad zum Python-Executable in der virtuellen Umgebung.""" + """Gets the path to the Python executable in the virtual environment.""" if os.name == 'nt': # Windows return os.path.join(venv_path, "Scripts", "python.exe") else: # Unix/Linux return os.path.join(venv_path, "bin", "python") + + def _filter_requirements(self, requirements: List[str]) -> List[str]: + """Filter out blocked packages and invalid requirements.""" + if not requirements: + return [] + + filtered_requirements = [] + for req in requirements: + # Skip empty, comment lines, or invalid requirements + req = req.strip() + if not req or req.startswith('#') or '```' in req or req in ['`', '``', '```']: + logging.warning(f"Skipping comment or invalid requirement: {req}") + continue + + # Extract package name from requirement spec + import re + package_name = re.split(r'[=<>]', req)[0].strip().lower() + + if package_name in self.blocked_packages: + logging.warning(f"Blocked package detected: {package_name}") + continue + + filtered_requirements.append(req) + + return filtered_requirements - def _install_packages(self, packages: List[str], venv_path: str) -> Tuple[bool, str]: - """ - Installiert Pakete in der virtuellen Umgebung. - - Args: - packages: Liste der zu installierenden Pakete - venv_path: Pfad zur virtuellen Umgebung + def _install_packages(self, venv_path: str, requirements: List[str]) -> bool: + """Install packages in the virtual environment.""" + if not requirements: + return True - Returns: - Tuple aus (Erfolg, Fehlermeldung) - """ - if not packages: - return True, "" + # Filter requirements + filtered_requirements = self._filter_requirements(requirements) + if not filtered_requirements: + logger.info("No allowed packages to install") + return True - # Überprüfen, ob Pakete erlaubt sind - blocked = [] - for package in packages: - # Paketname ohne Version extrahieren - pkg_name = re.split('[=<>]', package)[0].strip() - - if self.blocked_packages and pkg_name.lower() in [p.lower() for p in self.blocked_packages]: - blocked.append(pkg_name) - - if self.allowed_packages and pkg_name.lower() not in [p.lower() for p in self.allowed_packages]: - blocked.append(pkg_name) - - if blocked: - return False, f"Die folgenden Pakete sind nicht erlaubt: {', '.join(blocked)}" - - # Pakete installieren + # Get pip executable pip_executable = self._get_pip_executable(venv_path) - logger.info(f"Installiere Pakete in virtueller Umgebung: {', '.join(packages)}") + # Install packages try: - # pip aktualisieren - mache diesen Schritt optional - try: - subprocess.run( - [pip_executable, "install", "--upgrade", "pip"], - check=False, # Changed from True to False to make it optional - capture_output=True, - timeout=60 - ) - except Exception as pip_error: - # Log the error but continue - logger.warning(f"Pip-Upgrade fehlgeschlagen, fahre mit Paketinstallation fort: {pip_error}") - - # Pakete installieren - process = subprocess.run( - [pip_executable, "install"] + packages, + logger.info(f"Installing packages: {', '.join(filtered_requirements)}") + result = subprocess.run( + [pip_executable, "install"] + filtered_requirements, check=True, capture_output=True, text=True, - timeout=120 # 2 Minuten Timeout für Paketinstallation + timeout=300 ) - - return True, process.stdout + logger.info("Package installation successful") + return True except subprocess.CalledProcessError as e: - error_msg = f"Fehler bei der Paketinstallation: {e.stderr}" - logger.error(error_msg) - return False, error_msg - except subprocess.TimeoutExpired: - 
return False, "Zeitüberschreitung bei der Paketinstallation." + logger.error(f"Error during package installation: {e.stderr}") + return False except Exception as e: - return False, f"Unerwarteter Fehler bei der Paketinstallation: {str(e)}" - - + logger.error(f"Error during package installation: {str(e)}") + return False + def _extract_required_packages(self, code: str) -> List[str]: - """ - Extrahiert benötigte Pakete aus dem Code durch Analyse von Import-Statements - und Pip-Installationsanweisungen. - - Args: - code: Der Python-Code - - Returns: - Liste der erkannten Paketnamen - """ + """Extract required packages from import statements and requirements comments in the code.""" + import re packages = set() - # Paketkommentare erkennen (# pip install package) + # Check for special REQUIREMENTS comment + requirements_match = re.search(r'# REQUIREMENTS:\s*([^\n]+)', code) + if requirements_match: + req_str = requirements_match.group(1).strip() + for pkg in req_str.split(','): + if pkg.strip(): + packages.add(pkg.strip()) + + # Add common base packages + base_packages = [ + "requests", "urllib3", "pydantic", + "pandas", "numpy", "matplotlib" + ] + + for pkg in base_packages: + packages.add(pkg) + + # Detect pip install comments pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code) for comment in pip_comments: for pkg in comment.split(): if pkg and not pkg.startswith('-'): packages.add(pkg.strip()) - # Import-Statements analysieren + # Analyze import statements import_lines = re.findall(r'^(?:import|from)\s+([^\s.]+)(?:\s+import|\s*$|\.)', code, re.MULTILINE) - # Standardmodule, die nicht installiert werden müssen + # Standard modules that don't need installation std_modules = { 'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json', 'collections', 'itertools', 'functools', 'pathlib', 'shutil', 'tempfile', 'uuid', 'subprocess', 'threading', 'logging', - 'traceback', 'io', 'copy' + 'traceback', 'io', 'copy', 'typing', 'asyncio' } - # Module der App, die nicht installiert werden müssen - app_modules_prefixes = set(m.split('.')[0] for m in self.app_modules) - + # Process all imports for module in import_lines: - if module not in std_modules and module not in app_modules_prefixes: + if module not in std_modules: packages.add(module) return list(packages) - - def _create_module_loader(self) -> str: - """ - Erstellt ein Hilfsskript, das App-Module in die venv importiert. - Gibt den Pfad zum Hilfsskript zurück. 
- """ - if not self.app_modules: - return "" - - # Temporäre Datei für den Module-Loader erstellen - module_loader_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - "module_loader.py") - - # Pfad zu den App-Modulen bestimmen - app_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) - - # Modul-Loader-Code generieren - loader_code = f""" -import sys -import importlib.util -import os -# App-Pfad zum Suchpfad hinzufügen -sys.path.insert(0, "{app_path}") -# Module importieren -modules = {{}} -""" - - # Code zum Importieren der Module hinzufügen - for module_name in self.app_modules: - loader_code += f""" -try: - modules["{module_name}"] = __import__("{module_name}", fromlist=["*"]) - print(f"Modul '{module_name}' erfolgreich importiert") -except ImportError as e: - print(f"Fehler beim Importieren von '{module_name}': {{e}}") -""" - - # Loader-Datei schreiben - with open(module_loader_path, "w") as f: - f.write(loader_code) - - return module_loader_path - def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]: """ - Führt den generierten Code in einer isolierten Umgebung aus. + Execute Python code in an isolated environment using a simple approach. Args: - code: Der auszuführende Python-Code - input_data: Eingabedaten für den Code (werden als JSON serialisiert) + code: Python code to execute + input_data: Optional input data for the code Returns: - Dict mit Ausführungsergebnissen, Ausgabe und Fehlern + Dictionary with execution results """ - # Virtuelle Umgebung erstellen oder bestehende verwenden - venv_path = self._create_venv() + logger.info(f"Executing code with workflow_id: {self.workflow_id}") + + # Create or reuse virtual environment + venv_path = self._create_venv() #creating self.temp_dir! 
+ + # Create input_data directory for file handling + input_data_dir = os.path.join(self.temp_dir, "input_data") # Temp dir is at root + os.makedirs(input_data_dir, exist_ok=True) + + # Extract and install required packages + all_requirements = [] - # Erforderliche Pakete aus dem Code extrahieren - required_packages = self._extract_required_packages(code) + # Add explicitly provided requirements + if self.requirements: + all_requirements.extend(self.requirements) - # Pakete installieren, falls erforderlich - install_success = True - install_log = "" - if required_packages: - install_success, install_log = self._install_packages(required_packages, venv_path) - + # Extract requirements from code + extracted_requirements = self._extract_required_packages(code) + if extracted_requirements: + all_requirements.extend(extracted_requirements) + logger.info(f"Extracted required packages from code: {', '.join(extracted_requirements)}") + + # Install packages if needed + if all_requirements: + logger.info(f"Installing {len(all_requirements)} packages") + install_success = self._install_packages(venv_path, all_requirements) if not install_success: + # Return error if package installation failed return { "success": False, "output": "", - "error": f"Fehler bei der Installation der erforderlichen Pakete: {install_log}", + "error": f"Failed to install required packages: {', '.join(all_requirements)}", "result": None, - "installed_packages": required_packages + "exit_code": -1 } - # Temporäre Datei für den Code erstellen - code_id = str(uuid.uuid4())[:8] - code_file_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"ai_code_{code_id}.py") - - # Module-Loader erstellen - module_loader_path = self._create_module_loader() - - # Eingabedaten als JSON speichern, wenn vorhanden - input_path = "" - if input_data: - import json - input_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"input_{code_id}.json") - with open(input_path, "w") as f: - json.dump(input_data, f) - - # Outputpfad für Ergebnisse - output_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"output_{code_id}.json") - - # Prepare all paths using forward slashes for consistency across platforms - safe_module_loader_path = module_loader_path.replace('\\', '/') if module_loader_path else "" - safe_input_path = input_path.replace('\\', '/') if input_path else "" - safe_output_path = output_path.replace('\\', '/') - - wrapped_code = f""" -# -*- coding: utf-8 -*- -# coding: utf-8 -import sys -import json -import traceback -import os - -# Ergebnisstruktur -result = {{ - "success": False, - "output": "", - "error": "", - "result": None, - "installed_packages": {required_packages} -}} - -try: - # Module laden, falls erforderlich - if "{safe_module_loader_path}": - module_loader = __import__("module_loader") - globals().update({{k: v for k, v in module_loader.modules.items()}}) - - # Eingabedaten laden, falls vorhanden - input_data = None - if "{safe_input_path}": - with open("{safe_input_path}", "r") as f: - input_data = json.load(f) - - # Ausgabeumleitung - from io import StringIO - original_stdout = sys.stdout - original_stderr = sys.stderr - captured_stdout = StringIO() - captured_stderr = StringIO() - sys.stdout = captured_stdout - sys.stderr = captured_stderr - - # Benutzercode ausführen - try: - # Den Code in einem lokalen Namespace ausführen - local_vars = {{"input_data": input_data}} - exec('''{code}''', globals(), local_vars) - - # Ergebnis speichern, 
falls eine Variable 'result' definiert wurde - if "result" in local_vars: - result["result"] = local_vars["result"] - - result["success"] = True - except Exception as e: - result["error"] = str(e) - result["error"] += "\\n" + traceback.format_exc() - finally: - # Ausgabe erfassen - result["output"] = captured_stdout.getvalue() - result["error"] += captured_stderr.getvalue() - - # Ausgabeumleitung zurücksetzen - sys.stdout = original_stdout - sys.stderr = original_stderr - -except Exception as outer_e: - result["error"] = f"Fehler beim Ausführen des Setups: {{outer_e}}\\n{{traceback.format_exc()}}" - -# Ergebnis speichern -with open("{safe_output_path}", "w") as f: - json.dump(result, f, default=str) -""" - - # Code in temporäre Datei schreiben with UTF-8 encoding - with open(code_file_path, "w", encoding="utf-8") as f: - f.write(wrapped_code) - - # Python-Interpreter aus der virtuellen Umgebung bestimmen - python_executable = self._get_python_executable(venv_path) - - # Code ausführen - logger.info(f"Führe Code in virtueller Umgebung aus: {python_executable}") - try: - # Prozess mit Ressourcenbeschränkungen ausführen - cmd = [python_executable, code_file_path] - - # Umgebungsvariablen setzen, um Speicherlimit zu erzwingen - env = os.environ.copy() - if self.max_memory_mb: - if os.name == 'posix': # Unix/Linux - # Auf Unix-Systemen können wir ulimit verwenden - cmd = ["bash", "-c", f"ulimit -v {self.max_memory_mb * 1024} && {python_executable} {code_file_path}"] - elif os.name == 'nt': # Windows - # Auf Windows können wir keine harten Speichergrenzen setzen, aber Job Objects verwenden - # Hier müsste eine komplexere Lösung implementiert werden - pass - - # Prozess starten und mit Timeout ausführen - process = subprocess.run( - cmd, - timeout=self.timeout, - env=env, - capture_output=True, - text=True - ) - - # Ergebnis aus der Ausgabedatei lesen - if os.path.exists(output_path): - with open(output_path, "r") as f: - import json - execution_result = json.load(f) - else: - execution_result = { - "success": False, - "output": process.stdout, - "error": f"Keine Ergebnisdatei gefunden. 
Stderr: {process.stderr}", - "result": None, - "installed_packages": required_packages - } + # Process extracted document content if available + if input_data and "extracted_documents" in input_data: + for doc in input_data["extracted_documents"]: + doc_name = doc["name"] + doc_content = doc["content"] + doc_type = doc["type"] + # Create file path + file_path = os.path.join(input_data_dir, doc_name) + + try: + # Write content to file + with open(file_path, 'w', encoding='utf-8') as f: + f.write(doc_content) + + # Add to files list if not already there + if "files" not in input_data: + input_data["files"] = [] + + input_data["files"].append({ + "id": f"extracted_{doc_name}", + "name": doc_name, + "type": doc_type, + "path": file_path + }) + + logger.info(f"Created file from extracted content: {doc_name}") + except Exception as e: + logger.error(f"Error creating file from extracted content: {str(e)}") + + # Copy input files to input_data directory if provided + if input_data and "files" in input_data: + for file_info in input_data.get("files", []): + # Skip files we just created from extracted content + if file_info.get("id", "").startswith("extracted_"): + continue + + source_path = file_info.get("path", "") + logger.info(f"Attempting to copy file from: {source_path}") + logger.info(f"File exists: {os.path.exists(source_path)}") + if source_path and os.path.exists(source_path): + # Get just the filename + file_name = os.path.basename(source_path) + # Create destination path in input_data directory + dest_path = os.path.join(input_data_dir, file_name) + + try: + # Copy the file + shutil.copy2(source_path, dest_path) + logger.info(f"Copied file to input_data directory: {dest_path}") + except Exception as e: + logger.error(f"Error copying file {source_path}: {str(e)}") + + # Create a file for the code + code_id = uuid.uuid4().hex[:8] + code_file = os.path.join(self.temp_dir, f"code_{code_id}.py") + + # Write the code as-is without injecting additional loader code + with open(code_file, "w", encoding="utf-8") as f: + f.write(code) + + # Get Python executable + python_executable = self._get_python_executable(venv_path) + logger.info(f"Using Python executable: {python_executable}") + + # Execute code + try: + # Run the code from root dir + working_dir = os.path.dirname(code_file) # This should be the project root + logger.info(f"DEBUG PATH Root: {os.getcwd()} Code: {code_file} Working Dir: {working_dir}") + process = subprocess.run( + [python_executable, code_file], + timeout=self.timeout, + capture_output=True, + text=True, + cwd=self.temp_dir + ) + + # Process the output + stdout = process.stdout + stderr = process.stderr + + # Get result from stdout if available + result_data = None + if process.returncode == 0 and stdout: + try: + # Look for the last line that could be JSON + for line in reversed(stdout.strip().split('\n')): + line = line.strip() + if line and line[0] in '{[' and line[-1] in '}]': + try: + result_data = json.loads(line) + # Successfully parsed JSON result, use it + break + except json.JSONDecodeError: + # Not valid JSON, continue to next line + continue + except Exception as e: + logger.warning(f"Failed to parse result from stdout: {str(e)}") + + # Create result dictionary + execution_result = { + "success": process.returncode == 0, + "output": stdout, + "error": stderr if process.returncode != 0 else "", + "result": result_data, + "exit_code": process.returncode + } + except subprocess.TimeoutExpired: + logger.error(f"Execution timed out after {self.timeout} seconds") 
execution_result = { "success": False, "output": "", - "error": f"Zeitüberschreitung bei der Ausführung (Timeout nach {self.timeout} Sekunden)", + "error": f"Execution timed out (timeout after {self.timeout} seconds)", "result": None, - "installed_packages": required_packages + "exit_code": -1 } except Exception as e: + logger.error(f"Execution error: {str(e)}") execution_result = { "success": False, "output": "", - "error": f"Fehler bei der Ausführung: {str(e)}", + "error": f"Execution error: {str(e)}", "result": None, - "installed_packages": required_packages + "exit_code": -1 } - # Informationen zur Paketinstallation hinzufügen - if install_log: - execution_result["package_install_log"] = install_log - - # Temporäre Dateien aufräumen - self._cleanup_temp_files([code_file_path, input_path, output_path]) + # Clean up temporary code file + try: + if os.path.exists(code_file): + os.remove(code_file) + except Exception as e: + logger.warning(f"Error cleaning up temporary code file: {e}") return execution_result - - - def _cleanup_temp_files(self, file_paths: List[str]): - """Räumt temporäre Dateien auf.""" - for path in file_paths: - if path and os.path.exists(path): - try: - os.remove(path) - except Exception as e: - logger.warning(f"Konnte temporäre Datei nicht löschen {path}: {e}") - + def cleanup(self): - """Räumt alle temporären Ressourcen auf.""" + """Clean up temporary resources.""" + # Skip cleanup for persistent environments + if self.is_persistent and self.workflow_id: + logger.info(f"Skipping cleanup for persistent environment of workflow {self.workflow_id}") + return + + # Clean up temporary directory if self.temp_dir and os.path.exists(self.temp_dir): - import shutil try: shutil.rmtree(self.temp_dir) - logger.info(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}") + logger.info(f"Deleted temporary directory: {self.temp_dir}") except Exception as e: - logger.warning(f"Konnte temporäres Verzeichnis nicht löschen {self.temp_dir}: {e}") + logger.warning(f"Could not delete temporary directory {self.temp_dir}: {e}") def __del__(self): - """Aufräumen beim Garbage Collection.""" + """Clean up during garbage collection.""" self.cleanup() +def get_error_recommendation(error_message: str) -> str: + """Generate recommendations based on error message.""" + if "ImportError" in error_message or "ModuleNotFoundError" in error_message: + return """ +### Recommendation +The error indicates a missing Python module. Try using standard libraries or common data analysis modules. +""" + elif "PermissionError" in error_message: + return """ +### Recommendation +The code doesn't have the necessary permissions to access files or directories. +""" + elif "SyntaxError" in error_message: + return """ +### Recommendation +There's a syntax error in the code. Check for missing parentheses, quotes, colons, or indentation errors. +""" + elif "FileNotFoundError" in error_message: + return """ +### Recommendation +A file could not be found. Check the file path and make sure the file exists. +""" + else: + return """ +### Recommendation +To fix the error: +1. Check the exact error message +2. Simplify the code and test step by step +3. 
Use try/except blocks for error-prone operations +""" + + class CoderAgent(BaseAgent): - """Erweiterter Agent für die Entwicklung und Ausführung von Python-Code""" + """Agent for developing and executing Python code""" def __init__(self): """Initialize the coder agent with proper type and capabilities""" @@ -479,10 +470,18 @@ class CoderAgent(BaseAgent): self.id = "coder" self.type = "coder" self.name = "Python Code Agent" - self.description = "Entwickelt und führt Python-Code aus" + self.description = "Develops and executes Python code" self.capabilities = "code_development,data_processing,file_processing,automation" self.result_format = "python_code" + # Add document capabilities + self.supports_documents = True + self.document_capabilities = ["read", "reference", "create"] + self.required_context = ["workflow_id"] + + # Initialize protocol + self.protocol = AgentCommunicationProtocol() + # Init utilities self.file_utils = FileUtils() self.message_utils = MessageUtils() @@ -497,117 +496,120 @@ class CoderAgent(BaseAgent): def get_agent_info(self) -> Dict[str, Any]: """Get agent information for agent registry""" - return { - "id": self.id, - "type": self.type, - "name": self.name, - "description": self.description, - "capabilities": self.capabilities, - "result_format": self.result_format, + info = super().get_agent_info() + info.update({ "metadata": { "timeout": self.executor_timeout, "memory_limit": self.executor_memory_limit } - } + }) + return info - async def process_message(self, message: Dict[str, Any], - workflow: Dict[str, Any], - context: Dict[str, Any] = None, - log_func=None) -> Dict[str, Any]: + async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: """ - Processes a message to develop and execute Python code. + Process a message to develop and execute Python code. 
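+        Code fenced in the incoming message is executed directly; otherwise
+        code is generated from the request via _generate_code_from_prompt()
+        and run in a SimpleCodeExecutor sandbox.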
Args: message: The message to process - workflow: The current workflow context: Additional context information - log_func: Function for workflow logging Returns: Response message """ - # Initialize logging - workflow_id = workflow.get("id") - logging_utils = LoggingUtils(workflow_id, log_func) - logging_utils.info(f"CoderAgent startet Verarbeitung", "agents") + # Extract workflow_id from context or message + workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown") - # Initialize utilities - workflow_utils = WorkflowUtils(workflow_id) + # Get or create logging_utils + log_func = context.get("log_func") if context else None + logging_utils = LoggingUtils(workflow_id, log_func) # Create response message - response = self.message_utils.create_message(workflow_id, role="assistant") - response["agent_type"] = self.type - response["agent_name"] = self.name - response["parent_message_id"] = message.get("id") + response = { + "role": "assistant", + "content": "", + "agent_id": self.id, + "agent_type": self.type, + "agent_name": self.name, + "workflow_id": workflow_id, + "documents": [] + } try: - # Check if user directly provided code + # Extract content and documents content = message.get("content", "") documents = message.get("documents", []) # Extract code from message content code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content) code_to_execute = None + requirements = [] if code_blocks: # Use the first code block found code_to_execute = code_blocks[0] - logging_utils.info(f"Code aus Nachricht extrahiert ({len(code_to_execute)} Zeichen)", "agents") + # Clean the code to remove any markdown formatting + code_to_execute = self._clean_code(code_to_execute) + logging_utils.info(f"Code extracted from message ({len(code_to_execute)} characters)", "agents") else: - # Generate code based on the message content using OpenAI - logging_utils.info("Kein Code in der Nachricht gefunden, generiere neuen Code mit AI", "agents") + # Generate code based on the message content using AI + logging_utils.info("No code found in message, generating new code with AI", "agents") # Generate code using AI - code_to_execute = await self._generate_code_from_prompt(content, documents, context) + code_to_execute, requirements = await self._generate_code_from_prompt(content, documents) if not code_to_execute: - logging_utils.warning("AI konnte keinen Code generieren", "agents") - response["content"] = "Ich konnte basierend auf Ihrer Anfrage keinen ausführbaren Code generieren. Bitte geben Sie detailliertere Anweisungen an." + logging_utils.warning("AI could not generate code", "agents") + response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions." 
self.message_utils.finalize_message(response) return response - logging_utils.info(f"Code mit AI generiert ({len(code_to_execute)} Zeichen)", "agents") + logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents") - # Get database interface for code execution - mandate_id = workflow.get("mandate_id", 0) - user_id = workflow.get("user_id", 0) - lucydom_interface = get_lucydom_interface(mandate_id, user_id) - # Execute the code if code_to_execute: - logging_utils.info("Führe Code aus", "execution") + logging_utils.info("Executing code", "execution") # Prepare execution context execution_context = { "workflow_id": workflow_id, "documents": documents, "message": message, - "mandate_id": mandate_id, - "user_id": user_id + "log_func": log_func } + # Send a status update + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Processing code execution request", + sender_id=self.id, + status="in_progress", + progress=0.5, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) + # Execute code - result = await self._execute_code(code_to_execute, lucydom_interface, execution_context) + result = await self._execute_code(code_to_execute, requirements, execution_context) # Prepare response if result.get("success", False): # Code execution successful output = result.get("output", "") execution_result = result.get("result") - logging_utils.info("Code erfolgreich ausgeführt", "execution") + logging_utils.info("Code executed successfully", "execution") # Format response content - response_content = f"## Code erfolgreich ausgeführt\n\n" + response_content = f"## Code executed successfully\n\n" # Include the executed code - response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n" + response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n" # Include the output if available if output: - response_content += f"### Ausgabe\n\n```\n{output}\n```\n\n" + response_content += f"### Output\n\n```\n{output}\n```\n\n" # Include the execution result if available if execution_result: result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result) - response_content += f"### Ergebnis\n\n```\n{result_str}\n```\n\n" + response_content += f"### Result\n\n```\n{result_str}\n```\n\n" response["content"] = response_content @@ -617,359 +619,361 @@ class CoderAgent(BaseAgent): for file_info in created_files: file_id = file_info.get("id") if file_id: - logging_utils.info(f"Füge erstellte Datei {file_info.get('name', file_id)} zu Dokumenten hinzu", "files") - file_meta = lucydom_interface.get_file(file_id) - if file_meta: - # Add file document to the response - doc = { - "id": f"doc_{uuid.uuid4()}", - "source": file_meta, - "type": "file" - } - response["documents"].append(doc) + logging_utils.info(f"Adding created file {file_info.get('name', file_id)} to documents", "files") + # Add file document to the response + doc = { + "id": f"doc_{uuid.uuid4()}", + "source": file_info, + "type": "file" + } + response["documents"].append(doc) else: # Code execution failed - error = result.get("error", "Unbekannter Fehler") - logging_utils.error(f"Fehler bei der Codeausführung: {error}", "execution") + error = result.get("error", "Unknown error") + logging_utils.error(f"Error during code execution: {error}", "execution") # Format error response - response_content = f"## Fehler bei der Codeausführung\n\n" - 
response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n" - response_content += f"### Fehler\n\n```\n{error}\n```\n\n" + response_content = f"## Error during code execution\n\n" + response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n" + response_content += f"### Error\n\n```\n{error}\n```\n\n" # Add recommendation based on error - response_content += self._get_error_recommendation(error) + response_content += get_error_recommendation(error) response["content"] = response_content else: # No code to execute - response["content"] = "Ich konnte keinen ausführbaren Code finden oder generieren. Bitte geben Sie Python-Code an oder erläutern Sie Ihre Anforderungen genauer." + response["content"] = "I couldn't find or generate executable code. Please provide Python code or explain your requirements more clearly." # Finalize response self.message_utils.finalize_message(response) # Log success - logging_utils.info("CoderAgent hat die Anfrage erfolgreich verarbeitet", "agents") + logging_utils.info("CoderAgent has successfully processed the request", "agents") return response except Exception as e: - error_msg = f"Fehler bei der Verarbeitung durch den CoderAgent: {str(e)}" + error_msg = f"Error during processing by the CoderAgent: {str(e)}" logging_utils.error(error_msg, "error") # Create error response - response["content"] = f"## Fehler bei der Verarbeitung\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```" + response["content"] = f"## Processing Error\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```" self.message_utils.finalize_message(response) return response - - async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]], context: Dict[str, Any] = None) -> str: + def _clean_code(self, code: str) -> str: """ - Generate Python code from a prompt using OpenAI service. + Clean up code by removing markdown code block markers and other formatting artifacts. + + Args: + code: The code string to clean + + Returns: + Cleaned code string + """ + import re + + # Remove code block markers at beginning/end + code = re.sub(r'^```(?:python)?\s*', '', code) + code = re.sub(r'```\s*$', '', code) + + # Remove any trailing markdown code blocks that might have been added by the AI + lines = code.split('\n') + clean_lines = [] + + # Flag to track if we're in a trailing markdown section + in_trailing_markdown = False + + for line in reversed(lines): + stripped = line.strip() + + # Check if this line contains only backticks (``` or ` or ``) + if re.match(r'^`{1,3}$', stripped): + in_trailing_markdown = True + continue + + # Check if this is a markdown comment or note + if in_trailing_markdown and (stripped.startswith('#') or + stripped.lower().startswith('note:') or + stripped.lower().startswith('example:')): + continue + + # If we've reached actual code, stop considering trailing markdown + if stripped and not in_trailing_markdown: + in_trailing_markdown = False + + # Add this line if it's not part of trailing markdown + if not in_trailing_markdown: + clean_lines.insert(0, line) + + # Join the lines back together + clean_code = '\n'.join(clean_lines) + + # Final cleanup for any stray backticks + clean_code = re.sub(r'`{1,3}\s*$', '', clean_code) + + return clean_code.strip() + + async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]]) -> Tuple[str, List[str]]: + """ + Generate Python code from a prompt using AI service. 
Args: prompt: The prompt to generate code from documents: Documents associated with the prompt - context: Additional context information Returns: - Generated Python code + Tuple of (generated Python code, required packages) """ try: # Initialize AI service chat_service = ChatService() - # Prepare a detailed prompt for code generation - ai_prompt = self._prepare_code_prompt(prompt, documents) + # Prepare a prompt for code generation + ai_prompt = f"""Generate Python code to solve the following task: +{prompt} + +Available input files: +""" + # Add information about available documents + if documents: + for i, doc in enumerate(documents): + source = doc.get("source", {}) + doc_name = source.get("name", f"Document {i+1}") + doc_type = source.get("content_type", "unknown") + doc_id = source.get("id", "") + + ai_prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id}, path: './input_data/{doc_name}')\n" + else: + ai_prompt += "No input files available.\n" - # Create messages for the OpenAI API + ai_prompt += """ +IMPORTANT REQUIREMENTS: +1. Your code MUST define a 'result' variable to store the final output of your code. +2. At the end of your script, it should print or output the result variable. +3. Make your 'result' variable a dictionary or another JSON-serializable data structure that contains all relevant output. +4. Input files are accessible in the './input_data/' directory. +5. Keep code well-documented with comments explaining key operations. +6. Make your code complete and self-contained. +7. Include proper error handling. + +FORMAT INSTRUCTIONS: +- Provide ONLY the Python code without ANY introduction, explanation, or conclusion text +- DO NOT include code block markers like ```python or ``` +- DO NOT explain what the code does before or after it +- DO NOT include any text that is not valid Python code +- Start your response directly with valid Python code +- End your response with valid Python code + +For required packages, place them in a specially formatted comment at the top of your code like this: +# REQUIREMENTS: pandas,numpy,matplotlib,requests + +Your entire response must be valid Python that can be executed without modification. +""" + + # Create messages for the API messages = [ - {"role": "system", "content": "You are a Python code generator. Generate only executable Python code without explanations. The code should be well-commented, handle errors appropriately, and follow best practices."}, + {"role": "system", "content": "You are a Python code generator that provides ONLY clean, executable Python code without any explanations, markdown formatting, or non-code text. 
Your response should be nothing but valid Python code that can be executed directly."}, {"role": "user", "content": ai_prompt} ] - # Call the OpenAI API - logging.info(f"Calling OpenAI API to generate code") + # Call the API + logger.info("Calling AI API to generate code") generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens) - # Extract code from the response (the AI might wrap it in markdown) - code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', generated_content) + # Clean the generated content to ensure it's only valid Python code + code = self._clean_code(generated_content) - if code_blocks: - # Use the first code block found - return code_blocks[0].strip() - else: - # If no code block is found, return the raw response - return generated_content.strip() - + # Extract requirements from special comment at the top of the code + requirements = [] + for line in code.split('\n'): + if line.strip().startswith("# REQUIREMENTS:"): + req_str = line.replace("# REQUIREMENTS:", "").strip() + requirements = [r.strip() for r in req_str.split(',') if r.strip()] + break + + return code, requirements + except Exception as e: logging.error(f"Error generating code with AI: {str(e)}", exc_info=True) - # Return a basic error-handling code - error_msg = str(e).replace('"', '\\"') + # Return basic error handling code and no requirements + error_str = str(e).replace('"', '\\"') return f""" # Error during code generation -print("An error occurred during code generation: {error_msg}") - +print("An error occurred during code generation: {error_str}") # Return an error result -result = {{"error": "Code generation failed", "message": "{error_str}"}} -""" +result = {{"error": "Code generation failed", "message": "{error_str}"}} +""", [] - - def _prepare_code_prompt(self, user_prompt: str, documents: List[Dict[str, Any]]) -> str: + async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]: """ - Prepares a detailed prompt for the AI to generate Python code. + Execute Python code using the SimpleCodeExecutor.
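+        Requirements are installed into a per-workflow virtual environment
+        that is kept alive between calls whenever the context carries a
+        workflow_id.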
Args: - user_prompt: The original user request - documents: Available documents + code: The Python code to execute + requirements: List of required packages + context: Additional context for execution Returns: - A detailed prompt for code generation + Result of code execution """ - # Start with the user's request - prompt = f"""Generate Python code to solve the following task: -{user_prompt} - -""" + # Get workflow ID and set up logging + workflow_id = context.get("workflow_id", "") if context else "" + logging_utils = None + if context and "log_func" in context and workflow_id: + logging_utils = LoggingUtils(workflow_id, context.get("log_func")) - # Add information about available documents - if documents: - prompt += "\nAvailable documents:\n" - for i, doc in enumerate(documents): - source = doc.get("source", {}) - doc_name = source.get("name", f"Document {i+1}") - doc_type = source.get("content_type", "unknown") - doc_id = source.get("id", "") - - prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id})\n" - - # Add information about how to access documents - prompt += """ -To access these documents, use: -- await load_file(file_id, encoding='utf-8') for text files -- await load_file(file_id) for binary files -""" + if logging_utils: + logging_utils.info("Executing Python code", "execution") + if requirements: + logging_utils.info(f"Required packages: {', '.join(requirements)}", "execution") - # Add information about available helper functions - prompt += """ -Available helper functions: - -1. load_file(file_id, encoding=None): - - Asynchronous function to load file content - - Returns string if encoding is provided, otherwise bytes - -2. save_file(content, file_name, content_type=None): - - Saves content as a file and returns metadata - -3. update_file(file_id, content, update_metadata=None): - - Updates an existing file with new content - -4. get_file_metadata(file_id): - - Returns metadata for a file - -5. process_csv(content, operations=None): - - Processes CSV data with pandas - -6. extract_text_from_pdf(pdf_data): - - Extracts text from PDF documents - -Requirements: -- The code should be fully functional and handle errors -- Use async/await for asynchronous operations -- Return results in the 'result' variable as a dictionary -- For file operations, use the provided helper functions -- Always import necessary libraries at the top -""" - return prompt - - async def _execute_code(self, code: str, lucydom_interface, context: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Führt Python-Code mit dem eingebauten CodeExecutor aus.
- - Args: - code: Der auszuführende Python-Code - lucydom_interface: Interface für Datenbankzugriffe - context: Zusätzlicher Kontext - - Returns: - Ergebnis der Codeausführung - """ try: - # Systemfunktionen für den Code vorbereiten - system_functions_code = self._prepare_system_functions(lucydom_interface) - - # Code mit Systemfunktionen erweitern - enhanced_code = system_functions_code + "\n\n" + code - - # Liste verfügbarer Module - available_modules = [ - "modules.lucydom_interface", - "modules.lucydom_model", - "modules.agentservice_utils" - ] - - # Liste erlaubter Pakete - allowed_packages = None # None bedeutet alle erlaubt außer explizit blockierte - - # Liste blockierter Pakete + # List of blocked packages for security blocked_packages = [ - "cryptography", "flask", "django", "tornado", # Sicherheitsrisiken - "tensorflow", "pytorch", "scikit-learn" # Ressourcenintensiv + "cryptography", "flask", "django", "tornado", # Security risks + "tensorflow", "torch", "pytorch", "scikit-learn", "sklearn" # Resource intensive; torch/sklearn are the names actually imported/installed ] - # CodeExecutor initialisieren - executor = CodeExecutor( - app_modules=available_modules, + # Initialize SimpleCodeExecutor with requirements and workflow_id for persistence + executor = SimpleCodeExecutor( + workflow_id=workflow_id, timeout=self.executor_timeout, max_memory_mb=self.executor_memory_limit, - allowed_packages=allowed_packages, + requirements=requirements, blocked_packages=blocked_packages ) - try: - # Eingabedaten vorbereiten - input_data = { - "context": context, - "workflow_id": context.get("workflow_id", "") if context else "", - } + # Prepare input data for the code + input_data = {"context": context, "workflow_id": workflow_id} + + # Add file references if available + if context and "documents" in context: + input_data["files"] = [ + { + "id": doc.get("source", {}).get("id", ""), + "name": doc.get("source", {}).get("name", ""), + "type": doc.get("source", {}).get("content_type", ""), + "path": doc.get("source", {}).get("path", "") # Full file path + } + for doc in context.get("documents", []) + if doc.get("source", {}).get("type") == "file" + ] + + # Extract document content from message but don't create files yet + if context and "message" in context and "content" in context["message"]: + message_content = context["message"]["content"] - # Dateireferenzen hinzufügen - if context and "documents" in context: - file_refs = [] - for doc in context.get("documents", []): - source = doc.get("source", {}) - if source.get("type") == "file": - file_refs.append({ - "id": source.get("id", ""), - "name": source.get("name", ""), - "type": source.get("content_type", "") + # Check if there's extracted document content + if "=== EXTRACTED DOCUMENT CONTENT ===" in message_content: + # Add a special field to input_data for extracted content + input_data["extracted_documents"] = [] + + # Split by the document marker pattern + pattern = r"--- (.*?)
---\s*" + import re + doc_sections = re.split(pattern, message_content) + + # Skip the first section (before any "--- doc ---" marker) + for i in range(1, len(doc_sections), 2): + if i+1 < len(doc_sections): + doc_name = doc_sections[i].strip() + doc_content = doc_sections[i+1].strip() + + # Store the extracted content to be processed by the executor + input_data["extracted_documents"].append({ + "name": doc_name, + "content": doc_content, + "type": "text/csv" if doc_name.endswith(".csv") else "text/plain" }) - input_data["files"] = file_refs - - # Code ausführen - result = executor.execute_code(enhanced_code, input_data) - - # Log für die Ausführung + if logging_utils: + logging_utils.info(f"Extracted document content: {doc_name}", "execution") + + + # Execute the code + if logging_utils: + logging_utils.info(f"Executing code with input data containing {len(input_data.get('files', []))} files", "execution") + + result = executor.execute_code(code, input_data) + + # Log the execution results + if logging_utils: if result.get("success", False): - logger.info(f"Code erfolgreich ausgeführt") + logging_utils.info("Code executed successfully", "execution") + + # Log a preview of the output output = result.get("output", "") if output: - logger.debug(f"Ausgabe: {output[:200]}..." if len(output) > 200 else output) + preview = output[:1000] + "..." if len(output) > 1000 else output + logging_utils.info(f"Output preview: {preview}", "execution") + + # Log a preview of the result + execution_result = result.get("result") + if execution_result: + if isinstance(execution_result, (dict, list)): + result_str = json.dumps(execution_result, indent=2) + preview = result_str[:1000] + "..." if len(result_str) > 1000 else result_str + else: + str_result = str(execution_result) + preview = str_result[:1000] + "..." if len(str_result) > 1000 else str_result + + logging_utils.info(f"Result preview: {preview}", "execution") else: - logger.error(f"Fehler bei der Codeausführung: {result.get('error', 'Unbekannter Fehler')}") - - return result - finally: - # Ressourcen freigeben + # Log error information + error = result.get("error", "Unknown error") + logging_utils.error(f"Error during code execution: {error}", "execution") + print("DEBUG CODE-ERROR:",code,"#END") + + # Clean up non-persistent environments + if not executor.is_persistent: executor.cleanup() - + + return result + except Exception as e: - logger.error(f"Fehler bei der Codeausführung: {str(e)}", exc_info=True) + error_message = f"Error during code execution: {str(e)}\n{traceback.format_exc()}" + + if logging_utils: + logging_utils.error(error_message, "error") return { "success": False, "output": "", - "error": f"Fehler bei der Ausführung: {str(e)}\n{traceback.format_exc()}", + "error": error_message, "result": None } - - def _prepare_system_functions(self, lucydom_interface) -> str: - """ - Bereitet die Systemfunktionen für den auszuführenden Code vor. 
- Args: - lucydom_interface: Interface für Datenbankzugriffe - - Returns: - Python-Code für die Systemfunktionen - """ - # Get all helper functions from the module - helper_functions = [] - for name, func in inspect.getmembers(agentservice_code_helpers, inspect.isfunction): - # Get the source code of the function - source = inspect.getsource(func) - helper_functions.append(source) - - # Combine all functions into a single string - functions_code = "\n\n".join(helper_functions) - - # Add code to make lucydom_interface available - setup_code = """ -# lucydom_interface global verfügbar machen -import asyncio -""" - - return functions_code + "\n\n" + setup_code - - def _get_error_recommendation(self, error_message: str) -> str: - """ - Generate recommendations based on error message - - Args: - error_message: The error message - - Returns: - Recommendation text - """ - # Common error patterns and recommendations - if "ImportError" in error_message or "ModuleNotFoundError" in error_message: - return """ -### Empfehlung -Der Fehler deutet auf ein fehlendes Python-Modul hin. Einige Gründe könnten sein: -1. Das Modul ist in der Ausführungsumgebung nicht installiert -2. Das Modul ist aus Sicherheitsgründen blockiert (z.B. tensorflow, pytorch) -3. Es gibt einen Tippfehler im Modulnamen -Versuchen Sie, nur Standardbibliotheken oder gängige Datenanalyse-Module wie pandas, numpy, matplotlib zu verwenden. -""" - elif "PermissionError" in error_message: - return """ -### Empfehlung -Der Code hat nicht die nötigen Berechtigungen für den Zugriff auf Dateien oder Verzeichnisse. -Bitte nutzen Sie die bereitgestellten Funktionen `load_file()` und `save_file()` für Dateizugriffe. -""" - elif "SyntaxError" in error_message: - return """ -### Empfehlung -Im Code gibt es einen Syntaxfehler. Häufige Ursachen sind: -1. Fehlende oder überzählige Klammern, Anführungszeichen oder Doppelpunkte -2. Einrückungsfehler -3. Ungültige Python-Syntax + def send_error_message(self, error_description: str, sender_id: str, receiver_id: str = None, context_id: str = None) -> AgentMessage: + """Send an error message using the protocol""" + return self.protocol.create_error_message( + error_description=error_description, + sender_id=sender_id, + receiver_id=receiver_id, + error_type="code_execution", + context_id=context_id + ) -Überprüfen Sie den Code auf solche Fehler und korrigieren Sie ihn. -""" - elif "FileNotFoundError" in error_message: - return """ -### Empfehlung -Eine Datei konnte nicht gefunden werden. Wenn Sie auf Dateien zugreifen möchten: -1. Nutzen Sie die bereitgestellte `load_file(file_id)` Funktion -2. Stellen Sie sicher, dass die Datei-ID korrekt ist -3. Prüfen Sie, ob die Datei im Workflow verfügbar ist -""" - elif "TypeError" in error_message: - return """ -### Empfehlung -Es gibt einen Typfehler im Code. Überprüfen Sie: -1. Ob die richtigen Datentypen verwendet werden -2. Ob Konvertierungen zwischen Typen (z.B. str zu int) korrekt durchgeführt werden -3. Ob die Parameter für Funktionen den erwarteten Typen entsprechen -""" - else: - return """ -### Empfehlung -Um den Fehler zu beheben: -1. Überprüfen Sie die genaue Fehlermeldung -2. Vereinfachen Sie den Code und testen Sie schrittweise -3. Stellen Sie sicher, dass alle benötigten Daten korrekt geladen werden -4. 
Verwenden Sie try/except-Blöcke für fehleranfällige Operationen -""" - -# Singleton-Instanz +# Singleton instance _coder_agent = None def get_coder_agent(): - """Gibt eine Singleton-Instanz des Coder-Agenten zurück""" + """Returns a singleton instance of the Coder Agent""" global _coder_agent if _coder_agent is None: _coder_agent = CoderAgent() diff --git a/gwserver/modules/agentservice_agent_documentation.py b/gwserver/modules/agentservice_agent_documentation.py index 35e3d64f..380d4e5e 100644 --- a/gwserver/modules/agentservice_agent_documentation.py +++ b/gwserver/modules/agentservice_agent_documentation.py @@ -1,7 +1,7 @@ """ Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten. -Verwendet einen strukturierten mehrstufigen Prozess zur Erstellung hochwertiger Dokumentation. -Angepasst für das refaktorisierte Core-Modul. +Uses an adaptive process for producing high-quality documentation based on the complexity of the request. +Adapted for the refactored core module and the AgentCommunicationProtocol. """ import logging @@ -15,428 +15,439 @@ import uuid from modules.agentservice_base import BaseAgent from connectors.connector_aichat_openai import ChatService from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils +from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol +from modules.agentservice_filemanager import FileManager # Import the file manager logger = logging.getLogger(__name__) class DocumentationAgent(BaseAgent): - """Agent für die Erstellung von Dokumentation und strukturierten Inhalten""" + """Agent for creating documentation and structured content""" def __init__(self): - """Initialisiert den Dokumentations-Agenten""" + """Initialize the documentation agent""" super().__init__() - self.id = "documentation" - self.name = "Dokumentation" + self.id = "documentation_agent" + self.name = "Documentation Specialist" self.type = "documentation" - self.description = "Erstellt Dokumentation und strukturierte Inhalte" + self.description = "Creates documentation and structured content" self.capabilities = "report_generation,documentation,content_structuring,technical_writing,knowledge_organization" - self.instructions = """ - Du bist der Dokumentations-Agent. Deine Aufgabe: - 1. Komplexe Informationen in klare, strukturierte Dokumente umsetzen - 2. Verschiedene Dokumentformate erstellen - 3. Informationen aus verschiedenen Quellen strukturieren - 4. Technische Konzepte verständlich erklären - 5.
Konsistente Formatierung sicherstellen - """ self.result_format = "FormattedDocument" - - # Chat-Service initialisieren - self.chat_service = None - - # Utility-Klassen initialisieren - self.message_utils = MessageUtils() + # Initialize AI service + self.ai_service = None + + # Initialize document handler + self.document_handler = None + + # Document capabilities + self.supports_documents = True + self.document_capabilities = ["read", "reference", "create"] + self.required_context = ["document_purpose", "target_audience"] + + # Initialize protocol + self.protocol = AgentCommunicationProtocol() + + # Initialize utilities + self.message_utils = MessageUtils() + + # Track the latest generated document + self.last_document = {} + def get_agent_info(self) -> Dict[str, Any]: """Get agent information for agent registry""" - return { - "id": self.id, - "type": self.type, - "name": self.name, - "description": self.description, - "capabilities": self.capabilities, - "result_format": self.result_format - } - - def get_base_prompt(self, document_type: str = "") -> str: - """ - Generiert einen Basis-Prompt für den Dokumentations-Agenten. - - Args: - document_type: Typ des zu erstellenden Dokuments - - Returns: - Basis-Prompt für den Dokumentations-Agenten - """ - # Basis-Prompt - prompt = f""" - Du bist {self.name}, ein {self.type} Agent. - - {self.description} - - Fähigkeiten: {self.capabilities} - - {self.instructions} - """ - - # Dokumenttyp-spezifische Anweisungen hinzufügen - if document_type: - prompt += self._get_document_type_instructions(document_type) - - return prompt.strip() - - def _get_document_type_instructions(self, document_type: str) -> str: - """ - Gibt spezifische Anweisungen für einen bestimmten Dokumenttyp zurück. - - Args: - document_type: Typ des Dokuments - - Returns: - Spezifische Anweisungen für den Dokumenttyp - """ - document_type = document_type.lower() - - if "handbuch" in document_type or "anleitung" in document_type or "guide" in document_type: - return "\n\nHANDBUCH: Beginne mit Zweckbeschreibung, strukturiere in logische Schritte, verwende direkte Anweisungen." - elif "bericht" in document_type or "report" in document_type: - return "\n\nBERICHT: Beginne mit Executive Summary, strukturiere in thematische Abschnitte, halte professionellen Ton." - elif "prozess" in document_type or "process" in document_type: - return "\n\nPROZESS: Beschreibe Zweck, Ziele, Beteiligte, sequenzielle Schritte, Inputs/Outputs und Verantwortlichkeiten." - elif "präsentation" in document_type or "presentation" in document_type: - return "\n\nPRÄSENTATION: Klare Hauptpunkte, visuelle Elemente, Einleitung-Hauptteil-Schluss Struktur." - else: - return "\n\nDOKUMENT: Erstelle ein gut strukturiertes Dokument mit klarer Gliederung und präziser Sprache." - - def _detect_document_type(self, message: str) -> str: - """ - Erkennt den Dokumenttyp aus der Nachricht. - - Args: - message: Nachricht des Benutzers - - Returns: - Erkannter Dokumenttyp - """ - message = message.lower() - - if "handbuch" in message or "anleitung" in message or "guide" in message: - return "handbuch" - elif "bericht" in message or "report" in message: - return "bericht" - elif "prozess" in message or "process" in message or "ablauf" in message: - return "prozess" - elif "präsentation" in message or "presentation" in message or "folien" in message: - return "präsentation" - else: - return "dokument" - - async def generate_title(self, task: str, document_type: str) -> str: - """ - Generiert einen Titel für das Dokument. 
- - Args: - task: Die Aufgabe/Anfrage - document_type: Typ des Dokuments - - Returns: - Generierter Titel - """ - prompt = f""" - Erstelle einen prägnanten, professionellen Titel für folgendes {document_type.capitalize()}: - - AUFTRAG: {task} - - Gib NUR den Titel zurück, ohne weitere Erklärungen oder Formatierungen. - """ - - messages = [ - {"role": "system", "content": "Du bist ein Experte für die Erstellung von Dokumenttiteln."}, - {"role": "user", "content": prompt} - ] - - title = await self.chat_service.call_api(messages) - - # Bereinige den Titel von Anführungszeichen und Überschriften-Symbolen - title = title.strip('"\'#*- \n\t') - - return title - - async def generate_summary(self, task: str, document_type: str, title: str) -> str: - """ - Generiert eine Zusammenfassung für das Dokument. - - Args: - task: Die Aufgabe/Anfrage - document_type: Typ des Dokuments - title: Titel des Dokuments - - Returns: - Generierte Zusammenfassung - """ - prompt = f""" - Erstelle eine prägnante Zusammenfassung für folgendes Dokument: - - TITEL: {title} - TYP: {document_type.capitalize()} - AUFTRAG: {task} - - Die Zusammenfassung soll einen Überblick über den Zweck und die Hauptinhalte des Dokuments geben. - Sie sollte etwa 3-5 Sätze umfassen und als eigenständiger Abschnitt funktionieren. - """ - - messages = [ - {"role": "system", "content": "Du bist ein Experte für die Erstellung prägnanter Dokumentzusammenfassungen."}, - {"role": "user", "content": prompt} - ] - - summary = await self.chat_service.call_api(messages) - - return summary.strip() - - async def generate_toc_with_prompts(self, task: str, document_type: str, title: str, summary: str) -> Dict[str, str]: - """ - Generiert ein Inhaltsverzeichnis mit Prompts für die einzelnen Kapitel. - - Args: - task: Die Aufgabe/Anfrage - document_type: Typ des Dokuments - title: Titel des Dokuments - summary: Zusammenfassung des Dokuments - - Returns: - Dict mit Kapiteltiteln als Schlüssel und Prompts als Werte - """ - prompt = f""" - Erstelle ein strukturiertes Inhaltsverzeichnis für folgendes Dokument: - - TITEL: {title} - TYP: {document_type.capitalize()} - AUFTRAG: {task} - ZUSAMMENFASSUNG: {summary} - - Für jedes Kapitel gib auch einen kurzen Prompt an, der beschreibt, was in diesem Kapitel behandelt werden soll. - Formatiere deine Antwort als JSON-Objekt mit folgendem Format: - {{ - "Kapitel 1: Titel": "Prompt für Kapitel 1", - "Kapitel 2: Titel": "Prompt für Kapitel 2", - ... - }} - - Beschränke dich auf sowenige Kapitel wie nötig, die das Thema umfassend behandeln. Schreibe in Prosa und nur als Liste, wenn auch angebracht. - """ - - messages = [ - {"role": "system", "content": "Du bist ein Experte für die Strukturierung von Dokumenten und die Erstellung von Inhaltsverzeichnissen."}, - {"role": "user", "content": prompt} - ] - - toc_response = await self.chat_service.call_api(messages) - - # JSON aus der Antwort extrahieren - import json - import re - - # Markdown-Code-Blöcke entfernen, falls vorhanden - toc_response = re.sub(r'```json\s*|\s*```', '', toc_response) - - try: - toc_with_prompts = json.loads(toc_response) - return toc_with_prompts - except json.JSONDecodeError as e: - logger.error(f"Fehler beim Parsen des Inhaltsverzeichnisses: {str(e)}") - logger.error(f"Rohe Antwort: {toc_response}") - # Notfall-Fallback - return { - "1. Einleitung": "Einführung in das Thema und Überblick", - "2. Hauptteil": "Hauptinhalte des Dokuments", - "3. 
Schlussfolgerung": "Zusammenfassung und nächste Schritte" + info = super().get_agent_info() + info.update({ + "metadata": { + "document_types": ["manual", "report", "process", "presentation", "document"], + "formats": ["markdown", "text"] } + }) + return info - async def generate_chapter_content(self, chapter_title: str, chapter_prompt: str, - task: str, document_type: str, title: str, summary: str) -> str: + async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: """ - Generiert den Inhalt für ein bestimmtes Kapitel. + Process a message and create documentation. Args: - chapter_title: Titel des Kapitels - chapter_prompt: Prompt für das Kapitel - task: Die Aufgabe/Anfrage - document_type: Typ des Dokuments - title: Titel des Dokuments - summary: Zusammenfassung des Dokuments + message: Input message + context: Optional context Returns: - Generierter Kapitelinhalt + Response with documentation """ - prompt = f""" - Erstelle detaillierten Inhalt für folgendes Kapitel eines {document_type}s: + # Extract workflow_id from context or message + workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown") - DOKUMENT-TITEL: {title} - AUFGABE: {task} - KAPITEL: {chapter_title} - ANWEISUNG FÜR DIESES KAPITEL: {chapter_prompt} - - Der Inhalt sollte detailliert, informativ und gut strukturiert sein. - Verwende bei Bedarf Unterüberschriften, Aufzählungen und Tabellen zur besseren Strukturierung. - Der Inhalt sollte direkt mit dem Kapiteltext beginnen, ohne den Kapiteltitel zu wiederholen. - """ - - messages = [ - {"role": "system", "content": "Du bist ein Experte für die Erstellung hochwertiger Dokumentationsinhalte."}, - {"role": "user", "content": prompt} - ] - - chapter_content = await self.chat_service.call_api(messages) - - return chapter_content.strip() - - def _format_final_document(self, title: str, summary: str, toc: Dict[str, str], chapter_contents: Dict[str, str]) -> str: - """ - Formatiert das endgültige Dokument aus allen Teilen. - - Args: - title: Titel des Dokuments - summary: Zusammenfassung - toc: Inhaltsverzeichnis (Dict mit Kapiteltiteln als Schlüssel) - chapter_contents: Kapitelinhalte (Dict mit Kapiteltiteln als Schlüssel und Inhalten als Werte) - - Returns: - Formatiertes Dokument - """ - # Titel formatieren - doc = f"# {title}\n\n" - - # Zusammenfassung hinzufügen - doc += f"## Zusammenfassung\n\n{summary}\n\n" - - # Inhaltsverzeichnis hinzufügen - doc += "## Inhaltsverzeichnis\n\n" - for idx, chapter in enumerate(toc.keys(), 1): - # Extrahiere den reinen Kapitelnamen (entferne Nummerierung, falls vorhanden) - clean_chapter = chapter - if chapter.strip().startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) and '. ' in chapter: - clean_chapter = chapter.split('. ', 1)[1] - - doc += f"{idx}. {clean_chapter}\n" - doc += "\n" - - # Kapitelinhalte hinzufügen - for idx, (chapter, content) in enumerate(chapter_contents.items(), 1): - # Extrahiere den reinen Kapitelnamen (entferne Nummerierung, falls vorhanden) - clean_chapter = chapter - if chapter.strip().startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) and '. ' in chapter: - clean_chapter = chapter.split('. ', 1)[1] - - doc += f"## {idx}. 
{clean_chapter}\n\n{content}\n\n" - - # Metadaten hinzufügen - doc += "---\n\n" - doc += f"**Erstellt durch:** {self.name}\n" - - return doc - - async def process_message(self, message: Dict[str, Any], - workflow: Dict[str, Any], - context: Dict[str, Any] = None, - log_func=None) -> Dict[str, Any]: - """ - Verarbeitet eine Nachricht und erstellt Dokumentation in einem strukturierten Prozess. - - Args: - message: Die zu verarbeitende Nachricht - workflow: Der aktuelle Workflow - context: Zusätzlicher Kontext - log_func: Funktion für Workflow-Logging - - Returns: - Die generierte Dokumentation - """ - # Initialize logging - workflow_id = workflow.get("id", "unknown") + # Get or create logging_utils + log_func = context.get("log_func") if context else None logging_utils = LoggingUtils(workflow_id, log_func) - logging_utils.info(f"DocumentationAgent startet Dokumentationserstellung", "agents") - # Create response message - response = self.message_utils.create_message(workflow_id, role="assistant") - response["agent_type"] = self.type - response["agent_name"] = self.name - response["parent_message_id"] = message.get("id") + # Create response structure + response = { + "role": "assistant", + "content": "", + "agent_id": self.id, + "agent_type": self.type, + "agent_name": self.name, + "result_format": self.result_format, + "workflow_id": workflow_id, + "documents": [] + } try: - # Chat-Service initialisieren, falls noch nicht geschehen - if self.chat_service is None: - self.chat_service = ChatService() - - # Task aus der Nachricht extrahieren - task = message.get("content", "") - if context and "task" in context: - task = context["task"] - - # Dokumenttyp erkennen - document_type = self._detect_document_type(task) - logging_utils.info(f"Dokumenttyp erkannt: {document_type}", "agents") - - # Schritt 1: Titel generieren - title = await self.generate_title(task, document_type) - logging_utils.info(f"Titel generiert: {title}", "agents") - - # Schritt 2: Zusammenfassung generieren - summary = await self.generate_summary(task, document_type, title) - logging_utils.info("Zusammenfassung generiert", "agents") - - # Schritt 3: Inhaltsverzeichnis mit Prompts generieren - toc_with_prompts = await self.generate_toc_with_prompts(task, document_type, title, summary) - logging_utils.info(f"Inhaltsverzeichnis mit {len(toc_with_prompts)} Kapiteln generiert", "agents") - - # Schritt 4: Kapitelinhalte in einer Schleife generieren - chapter_contents = {} - for chapter_title, chapter_prompt in toc_with_prompts.items(): - logging_utils.info(f"Generiere Inhalt für Kapitel: {chapter_title}", "agents") - content = await self.generate_chapter_content( - chapter_title, chapter_prompt, task, document_type, title, summary + # Create status update using protocol + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Starting document creation", + sender_id=self.id, + status="in_progress", + progress=0.0, + context_id=workflow_id ) - chapter_contents[chapter_title] = content + log_func(workflow_id, status_message.content, "info", self.id, self.name) - # Schritt 5: Dokument zusammenführen - final_document = self._format_final_document(title, summary, toc_with_prompts, chapter_contents) - logging_utils.info(f"Dokument fertiggestellt mit {len(final_document)} Zeichen", "agents") + # Extract task from message + task = message.get("content", "") - # Set the content in the response - response["content"] = final_document + # Detect document type + document_type = 
self._detect_document_type(task) + logging_utils.info(f"Creating {document_type} documentation", "execution") - # Finalize the message - self.message_utils.finalize_message(response) - response["result_format"] = self.result_format + # Process any attached documents + document_context = "" + if message.get("documents"): + logging_utils.info("Processing reference documents", "execution") + document_context = await self._process_documents(message) + + # Update progress + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Reference documents processed", + sender_id=self.id, + status="in_progress", + progress=0.3, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) - # Chat-Service schließen - await self.chat_service.close() - self.chat_service = None + # Enhanced prompt with document context + enhanced_prompt = f"{task}\n\n{document_context}" + + # Assess complexity of the task + is_complex = await self._assess_complexity(enhanced_prompt) + + # Generate title + title = await self._generate_title(enhanced_prompt, document_type) + logging_utils.info(f"Document title: {title}", "execution") + + # Update progress + if log_func: + status_message = self.protocol.create_status_update_message( + status_description=f"Generating {document_type}: {title}", + sender_id=self.id, + status="in_progress", + progress=0.5, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) + + # Generate content based on complexity + if is_complex: + # For complex documents, use the AI service with enhanced prompt + content = await self._generate_complex_document(enhanced_prompt, document_type, title) + logging_utils.info("Complex document generated", "execution") + else: + # For simple documents, use direct generation + content = await self._generate_simple_document(enhanced_prompt, document_type, title) + logging_utils.info("Simple document generated", "execution") + + # Final progress update + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Document creation completed", + sender_id=self.id, + status="completed", + progress=1.0, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) + + # Create a document artifact if document handler is available + if self.document_handler: + doc_id = f"doc_{uuid.uuid4()}" + document = { + "id": doc_id, + "source": { + "type": "generated", + "id": doc_id, + "name": title, + "content_type": "text/markdown", + "size": len(content) + }, + "contents": [ + { + "type": "text", + "text": content, + "is_extracted": True + } + ] + } + + # Add document to response + response["documents"].append(document) + + # Store the latest document + self.last_document = document + + # Update response content to reference the document + response["content"] = f"I've created a document titled '{title}' that contains the requested information. The document is attached to this message." 
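(Reviewer note: the new `modules/agentservice_protocol.py` that this patch imports everywhere is not shown in any hunk of this section. Inferring only from the call sites above — `create_status_update_message(...)`, `create_error_message(...)`, `create_result_message(...)` and the `.id`/`.content` attributes read from their return values — a minimal sketch of that module might look like the following; every field name, default, and message type beyond what the call sites show is an assumption, not the actual implementation. The same factory pattern would extend to `create_document_request_message` used by the webcrawler agent further below.)

```python
# Hypothetical sketch of modules/agentservice_protocol.py, inferred from its call sites.
# Anything not visible at a call site (field names, defaults, message types) is assumed.
import uuid
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, Optional


@dataclass
class AgentMessage:
    content: str                      # human-readable text, passed to log_func at call sites
    sender_id: str
    type: str = "generic"
    receiver_id: Optional[str] = None
    context_id: Optional[str] = None  # e.g. the workflow_id
    payload: Dict[str, Any] = field(default_factory=dict)
    id: str = field(default_factory=lambda: f"msg_{uuid.uuid4()}")
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())


class AgentCommunicationProtocol:
    """Factory for the typed messages agents exchange."""

    def create_status_update_message(self, status_description: str, sender_id: str,
                                     status: str = "in_progress", progress: float = 0.0,
                                     context_id: str = None) -> AgentMessage:
        return AgentMessage(content=status_description, sender_id=sender_id,
                            type="status_update", context_id=context_id,
                            payload={"status": status, "progress": progress})

    def create_error_message(self, error_description: str, sender_id: str,
                             receiver_id: str = None, error_type: str = "generic",
                             error_details: Dict[str, Any] = None,
                             context_id: str = None) -> AgentMessage:
        return AgentMessage(content=error_description, sender_id=sender_id, type="error",
                            receiver_id=receiver_id, context_id=context_id,
                            payload={"error_type": error_type,
                                     "error_details": error_details or {}})

    def create_result_message(self, result_content: str, sender_id: str, receiver_id: str,
                              task_id: str, output_data: Dict[str, Any] = None,
                              result_format: str = "Text",
                              context_id: str = None) -> AgentMessage:
        return AgentMessage(content=result_content, sender_id=sender_id, type="result",
                            receiver_id=receiver_id, context_id=context_id,
                            payload={"task_id": task_id, "output_data": output_data or {},
                                     "result_format": result_format})
```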
+ + # If protocol message is required, send it + if context and context.get("require_protocol_message"): + result_message = self.send_document_result( + document_title=title, + document_content=content, + sender_id=self.id, + receiver_id=context.get("receiver_id", "workflow"), + context_id=workflow_id + ) + # Just log the message creation + logging_utils.info(f"Created protocol result message: {result_message.id}", "execution") + else: + # If no document handler, just put content in response + response["content"] = content return response except Exception as e: - error_msg = f"Fehler bei der Dokumentationserstellung: {str(e)}" + error_msg = f"Error in documentation agent: {str(e)}" logging_utils.error(error_msg, "error") - # Chat-Service schließen bei Fehler - if self.chat_service: - try: - await self.chat_service.close() - except: - pass - self.chat_service = None + # Create error response using protocol + error_message = self.protocol.create_error_message( + error_description=error_msg, + sender_id=self.id, + error_type="documentation", + error_details={"traceback": traceback.format_exc()}, + context_id=workflow_id + ) - # Create error response - response["content"] = f"## Fehler bei der Dokumentationserstellung\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```" - self.message_utils.finalize_message(response) + # Set error in response + response["content"] = f"## Error creating documentation\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```" + response["status"] = "error" return response + + async def _assess_complexity(self, task: str) -> bool: + """ + Assess task complexity to determine document structure. + + Args: + task: The task description + + Returns: + True if complex document needed, False otherwise + """ + if not self.ai_service: + # Default to complex if no AI service + return True + + prompt = f""" + Analyze this task and determine if it requires a complex or simple document structure: + + {task} + + Respond with only "COMPLEX" or "SIMPLE". + """ + + try: + response = await self.ai_service.call_api([ + {"role": "system", "content": "You determine document complexity requirements."}, + {"role": "user", "content": prompt} + ]) + + return "COMPLEX" in response.upper() + except Exception: + # Default to complex on error + return True + + async def _generate_title(self, task: str, document_type: str) -> str: + """ + Generate a title for the document. + + Args: + task: The task description + document_type: Type of document + + Returns: + Generated title + """ + if not self.ai_service: + # Default title if no AI service + return f"{document_type.capitalize()} Document" + + prompt = f""" + Create a concise, professional title for this {document_type}: + + {task} + + Respond with ONLY the title, nothing else. + """ + + try: + title = await self.ai_service.call_api([ + {"role": "system", "content": "You create document titles."}, + {"role": "user", "content": prompt} + ]) + + # Clean up the title + return title.strip('"\'#*- \n\t') + except Exception: + # Default title on error + return f"{document_type.capitalize()} Document" + + async def _generate_complex_document(self, task: str, document_type: str, title: str) -> str: + """ + Generate a complex document with structure. + + Args: + task: The task description + document_type: Type of document + title: Document title + + Returns: + Generated document content + """ + if not self.ai_service: + return f"# {title}\n\nUnable to generate complex document: AI service not available." 
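(To make the complexity gate above concrete, here is a throwaway test sketch with a stubbed chat service. `StubChat` and the sample prompts are invented for this example; the only interface assumed is the async `call_api(messages)` method that `_assess_complexity` already calls, and the default to the complex path when no AI service is wired in matches the guard shown above:)

```python
# Illustrative only: exercises _assess_complexity with a stubbed AI service.
import asyncio

from modules.agentservice_agent_documentation import get_documentation_agent


class StubChat:
    """Minimal stand-in for ChatService; returns a fixed reply from call_api."""

    def __init__(self, reply: str):
        self.reply = reply

    async def call_api(self, messages):
        return self.reply


async def demo():
    agent = get_documentation_agent()

    agent.ai_service = StubChat("SIMPLE")
    assert await agent._assess_complexity("Summarize the meeting notes") is False

    agent.ai_service = StubChat("COMPLEX")
    assert await agent._assess_complexity("Write a full operations manual") is True

    # With no AI service the agent deliberately defaults to the complex path.
    agent.ai_service = None
    assert await agent._assess_complexity("anything") is True


asyncio.run(demo())
```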
+ + prompt = f""" + Create a comprehensive, well-structured {document_type} titled "{title}" based on: + + {task} + + The document should include: + 1. A clear introduction with purpose and scope + 2. Logically organized sections with headings + 3. Detailed content with examples and evidence + 4. A conclusion with key takeaways + 5. Appropriate formatting using Markdown + + Format the document in Markdown with proper headings, lists, and emphasis. + """ + + try: + content = await self.ai_service.call_api([ + {"role": "system", "content": "You create comprehensive, well-structured documentation."}, + {"role": "user", "content": prompt} + ]) + + # Ensure title is at the top + if not content.strip().startswith("# "): + content = f"# {title}\n\n{content}" + + return content + except Exception as e: + return f"# {title}\n\nError generating document: {str(e)}" + + async def _generate_simple_document(self, task: str, document_type: str, title: str) -> str: + """ + Generate a simple document without complex structure. + + Args: + task: The task description + document_type: Type of document + title: Document title + + Returns: + Generated document content + """ + if not self.ai_service: + return f"# {title}\n\nUnable to generate document: AI service not available." + + prompt = f""" + Create a concise, focused {document_type} titled "{title}" based on: + + {task} + + The document should be clear, precise, and to the point without complex chapter structure. + Format using Markdown with appropriate headings and formatting. + """ + + try: + content = await self.ai_service.call_api([ + {"role": "system", "content": "You create concise, focused documentation."}, + {"role": "user", "content": prompt} + ]) + + # Ensure title is at the top + if not content.strip().startswith("# "): + content = f"# {title}\n\n{content}" + + return content + except Exception as e: + return f"# {title}\n\nError generating document: {str(e)}" + + def _detect_document_type(self, message: str) -> str: + """ + Detect document type from the message. 
+ + Args: + message: User message + + Returns: + Detected document type + """ + message = message.lower() + + if any(term in message for term in ["manual", "guide", "instruction", "tutorial"]): + return "manual" + elif any(term in message for term in ["report", "analysis", "assessment", "review"]): + return "report" + elif any(term in message for term in ["process", "workflow", "procedure", "steps"]): + return "process" + elif any(term in message for term in ["presentation", "slides", "deck"]): + return "presentation" + else: + return "document" + + def send_document_result(self, document_title: str, document_content: str, + sender_id: str, receiver_id: str, context_id: str = None) -> AgentMessage: + """Send a document result using the protocol""" + metadata = { + "document_type": self._detect_document_type(document_content), + "title": document_title, + "created_at": datetime.now().isoformat() + } + + return self.protocol.create_result_message( + result_content=document_content, + sender_id=sender_id, + receiver_id=receiver_id, + task_id=f"doc_{uuid.uuid4()}", + output_data=metadata, + result_format=self.result_format, + context_id=context_id + ) + + def send_error_message(self, error_description: str, sender_id: str, receiver_id: str = None, + context_id: str = None) -> AgentMessage: + """Send an error message using the protocol""" + return self.protocol.create_error_message( + error_description=error_description, + sender_id=sender_id, + receiver_id=receiver_id, + error_type="documentation_error", + error_details={"timestamp": datetime.now().isoformat()}, + context_id=context_id + ) -# Singleton-Instanz +# Singleton instance _documentation_agent = None def get_documentation_agent(): - """Gibt eine Singleton-Instanz des Dokumentations-Agenten zurück""" + """Returns a singleton instance of the documentation agent""" global _documentation_agent if _documentation_agent is None: _documentation_agent = DocumentationAgent() - return _documentation_agent \ No newline at end of file + return _documentation_agent \ No newline at end of file diff --git a/gwserver/modules/agentservice_agent_sharepoint.py b/gwserver/modules/agentservice_agent_sharepoint.py deleted file mode 100644 index ef6b510f..00000000 --- a/gwserver/modules/agentservice_agent_sharepoint.py +++ /dev/null @@ -1,209 +0,0 @@ -""" -Sharepoint-Agent für die Interaktion mit Sharepoint-Ressourcen und Dokumenten. -Angepasst für das refaktorisierte Core-Modul. -""" - -import logging -import traceback -from typing import List, Dict, Any, Optional, Union -from datetime import datetime -import uuid - -from modules.agentservice_base import BaseAgent -from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils - -logger = logging.getLogger(__name__) - -class SharepointAgent(BaseAgent): - """Agent für den Zugriff auf und die Arbeit mit SharePoint-Ressourcen""" - - def __init__(self): - """Initialisiert den SharePoint-Agenten""" - super().__init__() - self.id = "sharepoint" - self.name = "SharePoint-Agent" - self.type = "sharepoint" - self.description = "Zugriff auf und Arbeit mit SharePoint-Ressourcen" - self.capabilities = "document_search,metadata_extraction,content_integration,sharepoint_interaction" - self.instructions = """ - Du bist der SharePoint-Agent, ein Spezialist für die Interaktion mit Microsoft SharePoint. Deine Aufgabe ist es: - - 1. SharePoint-Dokumente und -Ressourcen zu durchsuchen und abzurufen - 2. Metadaten aus SharePoint-Dokumenten zu extrahieren und zu analysieren - 3. 
Strukturierte Informationen aus SharePoint-Bibliotheken zu sammeln - 4. Dokumente basierend auf Metadaten zu filtern und zu organisieren - 5. Inhalte aus verschiedenen SharePoint-Quellen zu integrieren und zusammenzuführen - 6. Informationen aus SharePoint-Listen und -Dokumentbibliotheken zu extrahieren - 7. Zusammenfassungen und Analysen von SharePoint-Inhalten zu erstellen - - Bei der Darstellung deiner Ergebnisse: - - Strukturiere die Informationen klar und übersichtlich - - Gib den Ursprung und die Metadaten der Dokumente an - - Zeige Beziehungen zwischen verschiedenen Dokumenten und Ressourcen auf - - Hebe wichtige Erkenntnisse und Muster hervor - - Biete Kontext und Relevanz für die gefundenen Informationen - """ - self.result_format = "DocumentList" - - # Utility-Klassen initialisieren - self.message_utils = MessageUtils() - - def get_agent_info(self) -> Dict[str, Any]: - """Get agent information for agent registry""" - return { - "id": self.id, - "type": self.type, - "name": self.name, - "description": self.description, - "capabilities": self.capabilities, - "result_format": self.result_format - } - - def get_prompt(self, message_context: Dict[str, Any]) -> str: - """ - Generiert einen angepassten Prompt für den SharePoint-Agenten. - - Args: - message_context: Kontext der Nachricht - - Returns: - Formatierter Prompt für den SharePoint-Agenten - """ - # Basis-Prompt vom BaseAgent holen - base_prompt = super().get_prompt(message_context) - - # Zusätzliche Anweisungen für SharePoint-Interaktion - sharepoint_instructions = """ - SHAREPOINT-INTERAKTIONS-RICHTLINIEN: - - 1. Präzisiere die Suchkriterien für SharePoint-Ressourcen - 2. Identifiziere relevante Bibliotheken, Listen und Standorte - 3. Definiere benötigte Metadaten und Inhalte - 4. Berücksichtige Berechtigungsanforderungen - 5. Priorisiere aktuelle und relevante Dokumente - 6. Stelle eine strukturierte Darstellung der Ergebnisse sicher - - Für eine gute SharePoint-Integration: - - Gib detaillierte Pfade und Standorte an - - Berücksichtige verschiedene Dokumenttypen und Formate - - Zeige Metadaten und Dokumenteigenschaften - - Biete Kontext zu den gefundenen Ressourcen - - Berücksichtige Versionsinformationen - """ - - # Task aus dem Kontext extrahieren - task = message_context.get("task", "") - task_instructions = f"\nSHAREPOINT-AUFTRAG:\n{task}\n" if task else "" - - # Vollständigen Prompt zusammenbauen - complete_prompt = f"{base_prompt}\n\n{sharepoint_instructions}\n{task_instructions}" - - return complete_prompt.strip() - - async def process_message(self, message: Dict[str, Any], - workflow: Dict[str, Any], - context: Dict[str, Any] = None, - log_func=None) -> Dict[str, Any]: - """ - Verarbeitet eine Nachricht und interagiert mit SharePoint. 
- - Args: - message: Die zu verarbeitende Nachricht - workflow: Der aktuelle Workflow - context: Zusätzlicher Kontext - log_func: Funktion für Workflow-Logging - - Returns: - Die generierte Antwort mit SharePoint-Inhalten - """ - # Initialize logging - workflow_id = workflow.get("id", "unknown") - logging_utils = LoggingUtils(workflow_id, log_func) - logging_utils.info(f"SharePointAgent startet SharePoint-Interaktion", "agents") - - # Create response message - response = self.message_utils.create_message(workflow_id, role="assistant") - response["agent_type"] = self.type - response["agent_name"] = self.name - response["parent_message_id"] = message.get("id") - - try: - # Hier würde die tatsächliche Interaktion mit SharePoint stattfinden - # In der finalen Implementierung würde ein SharePoint-Connector verwendet werden - - # Als Beispiel generieren wir eine Standardantwort - logging_utils.info("SharePoint-Suche wird simuliert", "agents") - - sharepoint_content = f"""Ich habe als {self.name} die SharePoint-Ressourcen durchsucht und folgende Ergebnisse gefunden: - -## SharePoint-Suchergebnisse - -Basierend auf deiner Anfrage habe ich folgende relevante Dokumente identifiziert: - -### Dokumente -1. **Projektplan_2025.docx** (Letzte Änderung: 15.03.2025) - - Standort: Projekte/Strategische Planung - - Autor: Maria Schmidt - - Schlüsselinhalt: Zeitplan für Q2-Q4 2025, Ressourcenplanung, Meilensteine - -2. **Marktanalyse_Q1_2025.pptx** (Letzte Änderung: 22.02.2025) - - Standort: Marketing/Marktforschung - - Autor: Thomas Müller - - Schlüsselinhalt: Aktuelle Markttrends, Wettbewerbsanalyse, Chancen und Risiken - -3. **Budgetplanung_2025.xlsx** (Letzte Änderung: 01.03.2025) - - Standort: Finanzen/Planung - - Autor: Sarah Weber - - Schlüsselinhalt: Detaillierte Budgetaufschlüsselung nach Abteilungen und Quartalen - -### SharePoint-Listen -1. **Projektstatusliste** - - 12 Einträge mit relevanten Projektstatusinformationen - - Letzte Aktualisierung: 25.03.2025 - -## Zusammenfassung der Inhalte - -Die gefundenen Dokumente zeigen übereinstimmend, dass: -- Der Fokus im Jahr 2025 auf der Expansion in neue Märkte liegt -- Das Budget für Forschung und Entwicklung um 15% erhöht wurde -- Drei neue Hauptprojekte im zweiten Quartal starten werden - -## Empfehlungen - -Basierend auf den gefundenen Informationen empfehle ich: -1. Die Projektpläne für Q2 mit besonderem Fokus auf die neuen Hauptprojekte zu prüfen -2. Die Ressourcenzuweisung entsprechend der Budgeterhöhung anzupassen -3. 
Die Marktanalyse als Grundlage für die Expansionsstrategie zu verwenden - -Die Dokumente sind alle aktuell und wurden von den verantwortlichen Fachabteilungen erstellt.""" - - logging_utils.info("SharePoint-Ergebnisse zusammengestellt", "agents") - - # Set the content in the response - response["content"] = sharepoint_content - - # Finalize the message - self.message_utils.finalize_message(response) - response["result_format"] = self.result_format - - return response - - except Exception as e: - error_msg = f"Fehler bei der SharePoint-Interaktion: {str(e)}" - logging_utils.error(error_msg, "error") - - # Create error response - response["content"] = f"## Fehler bei der SharePoint-Interaktion\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```" - self.message_utils.finalize_message(response) - - return response - -# Singleton-Instanz -_sharepoint_agent = None - -def get_sharepoint_agent(): - """Gibt eine Singleton-Instanz des SharePoint-Agenten zurück""" - global _sharepoint_agent - if _sharepoint_agent is None: - _sharepoint_agent = SharepointAgent() - return _sharepoint_agent \ No newline at end of file diff --git a/gwserver/modules/agentservice_agent_webcrawler.py b/gwserver/modules/agentservice_agent_webcrawler.py index 6581f969..2b91cc56 100644 --- a/gwserver/modules/agentservice_agent_webcrawler.py +++ b/gwserver/modules/agentservice_agent_webcrawler.py @@ -19,11 +19,13 @@ import requests from modules.agentservice_base import BaseAgent from connectors.connector_aichat_openai import ChatService from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils +from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol logger = logging.getLogger(__name__) class WebcrawlerAgent(BaseAgent): + """Agent für Web-Recherche und Informationsbeschaffung""" def __init__(self): @@ -36,6 +38,14 @@ class WebcrawlerAgent(BaseAgent): self.capabilities = "web_search,information_retrieval,data_collection,source_verification,content_integration" self.result_format = "SearchResults" + # Add enhanced document capabilities + self.supports_documents = True + self.document_capabilities = ["read", "create"] + self.required_context = ["workflow_id"] + + # Initialize protocol + self.protocol = AgentCommunicationProtocol() + # Chat-Service initialisieren self.chat_service = ChatService() @@ -61,74 +71,130 @@ class WebcrawlerAgent(BaseAgent): def get_agent_info(self) -> Dict[str, Any]: """Get agent information for agent registry""" - return { - "id": self.id, - "type": self.type, - "name": self.name, - "description": self.description, - "capabilities": self.capabilities, - "result_format": self.result_format, + info = super().get_agent_info() + info.update({ "metadata": { "max_url": self.max_url, "max_result": self.max_result, "timeout": self.timeout } - } - - async def process_message(self, message: Dict[str, Any], - workflow: Dict[str, Any], - context: Dict[str, Any] = None, - log_func=None) -> Dict[str, Any]: + }) + return info + + async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: """ Verarbeitet eine Nachricht und führt eine Web-Recherche durch. 
Args: message: Die zu verarbeitende Nachricht - workflow: Der aktuelle Workflow context: Zusätzlicher Kontext - log_func: Funktion für Workflow-Logging Returns: Die generierte Antwort mit der Web-Recherche """ - # Initialize logging - workflow_id = workflow.get("id", "unknown") - logging_utils = LoggingUtils(workflow_id, log_func) - logging_utils.info(f"WebcrawlerAgent startet Web-Recherche", "agents") + # Extract workflow_id from context or message + workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown") - # Create response message - response = self.message_utils.create_message(workflow_id, role="assistant") - response["agent_type"] = self.type - response["agent_name"] = self.name - response["parent_message_id"] = message.get("id") + # Get or create logging_utils + log_func = context.get("log_func") if context else None + logging_utils = LoggingUtils(workflow_id, log_func) + + # Send status update using protocol + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Starte Web-Recherche", + sender_id=self.id, + status="in_progress", + progress=0.0, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) + + # Create response structure + response = { + "role": "assistant", + "content": "", + "agent_id": self.id, + "agent_type": self.type, + "agent_name": self.name, + "result_format": self.result_format, + "workflow_id": workflow_id + } try: # Get the query from the message prompt = await self.get_prompt(message) logging_utils.info(f"Web-Recherche für: {prompt[:50]}...", "agents") - # Führe die Web-Recherche durch und warte auf das Ergebnis mit await + # Update progress using protocol + if log_func: + status_message = self.protocol.create_status_update_message( + status_description=f"Recherchiere: {prompt[:30]}...", + sender_id=self.id, + status="in_progress", + progress=0.3, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) + + # Führe die Web-Recherche durch web_query_result = await self.get_web_query(message) - logging_utils.info("Web-Recherche abgeschlossen", "agents") + + # Final status update + if log_func: + status_message = self.protocol.create_status_update_message( + status_description="Web-Recherche abgeschlossen", + sender_id=self.id, + status="completed", + progress=1.0, + context_id=workflow_id + ) + log_func(workflow_id, status_message.content, "info", self.id, self.name) # Set the content in the response response["content"] = web_query_result - # Finalize the message - self.message_utils.finalize_message(response) - response["result_format"] = self.result_format - return response except Exception as e: error_msg = f"Fehler bei der Web-Recherche: {str(e)}" logging_utils.error(error_msg, "error") - # Create error response + # Create error response using protocol + error_message = self.protocol.create_error_message( + error_description=error_msg, + sender_id=self.id, + error_type="web_search", + error_details={"traceback": traceback.format_exc()}, + context_id=workflow_id + ) + response["content"] = f"## Fehler bei der Web-Recherche\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```" - self.message_utils.finalize_message(response) return response + + def send_document_request(self, document_description: str, sender_id: str, receiver_id: str, filters: Dict[str, Any] = None, context_id: str = None) -> AgentMessage: + """Send a document request using the protocol""" + return 
self.protocol.create_document_request_message( + document_description=document_description, + sender_id=sender_id, + receiver_id=receiver_id, + filters=filters, + context_id=context_id + ) + + def send_result_message(self, result_content: str, sender_id: str, receiver_id: str, task_id: str, + output_data: Dict[str, Any] = None, context_id: str = None) -> AgentMessage: + """Send a result message using the protocol""" + return self.protocol.create_result_message( + result_content=result_content, + sender_id=sender_id, + receiver_id=receiver_id, + task_id=task_id, + output_data=output_data, + result_format="SearchResults", + context_id=context_id + ) async def get_prompt(self, message_context: Dict[str, Any]) -> str: task = message_context.get("content", "") @@ -142,17 +208,29 @@ class WebcrawlerAgent(BaseAgent): logger.info(f"Web analysis prompt '{prompt}' delivers {len(result_json)} results.") if isinstance(result_json, list): + total_tokens = 0 + for i, result in enumerate(result_json, 1): - + # Limit content size for each result + result_data_limited = self.limit_text_for_api(result['data'], max_tokens=15000) # Allow ~15000 tokens per result + web_answer_instructions = f""" Fass das Resultat gemäss dem Auftrag zusammen in maximal rund 2000 Zeichen. Auftrag = '{prompt.replace("'","")}' Fasse die wichtigsten Erkenntnisse zusammen und setze sie in Bezug zur ursprünglichen Anfrage. Die Einleitung kannst Du weglassen. Achte darauf, nur relevante und qualitativ hochwertige Informationen zu extrahieren, welche einen Bezug zum Auftrag haben, und übersichtlich zu präsentieren. Vermittle ein ausgewogenes Bild der recherchierten Informationen. Dies ist das Resultat: - {result['data']} + {result_data_limited} """ + # Count tokens in the instructions to ensure we don't exceed API limits + instruction_tokens = self.count_tokens(web_answer_instructions) + if total_tokens + instruction_tokens > 60000: + logger.warning(f"Skipping result {i} to avoid exceeding token limit") + break + + total_tokens += instruction_tokens + # Zusätzliche Anweisungen für Web-Recherche content_text = await self.chat_service.call_api( messages=[ @@ -166,16 +244,25 @@ class WebcrawlerAgent(BaseAgent): } ] ) - result_data += f"\n\n[{i}] {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\nContent: {content_text}" - summary_src+=f"\n{content_text}" + + # Create a summary but ensure we stay within token limits + content_summary = content_text[:2000] # Limit to ~2000 characters + result_data += f"\n\n[{i}] {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\nContent: {content_summary}" + summary_src += f"\n{content_summary}" + + # Update token count + total_tokens += self.count_tokens(content_summary) + 100 # Add buffer for formatting else: result_data = "no data received" logger.info(f"Web analysis result sent {len(result_data)}B") # Zusätzliche Zusammenfassung - summary="" - if len(summary_src)>1: + summary = "" + if len(summary_src) > 1: + # Limit summary source to ensure we don't exceed API limits + summary_src_limited = self.limit_text_for_api(summary_src, max_tokens=10000) + summary = await self.chat_service.call_api( messages=[ { @@ -184,7 +271,7 @@ class WebcrawlerAgent(BaseAgent): }, { "role": "user", - "content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src}\n" + "content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src_limited}\n" } ] ) @@ -193,231 +280,323 @@ class WebcrawlerAgent(BaseAgent): result = f"## Web-Recherche 
Ergebnisse\n\n### Zusammenfassung\n{summary}\n\n### Detaillierte Ergebnisse{result_data}" return result - async def run_web_query(self, prompt: str) -> List[Dict]: - if prompt=="": - return [] + async def run_web_query(self, prompt: str) -> List[Dict]: + if prompt=="": + return [] - ptext=f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open. + ptext=f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open. - 'url': A list of maximum {self.max_url} specific URLs extracted from the task string. + 'url': A list of maximum {self.max_url} specific URLs extracted from the task string. - 'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information. - - Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition. - """ - - content_text = await self.chat_service.call_api( - messages=[ - { - "role": "system", - "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt." - }, - { - "role": "user", - "content": ptext - } - ] - ) - # Remove markdown formatting if present - if content_text.startswith("```json"): - # Find the end of the JSON block - end_marker = "```" - end_index = content_text.rfind(end_marker) - if end_index != -1: - # Extract the JSON content without the markdown markers - content_text = content_text[7:end_index].strip() - - # Now parse the JSON - try: - logger.info(f"Valid json received: {str(content_text)}") - pjson = json.loads(content_text) - # Now call scrape_json with the parsed dictionary - result_json = await self.scrape_json(pjson) - return result_json - except json.JSONDecodeError as e: - logger.error(f"Failed to parse JSON: {e}") - logger.error(f"Cleaned content: {content_text[:100]}...") - return [] - - async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]: - """ - Scrapes web content based on a research strategy JSON. - - Args: - research_strategy: A dictionary containing: - - 'skey': List of search keywords - - 'url': List of direct URLs to scrape - - Returns: - Dictionary with URLs as keys and scraped content as values - """ - - logger.info("Starting JSON-based web scraping") - results = [] - - # Validate input structure - if not isinstance(research_strategy, dict): - logger.error("Invalid research_strategy format: not a dictionary") - return {"error": "Invalid research_strategy format: not a dictionary"} - - keys = research_strategy.get("skey", []) - direct_urls = research_strategy.get("url", []) - - if not isinstance(keys, list) or not isinstance(direct_urls, list): - logger.error("Invalid research_strategy format: keys, or url is not a list") - return {"error": "Invalid research_strategy format: keys, or url is not a list"} - - # Process search keywords through search engine - for keyword in keys: - logger.info(f"Processing keyword: {keyword}") - found_results = self.search_web(keyword) # List with Dict: title,url,snippet,data - logger.info(f"... 
{len(found_results)} results found") - results.extend(found_results) - - # Process direct URLs - logger.info(f"Processing {len(direct_urls)} direct URLs") - for url in direct_urls: - if url in results: - logger.info(f"Skipping already scraped URL: {url}") - continue - soup=self.read_url(url) - - # Extract title from the page if it exists - if isinstance(soup, BeautifulSoup): - title_tag = soup.find('title') - title = title_tag.text.strip() if title_tag else "No title" - - # Alternative: You could also look for h1 tags if the title tag is missing - if title == "No title": - h1_tag = soup.find('h1') - if h1_tag: - title = h1_tag.text.strip() - else: - # Handle the case where soup is an error message string - title = "Error fetching page" - - results.append(self.parse_result(soup,"No title",url)) - logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total") - return results - - def search_web(self, query: str) -> List[Dict]: - formatted_query = quote_plus(query) - url = f"https://html.duckduckgo.com/html/?q={formatted_query}" - - search_results_soup = self.read_url(url) - if not search_results_soup or search_results_soup.select('.result') is None or len(search_results_soup.select('.result')) == 0: - logger.warning(f"Keine Suchergebnisse gefunden für: {query}") - return [] - - # Extract search results - results = [] - - # Find all result containers - result_elements = search_results_soup.select('.result') - - for result in result_elements: - # Extract title - title_element = result.select_one('.result__a') - title = title_element.text.strip() if title_element else 'No title' + 'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information. - # Extract URL (DuckDuckGo uses redirects, need to extract from href param) - url_element = title_element.get('href') if title_element else '' - extracted_url = 'No URL' + Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition. + """ + + content_text = await self.chat_service.call_api( + messages=[ + { + "role": "system", + "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt." + }, + { + "role": "user", + "content": ptext + } + ] + ) + # Remove markdown formatting if present + if content_text.startswith("```json"): + # Find the end of the JSON block + end_marker = "```" + end_index = content_text.rfind(end_marker) + if end_index != -1: + # Extract the JSON content without the markdown markers + content_text = content_text[7:end_index].strip() - if url_element: - # Extract the actual URL from DuckDuckGo's redirect - if url_element.startswith('/d.js?q='): - start = url_element.find('?q=') + 3 # Skip '?q=' - end = url_element.find('&', start) if '&' in url_element[start:] else None - extracted_url = unquote(url_element[start:end]) + # Now parse the JSON + try: + logger.info(f"Valid json received: {str(content_text)}") + pjson = json.loads(content_text) + # Now call scrape_json with the parsed dictionary + result_json = await self.scrape_json(pjson) + return result_json + except json.JSONDecodeError as e: + logger.error(f"Failed to parse JSON: {e}") + logger.error(f"Cleaned content: {content_text[:100]}...") + return [] + + async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]: + """ + Scrapes web content based on a research strategy JSON. 
+ + Args: + research_strategy: A dictionary containing: + - 'skey': List of search keywords + - 'url': List of direct URLs to scrape - # Make sure the URL has the correct protocol prefix - if not extracted_url.startswith(('http://', 'https://')): - if not extracted_url.startswith('//'): - extracted_url = 'https://' + extracted_url - else: - extracted_url = 'https:' + extracted_url - else: - extracted_url = url_element - - # Extract snippet directly from search results page - snippet_element = result.select_one('.result__snippet') - snippet = snippet_element.text.strip() if snippet_element else 'No description' - - # Now fetch the actual page content for the data field - target_page_soup = self.read_url(extracted_url) - - results.append({ - 'title': title, - 'url': extracted_url, - 'snippet': snippet, - 'data': str(target_page_soup) if isinstance(target_page_soup, BeautifulSoup) else "Error fetching page" - }) - - # Limit the number of results if needed - if len(results) >= self.max_result: - break - - return results + Returns: + List of result dictionaries with title, url, snippet and extracted page data + """ - def read_url(self, url: str) -> BeautifulSoup: - """ - Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück. - Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben. - - Args: - url: Die zu lesende URL + logger.info("Starting JSON-based web scraping") + results = [] - Returns: - BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern - """ - headers = { - 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', - 'Accept': 'text/html,application/xhtml+xml,application/xml', - 'Accept-Language': 'en-US,en;q=0.9', - } - - try: - import time + # Validate input structure + if not isinstance(research_strategy, dict): + logger.error("Invalid research_strategy format: not a dictionary") + return {"error": "Invalid research_strategy format: not a dictionary"} - # Initialer Request - response = requests.get(url, headers=headers, timeout=10) + keys = research_strategy.get("skey", []) + direct_urls = research_strategy.get("url", []) - # Polling für Status 202 - if response.status_code == 202: - # Maximal 3 Versuche mit steigenden Intervallen - backoff_times = [0.5, 1.0, 2.0, 5.0] # 0.5s, dann 1s, dann 2s + if not isinstance(keys, list) or not isinstance(direct_urls, list): + logger.error("Invalid research_strategy format: keys, or url is not a list") + return {"error": "Invalid research_strategy format: keys, or url is not a list"} + + # Process search keywords through search engine + for keyword in keys: + logger.info(f"Processing keyword: {keyword}") + found_results = self.search_web(keyword) # List with Dict: title,url,snippet,data + logger.info(f"... 
{len(found_results)} results found") + results.extend(found_results) + + # Process direct URLs + logger.info(f"Processing {len(direct_urls)} direct URLs") + for url in direct_urls: + if any(r.get('url') == url for r in results): + logger.info(f"Skipping already scraped URL: {url}") + continue + soup = self.read_url(url) + + # Extract title from the page if it exists + if isinstance(soup, BeautifulSoup): + title_tag = soup.find('title') + title = title_tag.text.strip() if title_tag else "No title" + + # Alternative: You could also look for h1 tags if the title tag is missing + if title == "No title": + h1_tag = soup.find('h1') + if h1_tag: + title = h1_tag.text.strip() + else: + # Handle the case where soup is an error message string + title = "Error fetching page" + + # Pass the extracted title on instead of a hard-coded "No title" placeholder + results.append(self.parse_result(soup, title, url)) + logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total") + return results + + def extract_main_content(self, soup: BeautifulSoup, max_chars: int = 30000) -> str: + """ + Extract the main content from an HTML page while limiting character count. + + Args: + soup: BeautifulSoup object containing the page content + max_chars: Maximum number of characters to extract - for wait_time in backoff_times: - time.sleep(wait_time) # Warten mit steigender Zeit + Returns: + Extracted main content as string + """ + if not isinstance(soup, BeautifulSoup): + return str(soup)[:max_chars] + + # Try to find main content elements in order of priority + main_content = None + for selector in ['main', 'article', '#content', '.content', '#main', '.main']: + content = soup.select_one(selector) + if content: + main_content = content + break + + # If no main content found, use the body + if not main_content: + main_content = soup.find('body') or soup + + # Remove script, style, nav, footer elements that don't contribute to main content + for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'): + element.extract() + + # Extract text content + text_content = main_content.get_text(separator=' ', strip=True) + + # Limit to max_chars + return text_content[:max_chars] + + def tokenize_for_counting(self, text: str) -> List[str]: + """ + Simple token counter for estimating token usage. + This is an approximation since the exact tokenization depends on the model. + + Args: + text: Input text + + Returns: + List of tokens + """ + # Simple tokenization by splitting on whitespace and punctuation + import re + return re.findall(r'\w+|[^\w\s]', text) + + def count_tokens(self, text: str) -> int: + """ + Count the approximate number of tokens in a text. + + Args: + text: Input text + + Returns: + Estimated token count + """ + tokens = self.tokenize_for_counting(text) + return len(tokens) + + def limit_text_for_api(self, text: str, max_tokens: int = 60000) -> str: + """ + Limit the text to a maximum number of tokens. + + Args: + text: Input text + max_tokens: Maximum number of tokens allowed + + Returns: + Limited text + """ + if not text: + return "" + + tokens = self.tokenize_for_counting(text) + + # If text is already under the limit, return as is + if len(tokens) <= max_tokens: + return text + + # Otherwise, truncate to max_tokens (rejoining with single spaces only approximates the original spacing) + return " ".join(tokens[:max_tokens]) + "... 
[content truncated due to length]" + + def search_web(self, query: str) -> List[Dict]: + formatted_query = quote_plus(query) + url = f"https://html.duckduckgo.com/html/?q={formatted_query}" + + search_results_soup = self.read_url(url) + if not search_results_soup or search_results_soup.select('.result') is None or len(search_results_soup.select('.result')) == 0: + logger.warning(f"Keine Suchergebnisse gefunden für: {query}") + return [] + + # Extract search results + results = [] + + # Find all result containers + result_elements = search_results_soup.select('.result') + + for result in result_elements: + # Extract title + title_element = result.select_one('.result__a') + title = title_element.text.strip() if title_element else 'No title' + + # Extract URL (DuckDuckGo uses redirects, need to extract from href param) + url_element = title_element.get('href') if title_element else '' + extracted_url = 'No URL' + + if url_element: + # Extract the actual URL from DuckDuckGo's redirect + if url_element.startswith('/d.js?q='): + start = url_element.find('?q=') + 3 # Skip '?q=' + end = url_element.find('&', start) if '&' in url_element[start:] else None + extracted_url = unquote(url_element[start:end]) + + # Make sure the URL has the correct protocol prefix + if not extracted_url.startswith(('http://', 'https://')): + if not extracted_url.startswith('//'): + extracted_url = 'https://' + extracted_url + else: + extracted_url = 'https:' + extracted_url + else: + extracted_url = url_element + + # Extract snippet directly from search results page + snippet_element = result.select_one('.result__snippet') + snippet = snippet_element.text.strip() if snippet_element else 'No description' + + # Now fetch the actual page content for the data field + target_page_soup = self.read_url(extracted_url) + + # Use the new content extraction method to limit content size + content = self.extract_main_content(target_page_soup, max_chars=30000) + + results.append({ + 'title': title, + 'url': extracted_url, + 'snippet': snippet, + 'data': content + }) + + # Limit the number of results if needed + if len(results) >= self.max_result: + break + + return results + + + def read_url(self, url: str) -> BeautifulSoup: + """ + Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück. + Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben. 
+
+    def read_url(self, url: str) -> BeautifulSoup:
+        """
+        Read a URL and return a BeautifulSoup parser for its content.
+        Returns an empty BeautifulSoup object on errors.
+
+        Args:
+            url: The URL to read
+
+        Returns:
+            BeautifulSoup object with the content, or empty on errors
+        """
+        headers = {
+            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            'Accept': 'text/html,application/xhtml+xml,application/xml',
+            'Accept-Language': 'en-US,en;q=0.9',
+        }
+
+        try:
+            import time
+
+            # Initial request
             response = requests.get(url, headers=headers, timeout=10)
-            # Wenn kein 202 mehr, dann abbrechen
-            if response.status_code != 202:
-                break
-
-            # Für andere Fehler-Status einen Fehler auslösen
-            response.raise_for_status()
-
-            # HTML parsen
-            return BeautifulSoup(response.text, 'html.parser')
-
-        except Exception as e:
-            # Leeres BeautifulSoup-Objekt erstellen
-            return BeautifulSoup("", 'html.parser')
-
-    def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
-        # Extract snippet/description
-        snippet_element = data.select_one('.result__snippet')
-        snippet = snippet_element.text.strip() if snippet_element else 'No description'
-
-        result={
-            'title': title,
-            'url': url,
-            'snippet': snippet,
-            'data': data.prettify()
-        }
-        return result
+            # Poll while the server answers with status 202
+            if response.status_code == 202:
+                # Up to four retries with increasing intervals
+                backoff_times = [0.5, 1.0, 2.0, 5.0]  # 0.5s, 1s, 2s, then 5s
+
+                for wait_time in backoff_times:
+                    time.sleep(wait_time)  # Wait with increasing backoff
+                    response = requests.get(url, headers=headers, timeout=10)
+
+                    # Stop polling once the status is no longer 202
+                    if response.status_code != 202:
+                        break
+
+            # Raise an error for other failure status codes
+            response.raise_for_status()
+
+            # Parse the HTML
+            return BeautifulSoup(response.text, 'html.parser')
+
+        except Exception:
+            # Return an empty BeautifulSoup object on any error
+            return BeautifulSoup("", 'html.parser')
+
+    def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
+        # Extract snippet/description
+        snippet_element = data.select_one('.result__snippet')
+        snippet = snippet_element.text.strip() if snippet_element else 'No description'
+
+        result = {
+            'title': title,
+            'url': url,
+            'snippet': snippet,
+            'data': data.prettify()
+        }
+        return result
+

 # Singleton-Instanz
 _webcrawler_agent = None
diff --git a/gwserver/modules/agentservice_base.py b/gwserver/modules/agentservice_base.py
index 67348e5d..8291a3be 100644
--- a/gwserver/modules/agentservice_base.py
+++ b/gwserver/modules/agentservice_base.py
@@ -1,36 +1,61 @@
 """
-Erweiterte Basisklasse für Agenten im Agentservice.
-Dieser Modul stellt eine Basis-Agent-Klasse mit Rückgabeformat-Attribut für spezialisierte Agenten bereit.
+Enhanced base agent class for the Agentservice.
+Provides improved communication and document handling capabilities.
 """

 import logging
-from typing import List, Dict, Any, Optional, Tuple
+import json
+from typing import Dict, Any, List, Optional, Tuple, Union
+import asyncio
+from datetime import datetime
+import uuid

 logger = logging.getLogger(__name__)

 class BaseAgent:
-    """Basisklasse für alle Agenten im System"""
+    """
+    Enhanced base agent class with improved communication capabilities.
+    All specialized agents should inherit from this class.
+ """ def __init__(self): - """Initialisiert den Basis-Agenten""" + """Initialize the enhanced agent.""" self.id = "base_agent" self.name = "Base Agent" self.type = "base" - self.description = "Basisagent als Vorlage für spezialisierte Agenten" - self.capabilities = "Grundlegende Agentenoperationen" - self.instructions = """ - Als Basis-Agent kannst du grundlegende Aufgaben erledigen. - Diese Anweisungen sollten von spezialisierten Agenten überschrieben werden. + self.description = "Base agent for the Agentservice" + self.capabilities = "Basic agent operations" + self.result_format = "Text" + + # New properties for document handling + self.supports_documents = True + self.document_capabilities = ["read", "reference"] + self.required_context = [] + + # System dependencies + self.ai_service = None + self.document_handler = None + self.lucydom_interface = None + + def set_dependencies(self, ai_service=None, document_handler=None, lucydom_interface=None): """ - # Neues Attribut für das Rückgabeformat - self.result_format = "Text" # Standard: Textformat + Set system dependencies. + + Args: + ai_service: AI service for text generation + document_handler: Document handler for document operations + lucydom_interface: LucyDOM interface for database access + """ + self.ai_service = ai_service + self.document_handler = document_handler + self.lucydom_interface = lucydom_interface def get_agent_info(self) -> Dict[str, Any]: """ - Gibt Informationen über den Agenten zurück. + Get detailed information about the agent. Returns: - Dict mit Agenten-Informationen + Dictionary with agent information """ return { "id": self.id, @@ -38,87 +63,198 @@ class BaseAgent: "type": self.type, "description": self.description, "capabilities": self.capabilities, - "instructions": self.instructions, - "result_format": self.result_format, # Rückgabeformat hinzugefügt - "used": False, # Wird zur Laufzeit aktualisiert - "last_result_status": None # Wird zur Laufzeit aktualisiert + "result_format": self.result_format, + "supports_documents": self.supports_documents, + "document_capabilities": self.document_capabilities, + "required_context": self.required_context } - def get_prompt(self, message_context: Dict[str, Any]) -> str: + def get_capabilities(self) -> List[str]: """ - Generiert einen an den Agenten angepassten Prompt basierend auf Kontext. + Get a list of agent capabilities. - Args: - message_context: Kontext der Nachricht - Returns: - Formatierter Prompt für den Agenten + List of capability strings """ - # Basis-Prompt, der von spezialisierten Agenten überschrieben werden kann - base_prompt = f""" - Du bist {self.name}, ein {self.type} Agent. - - {self.description} - - Deine Fähigkeiten: {self.capabilities} - - {self.instructions} - - Rückgabeformat: {self.result_format} - - Formatiere deine Antwort klar und strukturiert. Beantworte alle Aspekte der Anfrage. - Deklariere am Ende deiner Antwort den Status deines Ergebnisses: - [STATUS: ERGEBNIS] - Wenn du ein vollständiges, konkretes Ergebnis geliefert hast - [STATUS: TEILWEISE] - Wenn du ein teilweises Ergebnis geliefert hast - [STATUS: PLAN] - Wenn du nur einen Plan vorgeschlagen hast + # Split capabilities into a list + if isinstance(self.capabilities, str): + return [cap.strip() for cap in self.capabilities.split(",")] + return [] + + def get_supported_formats(self) -> List[str]: """ + Get supported output formats. 
- return base_prompt.strip() + Returns: + List of supported format strings + """ + if isinstance(self.result_format, str): + return [fmt.strip() for fmt in self.result_format.split(",")] + return ["Text"] async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]: """ - Verarbeitet eine Nachricht und generiert eine Antwort. - Sollte von spezialisierten Agenten überschrieben werden. + Process a message and generate a response. Args: - message: Die zu verarbeitende Nachricht - context: Zusätzlicher Kontext (optional) + message: Input message + context: Optional context information Returns: - Die generierte Antwort + Response message """ - # Basis-Implementierung, die einfach eine Standardantwort zurückgibt - return { - "role": "assistant", - "content": f"Ich bin {self.name} und habe deine Anfrage erhalten. Allerdings bin ich nur eine Basisimplementierung ohne spezifische Funktionalität. [STATUS: PLAN]", - "agent_type": self.type, - "result_format": self.result_format # Rückgabeformat in der Antwort - } + # Basic implementation - should be overridden by specialized agents + if not self.ai_service: + logger.warning(f"Agent {self.id} has no AI service configured") + return { + "role": "assistant", + "content": f"I'm {self.name}, but I'm not properly configured. Please set up the AI service.", + "agent_id": self.id, + "agent_type": self.type, + "result_format": "Text" + } + + # Process documents if available and set up document handler + document_context = "" + if self.supports_documents and self.document_handler and message.get("documents"): + document_context = await self._process_documents(message) + + # Create enhanced prompt + prompt = self._create_enhanced_prompt(message, document_context, context) + + # Generate response + try: + response_content = await self.ai_service.call_api([ + {"role": "system", "content": self._get_system_prompt()}, + {"role": "user", "content": prompt} + ]) + + # Process the response to extract any special instructions or status + content, status = self._process_response(response_content) + + return { + "role": "assistant", + "content": content, + "agent_id": self.id, + "agent_type": self.type, + "agent_name": self.name, + "result_format": self.result_format, + "status": status, + "workflow_id": message.get("workflow_id"), + "documents": message.get("documents", []) # Pass through documents + } + except Exception as e: + logger.error(f"Error in agent {self.id}: {str(e)}") + return { + "role": "assistant", + "content": f"I encountered an error: {str(e)}", + "agent_id": self.id, + "agent_type": self.type, + "result_format": "Text", + "status": "error" + } - def extract_status(self, content: str) -> Tuple[str, str]: + async def _process_documents(self, message: Dict[str, Any]) -> str: """ - Extrahiert den Status aus dem Inhalt der Antwort. + Process documents in the message. Args: - content: Inhalt der Antwort + message: Input message with documents Returns: - Tuple mit (bereinigter Text, Status) + Document context as text """ - import re + # Simply extract text from documents + if not self.document_handler: + return "" - # Standard-Status, falls keine Deklaration gefunden wird - status = "UNBEKANNT" + return self.document_handler.merge_document_contents(message) + + def _create_enhanced_prompt(self, message: Dict[str, Any], document_context: str, context: Dict[str, Any] = None) -> str: + """ + Create an enhanced prompt with context. 
- # Suche nach Status-Deklaration - status_pattern = r'\[STATUS:\s*(ERGEBNIS|TEILWEISE|PLAN)\]' - match = re.search(status_pattern, content, re.IGNORECASE) - - if match: - # Extrahiere den Status - status = match.group(1).upper() + Args: + message: Input message + document_context: Document context + context: Optional additional context - # Entferne die Status-Deklaration aus dem Text - content = re.sub(status_pattern, '', content, flags=re.IGNORECASE).strip() + Returns: + Enhanced prompt + """ + prompt = message.get("content", "") - return content, status \ No newline at end of file + # Add document context if available + if document_context: + prompt += f"\n\n=== DOCUMENT CONTEXT ===\n{document_context}" + + # Add any additional context + if context: + # Add expected format if specified + if "expected_format" in context: + prompt += f"\n\nPlease format your response as: {context['expected_format']}" + + # Add dependency outputs if available + if "dependency_outputs" in context: + prompt += "\n\n=== OUTPUTS FROM PREVIOUS ACTIVITIES ===\n" + for key, value in context["dependency_outputs"].items(): + if isinstance(value, dict) and "content" in value: + prompt += f"\n--- {key} ---\n{value['content']}\n" + else: + prompt += f"\n--- {key} ---\n{str(value)}\n" + + return prompt + + def _get_system_prompt(self) -> str: + """ + Get the system prompt for the agent. + + Returns: + System prompt string + """ + return f""" + You are {self.name}, a specialized {self.type} agent. + + {self.description} + + Your capabilities include: {self.capabilities} + + You should format your responses according to: {self.result_format} + + Respond clearly and helpfully to the user's request. + When appropriate, include a status indicator at the end of your message: + + [STATUS: COMPLETE] - When you've fully addressed the request + [STATUS: PARTIAL] - When you've partially addressed the request + [STATUS: QUESTION] - When you need more information + """ + + def _process_response(self, response: str) -> Tuple[str, str]: + """ + Process the response to extract status and clean content. + + Args: + response: Raw response from the AI + + Returns: + Tuple of (cleaned content, status) + """ + # Default status + status = "complete" + + # Check for status tags + import re + status_match = re.search(r'\[STATUS:\s*(COMPLETE|PARTIAL|QUESTION)\]', response, re.IGNORECASE) + + if status_match: + status_value = status_match.group(1).lower() + # Remove the status tag + content = re.sub(r'\[STATUS:\s*(COMPLETE|PARTIAL|QUESTION)\]', '', response, flags=re.IGNORECASE).strip() + return content, status_value + + return response, status + +# Factory functions +def get_enhanced_base_agent() -> BaseAgent: + """Get an instance of the enhanced base agent.""" + return BaseAgent() diff --git a/gwserver/modules/agentservice_code_helpers.py b/gwserver/modules/agentservice_code_helpers.py deleted file mode 100644 index 11236a37..00000000 --- a/gwserver/modules/agentservice_code_helpers.py +++ /dev/null @@ -1,750 +0,0 @@ -""" -Erweiterter Coder-Agent für die Entwicklung und Ausführung von Python-Code. -Integriert direkten Code-Executor zur Vereinfachung des Ablaufs. 
-""" - -import logging -import json -import os -import asyncio -import re -import uuid -import subprocess -import tempfile -import traceback -import sys -import importlib.util -import inspect -from datetime import datetime -from typing import List, Dict, Any, Optional, Tuple, Union - -from modules.agentservice_base import BaseAgent -from modules.lucydom_interface import get_lucydom_interface -from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils -from connectors.connector_aichat_openai import ChatService -from modules import agentservice_code_helpers - -logger = logging.getLogger(__name__) - -class CodeExecutor: - """ - Führt generierten Code in einer isolierten virtuellen Umgebung aus, - während Zugriff auf spezifische App-Module gewährt wird und - automatisch erforderliche Pakete installiert werden. - """ - - def __init__(self, - app_modules: List[str] = None, - venv_path: Optional[str] = None, - timeout: int = 30, - max_memory_mb: int = 512, - allowed_packages: List[str] = None, - blocked_packages: List[str] = None): - """ - Initialisiert den CodeExecutor. - - Args: - app_modules: Liste von Modulnamen, die dem generierten Code zur Verfügung stehen sollen - venv_path: Pfad zur virtuellen Umgebung. Falls None, wird eine temporäre erstellt - timeout: Maximale Ausführungszeit in Sekunden - max_memory_mb: Maximaler Arbeitsspeicher in MB - allowed_packages: Liste erlaubter Pakete (wenn None, werden alle erlaubt, außer blockierte) - blocked_packages: Liste blockierter Pakete (z.B. gefährliche oder ressourcenintensive) - """ - self.app_modules = app_modules or [] - self.venv_path = venv_path - self.timeout = timeout - self.max_memory_mb = max_memory_mb - self.temp_dir = None - self.allowed_packages = allowed_packages - self.blocked_packages = blocked_packages or ["cryptography", "flask", "django", "tornado", "requests"] - - def _create_venv(self) -> str: - """Erstellt eine virtuelle Umgebung und gibt den Pfad zurück.""" - if self.venv_path and os.path.exists(self.venv_path): - return self.venv_path - - # Temporäres Verzeichnis für die virtuelle Umgebung erstellen - self.temp_dir = tempfile.mkdtemp(prefix="ai_code_exec_") - venv_path = os.path.join(self.temp_dir, "venv") - - try: - # Virtuelle Umgebung erstellen - logger.info(f"Erstelle virtuelle Umgebung in {venv_path}") - subprocess.run([sys.executable, "-m", "venv", venv_path], - check=True, - capture_output=True) - return venv_path - except subprocess.CalledProcessError as e: - logger.error(f"Fehler beim Erstellen der virtuellen Umgebung: {e}") - raise RuntimeError(f"Konnte venv nicht erstellen: {e}") - - def _get_pip_executable(self, venv_path: str) -> str: - """Ermittelt den Pfad zum pip-Executable in der virtuellen Umgebung.""" - if os.name == 'nt': # Windows - return os.path.join(venv_path, "Scripts", "pip.exe") - else: # Unix/Linux - return os.path.join(venv_path, "bin", "pip") - - def _get_python_executable(self, venv_path: str) -> str: - """Ermittelt den Pfad zum Python-Executable in der virtuellen Umgebung.""" - if os.name == 'nt': # Windows - return os.path.join(venv_path, "Scripts", "python.exe") - else: # Unix/Linux - return os.path.join(venv_path, "bin", "python") - - def _install_packages(self, packages: List[str], venv_path: str) -> Tuple[bool, str]: - """ - Installiert Pakete in der virtuellen Umgebung. 
- - Args: - packages: Liste der zu installierenden Pakete - venv_path: Pfad zur virtuellen Umgebung - - Returns: - Tuple aus (Erfolg, Fehlermeldung) - """ - if not packages: - return True, "" - - # Überprüfen, ob Pakete erlaubt sind - blocked = [] - for package in packages: - # Paketname ohne Version extrahieren - pkg_name = re.split('[=<>]', package)[0].strip() - - if self.blocked_packages and pkg_name.lower() in [p.lower() for p in self.blocked_packages]: - blocked.append(pkg_name) - - if self.allowed_packages and pkg_name.lower() not in [p.lower() for p in self.allowed_packages]: - blocked.append(pkg_name) - - if blocked: - return False, f"Die folgenden Pakete sind nicht erlaubt: {', '.join(blocked)}" - - # Pakete installieren - pip_executable = self._get_pip_executable(venv_path) - logger.info(f"Installiere Pakete in virtueller Umgebung: {', '.join(packages)}") - - try: - # pip aktualisieren - mache diesen Schritt optional - try: - subprocess.run( - [pip_executable, "install", "--upgrade", "pip"], - check=False, # Changed from True to False to make it optional - capture_output=True, - timeout=60 - ) - except Exception as pip_error: - # Log the error but continue - logger.warning(f"Pip-Upgrade fehlgeschlagen, fahre mit Paketinstallation fort: {pip_error}") - - # Pakete installieren - process = subprocess.run( - [pip_executable, "install"] + packages, - check=True, - capture_output=True, - text=True, - timeout=120 # 2 Minuten Timeout für Paketinstallation - ) - - return True, process.stdout - except subprocess.CalledProcessError as e: - error_msg = f"Fehler bei der Paketinstallation: {e.stderr}" - logger.error(error_msg) - return False, error_msg - except subprocess.TimeoutExpired: - return False, "Zeitüberschreitung bei der Paketinstallation." - except Exception as e: - return False, f"Unerwarteter Fehler bei der Paketinstallation: {str(e)}" - - - def _extract_required_packages(self, code: str) -> List[str]: - """ - Extrahiert benötigte Pakete aus dem Code durch Analyse von Import-Statements - und Pip-Installationsanweisungen. - - Args: - code: Der Python-Code - - Returns: - Liste der erkannten Paketnamen - """ - packages = set() - - # Paketkommentare erkennen (# pip install package) - pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code) - for comment in pip_comments: - for pkg in comment.split(): - if pkg and not pkg.startswith('-'): - packages.add(pkg.strip()) - - # Import-Statements analysieren - import_lines = re.findall(r'^(?:import|from)\s+([^\s.]+)(?:\s+import|\s*$|\.)', code, re.MULTILINE) - - # Standardmodule, die nicht installiert werden müssen - std_modules = { - 'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json', - 'collections', 'itertools', 'functools', 'pathlib', 'shutil', - 'tempfile', 'uuid', 'subprocess', 'threading', 'logging', - 'traceback', 'io', 'copy' - } - - # Module der App, die nicht installiert werden müssen - app_modules_prefixes = set(m.split('.')[0] for m in self.app_modules) - - for module in import_lines: - if module not in std_modules and module not in app_modules_prefixes: - packages.add(module) - - return list(packages) - - def _create_module_loader(self) -> str: - """ - Erstellt ein Hilfsskript, das App-Module in die venv importiert. - Gibt den Pfad zum Hilfsskript zurück. 
- """ - if not self.app_modules: - return "" - - # Temporäre Datei für den Module-Loader erstellen - module_loader_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - "module_loader.py") - - # Pfad zu den App-Modulen bestimmen - app_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__))) - - # Modul-Loader-Code generieren - loader_code = f""" -import sys -import importlib.util -import os - -# App-Pfad zum Suchpfad hinzufügen -sys.path.insert(0, "{app_path}") - -# Module importieren -modules = {{}} -""" - - # Code zum Importieren der Module hinzufügen - for module_name in self.app_modules: - loader_code += f""" -try: - modules["{module_name}"] = __import__("{module_name}", fromlist=["*"]) - print(f"Modul '{module_name}' erfolgreich importiert") -except ImportError as e: - print(f"Fehler beim Importieren von '{module_name}': {{e}}") -""" - - # Loader-Datei schreiben - with open(module_loader_path, "w") as f: - f.write(loader_code) - - return module_loader_path - - def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]: - """ - Führt den generierten Code in einer isolierten Umgebung aus. - - Args: - code: Der auszuführende Python-Code - input_data: Eingabedaten für den Code (werden als JSON serialisiert) - - Returns: - Dict mit Ausführungsergebnissen, Ausgabe und Fehlern - """ - # Virtuelle Umgebung erstellen oder bestehende verwenden - venv_path = self._create_venv() - - # Erforderliche Pakete aus dem Code extrahieren - required_packages = self._extract_required_packages(code) - - # Pakete installieren, falls erforderlich - install_success = True - install_log = "" - if required_packages: - install_success, install_log = self._install_packages(required_packages, venv_path) - - if not install_success: - return { - "success": False, - "output": "", - "error": f"Fehler bei der Installation der erforderlichen Pakete: {install_log}", - "result": None, - "installed_packages": required_packages - } - - # Temporäre Datei für den Code erstellen - code_id = str(uuid.uuid4())[:8] - code_file_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"ai_code_{code_id}.py") - - # Module-Loader erstellen - module_loader_path = self._create_module_loader() - - # Eingabedaten als JSON speichern, wenn vorhanden - input_path = "" - if input_data: - import json - input_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"input_{code_id}.json") - with open(input_path, "w") as f: - json.dump(input_data, f) - - # Outputpfad für Ergebnisse - output_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"), - f"output_{code_id}.json") - - # Prepare all paths using forward slashes for consistency across platforms - safe_module_loader_path = module_loader_path.replace('\\', '/') if module_loader_path else "" - safe_input_path = input_path.replace('\\', '/') if input_path else "" - safe_output_path = output_path.replace('\\', '/') - - wrapped_code = f""" -# -*- coding: utf-8 -*- -# coding: utf-8 -import sys -import json -import traceback -import os - -# Ergebnisstruktur -result = {{ - "success": False, - "output": "", - "error": "", - "result": None, - "installed_packages": {required_packages} -}} - -try: - # Module laden, falls erforderlich - if "{safe_module_loader_path}": - module_loader = __import__("module_loader") - globals().update({{k: v for k, v in module_loader.modules.items()}}) - - # Eingabedaten laden, falls vorhanden - input_data = None - if 
"{safe_input_path}": - with open("{safe_input_path}", "r") as f: - input_data = json.load(f) - - # Ausgabeumleitung - from io import StringIO - original_stdout = sys.stdout - original_stderr = sys.stderr - captured_stdout = StringIO() - captured_stderr = StringIO() - sys.stdout = captured_stdout - sys.stderr = captured_stderr - - # Benutzercode ausführen - try: - # Den Code in einem lokalen Namespace ausführen - local_vars = {{"input_data": input_data}} - exec('''{code}''', globals(), local_vars) - - # Ergebnis speichern, falls eine Variable 'result' definiert wurde - if "result" in local_vars: - result["result"] = local_vars["result"] - - result["success"] = True - except Exception as e: - result["error"] = str(e) - result["error"] += "\\n" + traceback.format_exc() - finally: - # Ausgabe erfassen - result["output"] = captured_stdout.getvalue() - result["error"] += captured_stderr.getvalue() - - # Ausgabeumleitung zurücksetzen - sys.stdout = original_stdout - sys.stderr = original_stderr - -except Exception as outer_e: - result["error"] = f"Fehler beim Ausführen des Setups: {{outer_e}}\\n{{traceback.format_exc()}}" - -# Ergebnis speichern -with open("{safe_output_path}", "w") as f: - json.dump(result, f, default=str) -""" - - # Code in temporäre Datei schreiben with UTF-8 encoding - with open(code_file_path, "w", encoding="utf-8") as f: - f.write(wrapped_code) - - # Python-Interpreter aus der virtuellen Umgebung bestimmen - python_executable = self._get_python_executable(venv_path) - - # Code ausführen - logger.info(f"Führe Code in virtueller Umgebung aus: {python_executable}") - try: - # Prozess mit Ressourcenbeschränkungen ausführen - cmd = [python_executable, code_file_path] - - # Umgebungsvariablen setzen, um Speicherlimit zu erzwingen - env = os.environ.copy() - if self.max_memory_mb: - if os.name == 'posix': # Unix/Linux - # Auf Unix-Systemen können wir ulimit verwenden - cmd = ["bash", "-c", f"ulimit -v {self.max_memory_mb * 1024} && {python_executable} {code_file_path}"] - elif os.name == 'nt': # Windows - # Auf Windows können wir keine harten Speichergrenzen setzen, aber Job Objects verwenden - # Hier müsste eine komplexere Lösung implementiert werden - pass - - # Prozess starten und mit Timeout ausführen - process = subprocess.run( - cmd, - timeout=self.timeout, - env=env, - capture_output=True, - text=True - ) - - # Ergebnis aus der Ausgabedatei lesen - if os.path.exists(output_path): - with open(output_path, "r") as f: - import json - execution_result = json.load(f) - else: - execution_result = { - "success": False, - "output": process.stdout, - "error": f"Keine Ergebnisdatei gefunden. 
Stderr: {process.stderr}", - "result": None, - "installed_packages": required_packages - } - - except subprocess.TimeoutExpired: - execution_result = { - "success": False, - "output": "", - "error": f"Zeitüberschreitung bei der Ausführung (Timeout nach {self.timeout} Sekunden)", - "result": None, - "installed_packages": required_packages - } - except Exception as e: - execution_result = { - "success": False, - "output": "", - "error": f"Fehler bei der Ausführung: {str(e)}", - "result": None, - "installed_packages": required_packages - } - - # Informationen zur Paketinstallation hinzufügen - if install_log: - execution_result["package_install_log"] = install_log - - # Temporäre Dateien aufräumen - self._cleanup_temp_files([code_file_path, input_path, output_path]) - - return execution_result - - - def _cleanup_temp_files(self, file_paths: List[str]): - """Räumt temporäre Dateien auf.""" - for path in file_paths: - if path and os.path.exists(path): - try: - os.remove(path) - except Exception as e: - logger.warning(f"Konnte temporäre Datei nicht löschen {path}: {e}") - - def cleanup(self): - """Räumt alle temporären Ressourcen auf.""" - if self.temp_dir and os.path.exists(self.temp_dir): - import shutil - try: - shutil.rmtree(self.temp_dir) - logger.info(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}") - except Exception as e: - logger.warning(f"Konnte temporäres Verzeichnis nicht löschen {self.temp_dir}: {e}") - - def __del__(self): - """Aufräumen beim Garbage Collection.""" - self.cleanup() - - -class CoderAgent(BaseAgent): - """Erweiterter Agent für die Entwicklung und Ausführung von Python-Code""" - - def __init__(self): - """Initialize the coder agent with proper type and capabilities""" - super().__init__() - - # Agent metadata - self.id = "coder" - self.type = "coder" - self.name = "Python Code Agent" - self.description = "Entwickelt und führt Python-Code aus" - self.capabilities = "code_development,data_processing,file_processing,automation" - self.result_format = "python_code" - - # Init utilities - self.file_utils = FileUtils() - self.message_utils = MessageUtils() - - # Executor settings - self.executor_timeout = 60 # seconds - self.executor_memory_limit = 512 # MB - - # AI service settings - self.ai_temperature = 0.2 # Lower temperature for more deterministic code generation - self.ai_max_tokens = 2000 # Enough tokens for complex code - - def get_agent_info(self) -> Dict[str, Any]: - """Get agent information for agent registry""" - return { - "id": self.id, - "type": self.type, - "name": self.name, - "description": self.description, - "capabilities": self.capabilities, - "result_format": self.result_format, - "metadata": { - "timeout": self.executor_timeout, - "memory_limit": self.executor_memory_limit - } - } - - async def process_message(self, message: Dict[str, Any], - workflow: Dict[str, Any], - context: Dict[str, Any] = None, - log_func=None) -> Dict[str, Any]: - """ - Processes a message to develop and execute Python code. 
- - Args: - message: The message to process - workflow: The current workflow - context: Additional context information - log_func: Function for workflow logging - - Returns: - Response message - """ - # Initialize logging - workflow_id = workflow.get("id") - logging_utils = LoggingUtils(workflow_id, log_func) - logging_utils.info(f"CoderAgent startet Verarbeitung", "agents") - - # Initialize utilities - workflow_utils = WorkflowUtils(workflow_id) - - # Create response message - response = self.message_utils.create_message(workflow_id, role="assistant") - response["agent_type"] = self.type - response["agent_name"] = self.name - response["parent_message_id"] = message.get("id") - - try: - # Check if user directly provided code - content = message.get("content", "") - documents = message.get("documents", []) - - # Extract code from message content - code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content) - code_to_execute = None - - if code_blocks: - # Use the first code block found - code_to_execute = code_blocks[0] - logging_utils.info(f"Code aus Nachricht extrahiert ({len(code_to_execute)} Zeichen)", "agents") - else: - # Generate code based on the message content using OpenAI - logging_utils.info("Kein Code in der Nachricht gefunden, generiere neuen Code mit AI", "agents") - - # Generate code using AI - code_to_execute = await self._generate_code_from_prompt(content, documents, context) - if not code_to_execute: - logging_utils.warning("AI konnte keinen Code generieren", "agents") - response["content"] = "Ich konnte basierend auf Ihrer Anfrage keinen ausführbaren Code generieren. Bitte geben Sie detailliertere Anweisungen an." - self.message_utils.finalize_message(response) - return response - logging_utils.info(f"Code mit AI generiert ({len(code_to_execute)} Zeichen)", "agents") - - # Get database interface for code execution - mandate_id = workflow.get("mandate_id", 0) - user_id = workflow.get("user_id", 0) - lucydom_interface = get_lucydom_interface(mandate_id, user_id) - - # Execute the code - if code_to_execute: - logging_utils.info("Führe Code aus", "execution") - - # Prepare execution context - execution_context = { - "workflow_id": workflow_id, - "documents": documents, - "message": message, - "mandate_id": mandate_id, - "user_id": user_id - } - - # Execute code - result = await self._execute_code(code_to_execute, lucydom_interface, execution_context) - - # Prepare response - if result.get("success", False): - # Code execution successful - output = result.get("output", "") - execution_result = result.get("result") - logging_utils.info("Code erfolgreich ausgeführt", "execution") - - # Format response content - response_content = f"## Code erfolgreich ausgeführt\n\n" - - # Include the executed code - response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n" - - # Include the output if available - if output: - response_content += f"### Ausgabe\n\n```\n{output}\n```\n\n" - - # Include the execution result if available - if execution_result: - result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result) - response_content += f"### Ergebnis\n\n```\n{result_str}\n```\n\n" - - response["content"] = response_content - - # Process any files created by the code - if isinstance(execution_result, dict) and "created_files" in execution_result: - created_files = execution_result.get("created_files", []) - for file_info in created_files: - file_id = file_info.get("id") - if file_id: - 
logging_utils.info(f"Füge erstellte Datei {file_info.get('name', file_id)} zu Dokumenten hinzu", "files") - file_meta = lucydom_interface.get_file(file_id) - if file_meta: - # Add file document to the response - doc = { - "id": f"doc_{uuid.uuid4()}", - "source": file_meta, - "type": "file" - } - response["documents"].append(doc) - else: - # Code execution failed - error = result.get("error", "Unbekannter Fehler") - logging_utils.error(f"Fehler bei der Codeausführung: {error}", "execution") - - # Format error response - response_content = f"## Fehler bei der Codeausführung\n\n" - response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n" - response_content += f"### Fehler\n\n```\n{error}\n```\n\n" - - # Add recommendation based on error - response_content += self._get_error_recommendation(error) - - response["content"] = response_content - else: - # No code to execute - response["content"] = "Ich konnte keinen ausführbaren Code finden oder generieren. Bitte geben Sie Python-Code an oder erläutern Sie Ihre Anforderungen genauer." - - # Finalize response - self.message_utils.finalize_message(response) - - # Log success - logging_utils.info("CoderAgent hat die Anfrage erfolgreich verarbeitet", "agents") - - return response - - except Exception as e: - error_msg = f"Fehler bei der Verarbeitung durch den CoderAgent: {str(e)}" - logging_utils.error(error_msg, "error") - - # Create error response - response["content"] = f"## Fehler bei der Verarbeitung\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```" - self.message_utils.finalize_message(response) - - return response - - - async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]], context: Dict[str, Any] = None) -> str: - """ - Generate Python code from a prompt using OpenAI service. - - Args: - prompt: The prompt to generate code from - documents: Documents associated with the prompt - context: Additional context information - - Returns: - Generated Python code - """ - try: - # Initialize AI service - chat_service = ChatService() - - # Prepare a detailed prompt for code generation - ai_prompt = self._prepare_code_prompt(prompt, documents) - - # Create messages for the OpenAI API - messages = [ - {"role": "system", "content": "You are a Python code generator. Generate only executable Python code without explanations. 
The code should be well-commented, handle errors appropriately, and follow best practices."}, - {"role": "user", "content": ai_prompt} - ] - - # Call the OpenAI API - logging.info(f"Calling OpenAI API to generate code") - generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens) - - # Extract code from the response (the AI might wrap it in markdown) - code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', generated_content) - - if code_blocks: - # Use the first code block found - return code_blocks[0].strip() - else: - # If no code block is found, return the raw response - return generated_content.strip() - - except Exception as e: - logging.error(f"Error generating code with AI: {str(e)}", exc_info=True) - # Return a basic error-handling code - estr=str(e).replace('"', '\\"') - return f""" - # Error during code generation - print(f"An error occurred during code generation: {estr}") - # Return an error result - result = {{"error": "Code generation failed", "message": "{estr}"}} - """ - - def _prepare_code_prompt(self, user_prompt: str, documents: List[Dict[str, Any]]) -> str: - """ - Prepares a detailed prompt for the AI to generate Python code. - - Args: - user_prompt: The original user request - documents: Available documents - - Returns: - A detailed prompt for code generation - """ - # Start with the user's request - prompt = f"""Generate Python code to solve the following task: - {user_prompt} - - """ - - # Add information about available documents - if documents: - prompt += "\nAvailable documents:\n" - for i, doc in enumerate(documents): - source = doc.get("source", {}) - doc_name = source.get("name", f"Document {i+1}") - doc_type = source.get("content_type", "unknown") - doc_id = source.get("id", "") - - prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id})\n" - - # Add information about how to access documents - prompt += """ -To access these documents, use: -- await load_file(file_id, encoding='utf-8') for text files -- await load_file(file_id) for binary files -""" \ No newline at end of file diff --git a/gwserver/modules/agentservice_dataextraction.py b/gwserver/modules/agentservice_dataextraction.py index dc9c5681..e957027b 100644 --- a/gwserver/modules/agentservice_dataextraction.py +++ b/gwserver/modules/agentservice_dataextraction.py @@ -17,59 +17,298 @@ async def data_extraction( ai_service, lucydom_interface = None, workflow_id: str = None, - add_log_func = None + add_log_func = None, + document_handler = None # Add this parameter ) -> Dict[str, Any]: """ - Führt einen AI Call durch, um zu bestimmen, welche Inhalte aus welchen Dateiobjekten extrahiert werden sollen, - und führt dann die notwendigen Extraktionen durch. + Performs AI-driven data extraction with support for the document handler. 

     Args:
-        prompt: Spezifizierung, welche Daten extrahiert werden sollen
-        files: Liste aller verfügbaren Dateien mit Metadaten
-        messages: Liste aller Nachrichten im Workflow
-        ai_service: Service für KI-Anfragen
-        lucydom_interface: Interface für Datenbankzugriffe (optional)
-        workflow_id: Optionale ID des Workflows für Logging
-        add_log_func: Optionale Funktion für das Hinzufügen von Logs
+        prompt: Specification of what data to extract
+        files: List of all available files with metadata
+        messages: List of all messages in the workflow
+        ai_service: Service for AI requests
+        lucydom_interface: Interface for database access (optional)
+        workflow_id: Optional workflow ID for logging
+        add_log_func: Optional function for adding logs
+        document_handler: Optional document handler for structured document operations

     Returns:
-        Strukturiertes Text-Objekt mit extrahierten Daten und Kontext-Informationen
+        Structured text object with extracted data and context information
     """
     try:
-        # 1. AI Call zur Bestimmung der notwendigen Extraktionen
+        # Create extraction plan using AI
         extraction_plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func)

-        # 2. Extraktionen durchführen
-        extracted_data = await _execute_extractions(
-            extraction_plan,
-            files,
-            messages,
-            lucydom_interface,
-            ai_service,
-            workflow_id,
-            add_log_func
-        )
-
-        # 3. Extrahierte Daten strukturieren
+        # Execute extractions, preferring the document handler if available
+        if document_handler:
+            extracted_data = await _execute_extractions_with_handler(
+                extraction_plan,
+                files,
+                messages,
+                document_handler,
+                ai_service,
+                workflow_id,
+                add_log_func
+            )
+        else:
+            # Fall back to the original implementation
+            extracted_data = await _execute_extractions(
+                extraction_plan,
+                files,
+                messages,
+                lucydom_interface,
+                ai_service,
+                workflow_id,
+                add_log_func
+            )
+
+        # Structure extracted data
         structured_result = _structure_extracted_data(extracted_data, files, prompt)

         return structured_result

     except Exception as e:
-        logger.error(f"Fehler bei der Datenextraktion: {str(e)}", exc_info=True)
+        logger.error(f"Error in data extraction: {str(e)}", exc_info=True)

-        # Fehler-Log hinzufügen
+        # Add error log
         if add_log_func and workflow_id:
-            add_log_func(workflow_id, f"Fehler bei der Datenextraktion: {str(e)}", "error")
+            add_log_func(workflow_id, f"Data extraction error: {str(e)}", "error")

-        # Fehler-Ergebnis zurückgeben
+        # Return error result
         return {
             "error": str(e),
             "status": "error",
             "files_processed": len(files),
-            "message": f"Die Datenextraktion konnte nicht durchgeführt werden: {str(e)}"
+            "message": f"Data extraction failed: {str(e)}"
         }
+
+
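# A minimal sketch of calling data_extraction as defined above; the file and
# message payloads are invented minimal examples, and the exact shape of the
# returned structure depends on _structure_extracted_data.
import asyncio

async def demo_extraction():
    from connectors.connector_aichat_openai import ChatService
    from modules.agentservice_dataextraction import data_extraction

    result = await data_extraction(
        prompt="Extract all invoice totals",
        files=[{"id": 1, "name": "invoice.pdf", "type": "document",
                "content_type": "application/pdf", "size": 12345}],
        messages=[],
        ai_service=ChatService(),
        document_handler=None,  # None falls back to the legacy extraction path
    )
    print(result)

# asyncio.run(demo_extraction())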
+async def _execute_extractions_with_handler(
+    extraction_plan: List[Dict[str, Any]],
+    files: List[Dict[str, Any]],
+    messages: List[Dict[str, Any]],
+    document_handler,
+    ai_service,
+    workflow_id: str = None,
+    add_log_func = None
+) -> List[Dict[str, Any]]:
+    """
+    Execute extractions using the document handler.
+
+    Args:
+        extraction_plan: List of extraction instructions
+        files: List of all available files
+        messages: List of all messages
+        document_handler: Document handler for structured operations
+        ai_service: Service for AI requests
+        workflow_id: Optional workflow ID for logging
+        add_log_func: Optional function for adding logs
+
+    Returns:
+        List with extracted data per file
+    """
+    extracted_data = []
+
+    # Sort by importance (highest first)
+    sorted_plan = sorted(extraction_plan, key=lambda x: x.get("importance", 0), reverse=True)
+
+    for extraction_item in sorted_plan:
+        file_id = extraction_item.get("file_id")
+        extract_needed = extraction_item.get("extract_needed", False)
+        extraction_prompt = extraction_item.get("extraction_prompt", "")
+
+        # Find file metadata
+        file_metadata = next((f for f in files if f.get("id") == file_id), None)
+
+        if not file_metadata:
+            logger.warning(f"File with ID {file_id} not found")
+            continue
+
+        file_name = file_metadata.get("name", "")
+        file_type = file_metadata.get("type", "")
+        content_type = file_metadata.get("content_type", "")
+
+        # Log
+        if add_log_func and workflow_id:
+            add_log_func(
+                workflow_id,
+                f"Processing file: {file_name} (Extraction needed: {extract_needed})",
+                "info"
+            )
+
+        # Only perform extraction if needed
+        if extract_needed:
+            # Find document in existing messages if available
+            existing_content = _find_document_in_messages(file_id, messages)
+
+            # Check if we should use the document handler for contextual extraction
+            if existing_content:
+                # The document exists but needs contextual extraction
+                document_id = existing_content.get("document_id")
+                message_id = existing_content.get("message_id")
+
+                if document_id and message_id:
+                    # Find the message containing the document
+                    for message in messages:
+                        if message.get("id") == message_id:
+                            # Extract content with context
+                            try:
+                                # Find document reference
+                                doc_reference = None
+                                for doc in message.get("documents", []):
+                                    if doc.get("id") == document_id:
+                                        doc_reference = doc
+                                        break
+
+                                if doc_reference:
+                                    # extract_document_content expects (doc_id, message, prompt)
+                                    # and returns the updated message, so read the refreshed
+                                    # text back out of the returned message
+                                    updated_message = await document_handler.extract_document_content(
+                                        document_id,
+                                        message,
+                                        extraction_prompt
+                                    )
+                                    extracted_text = document_handler.get_file_content_from_message(
+                                        updated_message,
+                                        doc_id=document_id
+                                    )
+
+                                    extracted_data.append({
+                                        "file_id": file_id,
+                                        "name": file_name,
+                                        "type": file_type,
+                                        "content": extracted_text,
+                                        "is_extracted": True,
+                                        "extraction_method": "contextual_extraction"
+                                    })
+
+                                    if add_log_func and workflow_id:
+                                        add_log_func(
+                                            workflow_id,
+                                            f"Contextual extraction for {file_name}: {extraction_prompt}",
+                                            "info"
+                                        )
+
+                                    continue
+                            except Exception as e:
+                                logger.error(f"Error in contextual extraction for {file_name}: {str(e)}")
+
+            # If we reach here, we need to perform a new extraction
+            try:
+                file_content = await document_handler.add_file_to_message(
+                    {},  # Empty message to extract just the document
+                    file_id,
+                    extraction_prompt
+                )
+
+                # Get the extracted content from the document
+                if "documents" in file_content and file_content["documents"]:
+                    doc = file_content["documents"][0]
+                    content_text = ""
+                    is_extracted = False
+
+                    for content in doc.get("contents", []):
+                        if content.get("type") == "text":
+                            content_text = content.get("text", "")
+                            is_extracted = content.get("is_extracted", False)
+                            break
+
+                    extracted_data.append({
+                        "file_id": file_id,
+                        "name": file_name,
+                        "type": file_type,
+                        "content": content_text,
+                        "is_extracted": is_extracted,
+                        "extraction_method": "document_handler"
+                    })
+
+                    if add_log_func and workflow_id:
+                        add_log_func(
+                            workflow_id,
+                            f"Extracted {file_name} using document handler",
+                            "info"
+                        )
+                else:
+                    # Extraction failed
+                    extracted_data.append({
+                        "file_id": file_id,
+                        "name": file_name,
+                        "type": file_type,
+                        "content": f"Failed to extract content from {file_name}",
+                        "is_extracted": False,
+                        "extraction_method": "failed"
+                    })
+            except Exception as e:
+                logger.error(f"Error extracting {file_name}: {str(e)}")
+                extracted_data.append({
+                    "file_id": file_id,
+                    "name": file_name,
+                    "type": file_type,
+                    "content": f"Error extracting: {str(e)}",
+                    "is_extracted": False,
+                    "extraction_method": "error"
+                })
+        else:
+            # No extraction needed, use existing content
+            existing_content = _find_document_in_messages(file_id, messages)
+
+            if existing_content:
+                extracted_data.append({
+                    "file_id": file_id,
+                    "name": file_name,
+                    "type": file_type,
+                    "content": existing_content.get("content", ""),
+                    "is_extracted": existing_content.get("is_extracted", False),
+                    "extraction_method": "existing_content"
+                })
+            else:
+                # No existing content found
+                extracted_data.append({
+                    "file_id": file_id,
+                    "name": file_name,
+                    "type": file_type,
+                    "content": f"No content available for {file_name}",
+                    "is_extracted": False,
+                    "extraction_method": "none"
+                })
+
+    return extracted_data
+
+
+def _find_document_in_messages(file_id: int, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
+    """
+    Find a document by file ID in workflow messages.
+
+    Args:
+        file_id: ID of the file to find
+        messages: List of messages to search
+
+    Returns:
+        Dictionary with document information, or an empty dict if not found
+    """
+    for message in messages:
+        for document in message.get("documents", []):
+            source = document.get("source", {})
+
+            # Check if the file ID matches (IDs may be stored as str or int)
+            if source.get("id") == str(file_id) or source.get("id") == file_id:
+                # Found the document
+                content_text = ""
+                is_extracted = False
+
+                # Look for text content
+                for content in document.get("contents", []):
+                    if content.get("type") == "text":
+                        content_text = content.get("text", "")
+                        is_extracted = content.get("is_extracted", False)
+                        break
+
+                return {
+                    "document_id": document.get("id"),
+                    "message_id": message.get("id"),
+                    "content": content_text,
+                    "is_extracted": is_extracted
+                }
+
+    return {}
+
+
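# A minimal sketch of the message/document shape that _find_document_in_messages
# above walks; the field names mirror the code, the concrete values are invented.
messages = [{
    "id": "msg_1",
    "documents": [{
        "id": "doc_abc",
        "source": {"type": "file", "id": "42", "name": "report.txt"},
        "contents": [{"type": "text", "text": "Quarterly summary ...",
                      "is_extracted": True}],
    }],
}]

hit = _find_document_in_messages(42, messages)
print(hit.get("document_id"), hit.get("is_extracted"))  # doc_abc True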
 async def _create_extraction_plan(
     prompt: str,
     files: List[Dict[str, Any]],
@@ -454,6 +693,7 @@ def _structure_extracted_data(
 def _extract_document_contents_from_messages(file_id: int, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
     """
     Extract document contents for a specific file from workflow messages.
+    Enhanced to handle the new document structure.

     Args:
         file_id: ID of the file
@@ -469,13 +709,21 @@
         for document in message.get("documents", []):
             source = document.get("source", {})

-            # Check if file ID matches
-            if source.get("id") == file_id or (source.get("type") == "file" and source.get("id") == file_id):
+            # Check if the file ID matches; compare as strings so int and str
+            # IDs match without risking a ValueError from an int() conversion
+            if str(source.get("id")) == str(file_id):
+
                 # Add contents of the file
                 doc_contents = document.get("contents", [])
                 if doc_contents:
-                    contents.extend(doc_contents)
+                    # Ensure each content carries a reference to its document
+                    for content in doc_contents:
+                        content_copy = content.copy()
+                        content_copy["document_id"] = document.get("id")
+                        content_copy["message_id"] = message.get("id")
+                        contents.append(content_copy)

     return contents
diff --git a/gwserver/modules/agentservice_document_handler.py b/gwserver/modules/agentservice_document_handler.py
new file mode 100644
index 00000000..15aa64b0
--- /dev/null
+++ b/gwserver/modules/agentservice_document_handler.py
@@ -0,0 +1,498 @@
+"""
+Enhanced document handling module for the Agentservice.
+"""
+
+import os
+import logging
+import uuid
+from datetime import datetime
+from typing import List, Dict, Any, Optional, Tuple, Union
+
+logger = logging.getLogger(__name__)
+
+class DocumentHandler:
+    """
+    Centralized document handler for consistent document management across the system.
+    """
+
+    def __init__(self, workflow_id: str = None, lucydom_interface = None, ai_service = None):
+        """Initialize the document handler."""
+        self.workflow_id = workflow_id
+        self.lucydom_interface = lucydom_interface
+        self.ai_service = ai_service
+
+        # Import necessary utilities
+        from modules.agentservice_filemanager import get_file_manager
+        self.file_manager = get_file_manager()
+
+    def set_workflow_id(self, workflow_id: str):
+        """Set or update the workflow ID."""
+        self.workflow_id = workflow_id
+
+    def set_lucydom_interface(self, lucydom_interface):
+        """Set or update the LucyDOM interface."""
+        self.lucydom_interface = lucydom_interface
+
+    def set_ai_service(self, ai_service):
+        """Set or update the AI service."""
+        self.ai_service = ai_service
+
+    async def add_file_to_message(self, message: Dict[str, Any], file_id: int, extraction_prompt: str = None) -> Dict[str, Any]:
+        """
+        Add a file to a message with optional contextual extraction.
+ + Args: + message: The message to add the file to + file_id: ID of the file to add + extraction_prompt: Optional prompt for contextual extraction (e.g., for images) + + Returns: + Updated message with the file added + """ + if not self.lucydom_interface: + logger.error("LucyDOM interface not available") + return message + + try: + # Get file metadata + file = self.lucydom_interface.get_file(file_id) + if not file: + logger.warning(f"File with ID {file_id} not found") + return message + + # Get necessary file information + file_name = file.get("name", "unnamed_file") + file_type = file.get("type", "unknown") + content_type = file.get("content_type") + + # Initialize documents array if needed + if "documents" not in message: + message["documents"] = [] + + # Check if file is already in the message + file_already_added = any( + doc.get("source", {}).get("id") == str(file_id) + for doc in message.get("documents", []) + ) + + if file_already_added: + logger.info(f"File {file_name} already exists in message, skipping") + return message + + # Create a unique document ID + doc_id = f"doc_{uuid.uuid4()}" + + # Create document structure + document = { + "id": doc_id, + "source": { + "type": "file", + "id": str(file_id), + "name": file_name, + "content_type": content_type, + "size": file.get("size"), + "upload_date": file.get("upload_date", datetime.now().isoformat()) + }, + "contents": [] + } + + # Only read content if we have extraction prompt or specific types + if (extraction_prompt or + file_type in ["document", "text"] or + (content_type and content_type.startswith("text/"))): + + # Read file content + file_content = await self.lucydom_interface.read_file_content(file_id) + + if file_content: + # Process based on file type + if file_type == "image" or (content_type and content_type.startswith("image/")): + # Image analysis if prompt provided + if extraction_prompt and self.ai_service and hasattr(self.ai_service, "analyze_image"): + try: + image_analysis = await self.ai_service.analyze_image( + image_data=file_content, + prompt=extraction_prompt or "Describe this image in detail", + mime_type=content_type + ) + + # Add the analysis as text content + document["contents"].append({ + "type": "text", + "text": f"Image Analysis:\n{image_analysis}", + "is_extracted": True, + "extraction_context": extraction_prompt + }) + + logger.info(f"Added image analysis for {file_name} to message") + except Exception as e: + logger.error(f"Error analyzing image {file_name}: {str(e)}") + document["contents"].append({ + "type": "text", + "text": f"Image file: {file_name} (Analysis failed: {str(e)})", + "is_extracted": False + }) + else: + # Just add placeholder if no analysis available + document["contents"].append({ + "type": "text", + "text": f"Image file: {file_name} (no analysis requested)", + "is_extracted": False + }) + else: + # For other file types, extract text + from modules.agentservice_utils import extract_text_from_file_content + + content, is_extracted = extract_text_from_file_content( + file_content, file_name, content_type + ) + + document["contents"].append({ + "type": "text", + "text": content, + "is_extracted": is_extracted, + "extraction_context": extraction_prompt + }) + + logger.info(f"Added text content for {file_name} to message (extracted: {is_extracted})") + else: + # No content available + document["contents"].append({ + "type": "text", + "text": f"File content not available for {file_name}", + "is_extracted": False + }) + else: + # Just add reference without content + 
document["contents"].append({ + "type": "text", + "text": f"File: {file_name} (content not loaded)", + "is_extracted": False + }) + + # Add document to message + message["documents"].append(document) + + logger.info(f"File {file_name} successfully added to message") + return message + + except Exception as e: + logger.error(f"Error adding file {file_id} to message: {str(e)}") + return message + + async def add_files_to_message(self, message: Dict[str, Any], file_ids: List[int], extraction_prompt: str = None) -> Dict[str, Any]: + """ + Add multiple files to a message. + + Args: + message: The message to add files to + file_ids: List of file IDs to add + extraction_prompt: Optional prompt for contextual extraction + + Returns: + Updated message with files added + """ + updated_message = message.copy() + + for file_id in file_ids: + updated_message = await self.add_file_to_message(updated_message, file_id, extraction_prompt) + + return updated_message + + async def extract_document_content(self, doc_id: str, message: Dict[str, Any], extraction_prompt: str) -> Dict[str, Any]: + """ + Extract or update document content with contextual extraction. + + Args: + doc_id: ID of the document to extract + message: Message containing the document + extraction_prompt: Contextual prompt for extraction + + Returns: + Updated message with extracted content + """ + if not message or "documents" not in message: + return message + + updated_message = message.copy() + + # Find the document + for i, document in enumerate(updated_message.get("documents", [])): + if document.get("id") == doc_id: + # Get file ID from source + source = document.get("source", {}) + file_id = source.get("id") + + if file_id and self.lucydom_interface: + # Get file metadata + file = self.lucydom_interface.get_file(int(file_id)) + if not file: + continue + + # Get file content + file_content = await self.lucydom_interface.read_file_content(int(file_id)) + if not file_content: + continue + + # Process based on file type + file_name = file.get("name", "unnamed_file") + file_type = file.get("type", "unknown") + content_type = file.get("content_type") + + # Update content based on file type + if file_type == "image" or (content_type and content_type.startswith("image/")): + if self.ai_service and hasattr(self.ai_service, "analyze_image"): + try: + image_analysis = await self.ai_service.analyze_image( + image_data=file_content, + prompt=extraction_prompt, + mime_type=content_type + ) + + # Create or update content + new_content = { + "type": "text", + "text": f"Image Analysis:\n{image_analysis}", + "is_extracted": True, + "extraction_context": extraction_prompt + } + + # Update or add content + contents = document.get("contents", []) + contents_updated = False + + for j, content in enumerate(contents): + if content.get("type") == "text": + updated_message["documents"][i]["contents"][j] = new_content + contents_updated = True + break + + if not contents_updated: + if not updated_message["documents"][i].get("contents"): + updated_message["documents"][i]["contents"] = [] + updated_message["documents"][i]["contents"].append(new_content) + + logger.info(f"Updated image analysis for {file_name} with new context: {extraction_prompt}") + except Exception as e: + logger.error(f"Error updating image analysis for {file_name}: {str(e)}") + else: + # For other file types, extract text with new context + from modules.agentservice_utils import extract_text_from_file_content + + content, is_extracted = extract_text_from_file_content( + file_content, 
file_name, content_type + ) + + new_content = { + "type": "text", + "text": content, + "is_extracted": is_extracted, + "extraction_context": extraction_prompt + } + + # Update or add content + contents = document.get("contents", []) + contents_updated = False + + for j, content_item in enumerate(contents): + if content_item.get("type") == "text": + updated_message["documents"][i]["contents"][j] = new_content + contents_updated = True + break + + if not contents_updated: + if not updated_message["documents"][i].get("contents"): + updated_message["documents"][i]["contents"] = [] + updated_message["documents"][i]["contents"].append(new_content) + + logger.info(f"Updated text extraction for {file_name} with new context: {extraction_prompt}") + + # Found and processed the document, stop searching + break + + return updated_message + + async def extract_files_from_workflow(self, workflow: Dict[str, Any], extraction_prompt: str, file_filter: str = None) -> Dict[str, Any]: + """ + Extract all relevant files from a workflow with context-aware extraction. + + Args: + workflow: The workflow object + extraction_prompt: Contextual prompt for extraction + file_filter: Optional filter for file types (e.g., "csv", "image") + + Returns: + Dictionary with extracted content + """ + # Import for data extraction + from modules.agentservice_dataextraction import data_extraction + + # Get all files from the workflow + files = [] + + # Process all messages + for message in workflow.get("messages", []): + # Extract documents from the message + for doc in message.get("documents", []): + source = doc.get("source", {}) + + # Only include file documents + if source.get("type") == "file": + file_info = { + "id": source.get("id", ""), + "name": source.get("name", ""), + "type": source.get("type", ""), + "content_type": source.get("content_type", ""), + "size": source.get("size", 0) + } + + # Apply filter if provided + if file_filter: + file_name = file_info.get("name", "").lower() + content_type = file_info.get("content_type", "").lower() + + if (file_filter.lower() in file_name or + file_filter.lower() in content_type): + # Check if file is already in the list + if not any(f.get("id") == file_info["id"] for f in files): + files.append(file_info) + else: + # No filter, include all files + if not any(f.get("id") == file_info["id"] for f in files): + files.append(file_info) + + # If no files found, return empty result + if not files: + return { + "prompt": extraction_prompt, + "files_processed": 0, + "extracted_content": [] + } + + # Get all messages from the workflow + workflow_messages = workflow.get("messages", []) + + # Extract data using the dataextraction module + extracted_data = await data_extraction( + prompt=extraction_prompt, + files=files, + messages=workflow_messages, + ai_service=self.ai_service, + lucydom_interface=self.lucydom_interface, + workflow_id=self.workflow_id, + add_log_func=None # We don't have access to add_log_func here + ) + + return extracted_data + + def get_file_content_from_message(self, message: Dict[str, Any], file_id: int = None, doc_id: str = None) -> str: + """ + Get file content from a message. 
+
+        Args:
+            message: The message containing the document
+            file_id: Optional file ID to search for
+            doc_id: Optional document ID to search for
+
+        Returns:
+            Text content of the file if available
+        """
+        if not message or "documents" not in message:
+            return ""
+
+        # Search for the document
+        for document in message.get("documents", []):
+            # Match by document ID or file ID
+            source = document.get("source", {})
+            source_file_id = source.get("id")
+
+            if ((doc_id and document.get("id") == doc_id) or
+                (file_id and source_file_id and str(file_id) == str(source_file_id))):
+
+                # Get text content from document
+                for content in document.get("contents", []):
+                    if content.get("type") == "text":
+                        return content.get("text", "")
+
+        return ""
+
+    def create_text_document(self, message: Dict[str, Any], content: str, title: str = "Generated Text") -> Dict[str, Any]:
+        """
+        Create a new text document in a message.
+
+        Args:
+            message: The message to add the document to
+            content: Text content
+            title: Document title
+
+        Returns:
+            Updated message with the new document
+        """
+        # Initialize documents array if needed
+        updated_message = message.copy()
+        if "documents" not in updated_message:
+            updated_message["documents"] = []
+
+        # Create document ID
+        doc_id = f"doc_{uuid.uuid4()}"
+
+        # Create document structure
+        document = {
+            "id": doc_id,
+            "source": {
+                "type": "generated",
+                "id": doc_id,
+                "name": title,
+                "content_type": "text/plain",
+                "size": len(content)
+            },
+            "contents": [
+                {
+                    "type": "text",
+                    "text": content,
+                    "is_extracted": True
+                }
+            ]
+        }
+
+        # Add document to message
+        updated_message["documents"].append(document)
+
+        logger.info(f"Created text document '{title}' in message")
+        return updated_message
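For orientation, a minimal round-trip sketch using the helpers above together with merge_document_contents and the get_document_handler factory defined just below. The workflow ID is a placeholder, and the LucyDOM/AI services are deliberately left unset; both are optional in the factory:

from modules.agentservice_document_handler import get_document_handler

handler = get_document_handler(workflow_id="wf_demo")  # placeholder ID
message = {"role": "user", "content": "Summarize the attached notes."}

# Attach a generated text document to the message
message = handler.create_text_document(message, "Q1 revenue grew 12%.", title="Notes")
doc_id = message["documents"][0]["id"]

# Read it back by document ID; file_id lookups resolve via source["id"] instead
text = handler.get_file_content_from_message(message, doc_id=doc_id)
assert text == "Q1 revenue grew 12%."

# Collapse every document into one prompt-ready string
# with "--- <name> ---" separators
print(handler.merge_document_contents(message))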
+
+    def merge_document_contents(self, message: Dict[str, Any]) -> str:
+        """
+        Merge all document contents from a message into a single text.
+
+        Args:
+            message: The message containing documents
+
+        Returns:
+            Combined text content from all documents
+        """
+        if not message or "documents" not in message:
+            return ""
+
+        combined_text = ""
+
+        for document in message.get("documents", []):
+            source = document.get("source", {})
+            doc_name = source.get("name", "Unnamed Document")
+
+            # Extract text content
+            doc_text = ""
+            for content in document.get("contents", []):
+                if content.get("type") == "text":
+                    doc_text = content.get("text", "")
+                    break
+
+            if doc_text:
+                combined_text += f"\n\n--- {doc_name} ---\n\n{doc_text}"
+
+        return combined_text.strip()
+
+# Factory function
+def get_document_handler(workflow_id: str = None, lucydom_interface = None, ai_service = None) -> DocumentHandler:
+    """Get a document handler instance."""
+    return DocumentHandler(workflow_id, lucydom_interface, ai_service)
\ No newline at end of file
diff --git a/gwserver/modules/agentservice_filemanager.py b/gwserver/modules/agentservice_filemanager.py
index 99203c30..bcdfaffa 100644
--- a/gwserver/modules/agentservice_filemanager.py
+++ b/gwserver/modules/agentservice_filemanager.py
@@ -34,6 +34,8 @@
 class FileExtractionError(Exception):
     """Exception for file extraction errors."""
     pass
+
+
 
 class FileManager:
     """Central file management for the Agentservice."""
@@ -54,31 +56,30 @@ class FileManager:
 
         # Import utilities
         # Instead of storing file_utils, we'll use the imported functions directly
-    
+
     async def read_file_contents(self, 
                                  file_contexts: List[Dict[str, Any]], 
                                  lucydom_interface,
                                  workflow_id: str = None, 
                                  add_log_func = None,
-                                 ai_service = None  # AI service parameter for image analysis
+                                 ai_service = None,
+                                 extraction_context: str = None  # Optional context prompt for extraction
                                  ) -> Dict[str, Dict[str, Any]]:
         """
-        Liest den Inhalt aller Dateien und führt bei Bildern und Dokumenten Analysen durch.
-        Verwendet LucyDOM-Interface statt direkter Dateizugriffe.
-        Gibt jetzt ein Dictionary mit Dateiinhalten und Extraktionsstatus zurück.
+        Read file contents with optional contextual extraction.
 
         Args:
-            file_contexts: Liste der Dateikontexte mit Metadaten
-            lucydom_interface: LucyDOM-Interface für Dateizugriffe
-            workflow_id: Optionale ID des Workflows für Logging
-            add_log_func: Optionale Funktion für das Hinzufügen von Logs
-            ai_service: Optionaler AI-Service für die Bildanalyse
+            file_contexts: List of file contexts with metadata
+            lucydom_interface: LucyDOM interface for file access
+            workflow_id: Optional workflow ID for logging
+            add_log_func: Optional function for adding logs
+            ai_service: AI service for image analysis
+            extraction_context: Optional context prompt for extraction
 
         Returns:
-            Dictionary mit Dateiinhalten und Metadaten (file_id -> {content, is_extracted, ...})
+            Dictionary with file contents and metadata
         """
-        file_contents = {}
-
+        file_contents = {}
         # Add debug logging
         logger.info(f"Reading contents of {len(file_contexts)} files for workflow {workflow_id}")
@@ -88,8 +89,6 @@ class FileManager:
             file_type = file.get("type", "unknown")
             content_type = file.get("content_type")
             
-            print("DEGUB5:",file_name,file_type)
-            
             try:
                 # Fetch the file content via the LucyDOM interface
                 file_data = await lucydom_interface.read_file_content(file_id)
@@ -107,24 +106,26 @@ class FileManager:
                 
                 logger.info(f"Successfully read file: {file_name} (ID: {file_id}, Type: {file_type})")
                 
-                # Bildverarbeitung - immer KI-Analyse verwenden, wenn verfügbar
+                # For image analysis, add extraction context
                 if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
                     if ai_service and hasattr(ai_service, "analyze_image"):
                         try:
+                            # Use extraction context if provided
+                            prompt = extraction_context or "Describe this image in detail"
+                            
                             image_analysis = await ai_service.analyze_image(
                                 image_data=file_data,
-                                prompt="Describe this image in detail",
+                                prompt=prompt,  # Use contextual prompt
                                 mime_type=content_type
                             )
                             
-                            logger.debug(f"Image analysis successfully generated for {file_name}")
-                            
                             file_contents[file_id] = {
                                 "content": f"Image Analysis:\n{image_analysis}",
-                                "is_extracted": False,  # Bildanalyse gilt nicht als Text-Extraktion
+                                "is_extracted": True,  # Mark as extracted
                                 "name": file_name,
                                 "type": file_type,
-                                "content_type": content_type
+                                "content_type": content_type,
+                                "extraction_context": prompt  # Store the used prompt
                             }
                             _log(add_log_func, workflow_id, f"Image {file_name} analyzed successfully", "info")
                         except Exception as e:
@@ -189,51 +190,43 @@ class FileManager:
     @staticmethod
     def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> Dict[str, Any]:
         """
-        Fügt eine Datei zu einer Nachricht hinzu mit Kennzeichnung, ob Text extrahiert wurde.
+        Add a file to a message with consistent document structure.
Args: - message: Die zu erweiternde Nachricht - file_data: Dateimetadaten und Inhalt + message: The message to add the file to + file_data: File metadata and content Returns: - Die aktualisierte Nachricht mit der Datei + Updated message with the file added """ - # Detailliertes Logging für Debugging logger.info(f"Adding file to message: {file_data.get('name', 'unnamed_file')} (ID: {file_data.get('id', 'unknown')})") # Initialize documents array if needed if "documents" not in message: message["documents"] = [] - logger.debug("Initialized empty documents array in message") # Create a unique ID for the document if not provided doc_id = file_data.get("id", f"file_{uuid.uuid4()}") - # Extract file size if available + # Extract metadata file_size = file_data.get("size") if isinstance(file_size, str) and file_size.isdigit(): file_size = int(file_size) elif file_size is None and file_data.get("content"): - # Estimate size from content if not provided file_size = len(file_data.get("content", "")) - # Bestimmen, ob der Inhalt bereits extrahiert wurde + # Determine if content is already extracted content = file_data.get("content", "No content available") file_name = file_data.get("name", "unnamed_file") content_type = file_data.get("content_type") - - # Prüfen, ob der Inhalt als extrahiert markiert werden sollte is_extracted = file_data.get("is_extracted", False) - if not is_extracted and isinstance(content, str) and content.strip() and file_name: - # Wenn nicht explizit markiert, aber Inhalt vorhanden ist, prüfen wir den Dateityp - is_extracted = is_text_extractable(file_name, content_type) - # Create standard document structure that matches the data model + # Create standard document structure that follows the data model document = { - "id": doc_id, + "id": f"doc_{uuid.uuid4()}", # Unique document ID separate from file ID "source": { "type": "file", - "id": file_data.get("id", doc_id), + "id": doc_id, "name": file_name, "content_type": content_type, "size": file_size, @@ -243,28 +236,27 @@ class FileManager: { "type": "text", "text": content, - "is_extracted": is_extracted # Flag für den Extraktionsstatus hinzufügen + "is_extracted": is_extracted, + "extraction_context": file_data.get("extraction_context", None) } ] } - # Log document structure for debugging - logger.debug(f"Created document structure: id={doc_id}, name={file_name}, is_extracted={is_extracted}") - - # Check if file is already in the message to avoid duplicates + # Check if file is already in the message file_already_added = any( - doc.get("source", {}).get("id") == file_data.get("id") + doc.get("source", {}).get("id") == doc_id for doc in message.get("documents", []) ) if not file_already_added: message["documents"].append(document) - logger.info(f"File {file_name} successfully added to message (total: {len(message.get('documents', []))} files)") + logger.info(f"File {file_name} added to message (total: {len(message.get('documents', []))} files)") else: logger.info(f"File {file_name} already exists in message, skipping") return message + async def analyze_file(self, file_id: int, prompt: str, lucydom_interface, ai_service) -> Dict[str, Any]: """ Analyze a file using the appropriate method based on file type. @@ -755,6 +747,85 @@ class FileManager: return file_contexts + def create_document_reference(self, message: Dict[str, Any], file_id: int, reference_type: str = "reference") -> Dict[str, Any]: + """ + Create a document reference without loading content. 
+ + Args: + message: The message to add the reference to + file_id: ID of the file to reference + reference_type: Type of reference (reference, citation, etc.) + + Returns: + Updated message with the document reference + """ + if not self.lucydom_interface: + logger.warning("LucyDOM interface not available for document reference") + return message + + # Get file metadata + file = self.lucydom_interface.get_file(file_id) + if not file: + logger.warning(f"File with ID {file_id} not found for reference") + return message + + # Create document structure with just the reference + document = { + "id": f"ref_{uuid.uuid4()}", + "source": { + "type": "file", + "id": str(file_id), + "name": file.get("name", "referenced_file"), + "content_type": file.get("content_type"), + "size": file.get("size"), + "reference_type": reference_type + }, + "contents": [] # Empty contents - will be loaded on demand + } + + # Add to message + updated_message = message.copy() + if "documents" not in updated_message: + updated_message["documents"] = [] + + updated_message["documents"].append(document) + logger.info(f"Added document reference for file {file.get('name')} (ID: {file_id})") + + return updated_message + + def should_extract_document(self, document: Dict[str, Any], context_prompt: str = None) -> bool: + """ + Determine if a document needs content extraction. + + Args: + document: The document object + context_prompt: Current context prompt + + Returns: + True if extraction is needed, False otherwise + """ + # If document has no contents, extraction is needed + if not document.get("contents"): + return True + + # If document has contents but extraction status is False, extraction may be needed + for content in document.get("contents", []): + if content.get("type") == "text": + # If already extracted, check if context has changed + if content.get("is_extracted", False): + # If context prompt is different from what was used previously, + # we may need to re-extract with the new context + prev_context = content.get("extraction_context") + if context_prompt and prev_context != context_prompt: + return True + return False + return True + + # Default to needing extraction + return True + + + # Factory method @staticmethod def get_instance(): @@ -763,7 +834,6 @@ class FileManager: FileManager._instance = FileManager() return FileManager._instance - # Create a singleton instance for module-level access file_manager = FileManager.get_instance() @@ -772,6 +842,8 @@ def get_file_manager(): return file_manager + + class WorkflowFileManager: """ Specialized file manager for workflow operations. 
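The two helpers above enable a deferred-extraction pattern: register cheap references first, then extract only when an activity actually needs the content. A sketch of that flow; the file ID is invented, and it assumes the singleton file manager was handed a LucyDOM interface elsewhere:

from modules.agentservice_filemanager import get_file_manager

fm = get_file_manager()
message = {"role": "user", "content": "Compare the attached reports."}

# 1. Attach the file as a lightweight reference; contents stay empty
message = fm.create_document_reference(message, file_id=42)

# 2. Decide per document, and per context prompt, whether to extract
prompt = "Extract all revenue figures"
for doc in message.get("documents", []):
    if fm.should_extract_document(doc, context_prompt=prompt):
        # Empty contents, or a changed extraction_context, trigger a
        # (re)load here, e.g. via DocumentHandler.extract_document_content
        ...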
@@ -789,6 +861,7 @@ class WorkflowFileManager: self.workflow_id = workflow_id self.lucydom_interface = lucydom_interface self.file_manager = get_file_manager() + self.document_handler = None def set_workflow_id(self, workflow_id: str): """Set or update the workflow ID.""" @@ -813,6 +886,15 @@ class WorkflowFileManager: Returns: Updated message """ + + # If document handler is available, use it + if self.document_handler: + return await self.document_handler.add_files_to_message( + message, + file_ids, + extraction_prompt=None # Default to no extraction + ) + if not self.lucydom_interface: _log(add_log_func, self.workflow_id, "LucyDOM interface not available", "error") return message @@ -988,7 +1070,6 @@ class WorkflowFileManager: return analysis - # Export the workflow file manager factory function def get_workflow_file_manager(workflow_id: str = None, lucydom_interface = None): """Get a workflow file manager instance.""" diff --git a/gwserver/modules/agentservice_protocol.py b/gwserver/modules/agentservice_protocol.py new file mode 100644 index 00000000..357e66d0 --- /dev/null +++ b/gwserver/modules/agentservice_protocol.py @@ -0,0 +1,338 @@ +""" +Agent Communication Protocol module for the Agentservice. +Defines a standardized format for agents to exchange information. +""" + +import json +import uuid +from typing import Dict, Any, List, Optional +from datetime import datetime + +class AgentMessage: + """ + Standard message format for inter-agent communication. + Includes content, metadata, and document references. + """ + + def __init__( + self, + content: str, + sender_id: str, + receiver_id: Optional[str] = None, + message_type: str = "text", + metadata: Optional[Dict[str, Any]] = None, + documents: Optional[List[Dict[str, Any]]] = None, + context_id: Optional[str] = None + ): + """ + Initialize an agent message. + + Args: + content: The main message content + sender_id: ID of the sending agent + receiver_id: Optional ID of the receiving agent + message_type: Type of message (text, task, result, etc.) 
+ metadata: Optional metadata dictionary + documents: Optional list of document references + context_id: Optional conversation context ID + """ + self.id = f"msg_{uuid.uuid4()}" + self.timestamp = datetime.now().isoformat() + self.content = content + self.sender_id = sender_id + self.receiver_id = receiver_id + self.message_type = message_type + self.metadata = metadata or {} + self.documents = documents or [] + self.context_id = context_id + + def to_dict(self) -> Dict[str, Any]: + """Convert the message to a dictionary.""" + return { + "id": self.id, + "timestamp": self.timestamp, + "content": self.content, + "sender_id": self.sender_id, + "receiver_id": self.receiver_id, + "message_type": self.message_type, + "metadata": self.metadata, + "documents": self.documents, + "context_id": self.context_id + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'AgentMessage': + """Create a message from a dictionary.""" + message = cls( + content=data.get("content", ""), + sender_id=data.get("sender_id", "unknown"), + receiver_id=data.get("receiver_id"), + message_type=data.get("message_type", "text"), + metadata=data.get("metadata", {}), + documents=data.get("documents", []), + context_id=data.get("context_id") + ) + message.id = data.get("id", message.id) + message.timestamp = data.get("timestamp", message.timestamp) + return message + + def to_json(self) -> str: + """Convert the message to a JSON string.""" + return json.dumps(self.to_dict()) + + @classmethod + def from_json(cls, json_str: str) -> 'AgentMessage': + """Create a message from a JSON string.""" + return cls.from_dict(json.loads(json_str)) + +class AgentCommunicationProtocol: + """ + Defines the protocol for agents to communicate with each other. + Provides standardized message creation and handling. 
+ """ + + @staticmethod + def create_text_message( + content: str, + sender_id: str, + receiver_id: Optional[str] = None, + metadata: Optional[Dict[str, Any]] = None, + documents: Optional[List[Dict[str, Any]]] = None, + context_id: Optional[str] = None + ) -> AgentMessage: + """Create a simple text message.""" + return AgentMessage( + content=content, + sender_id=sender_id, + receiver_id=receiver_id, + message_type="text", + metadata=metadata, + documents=documents, + context_id=context_id + ) + + @staticmethod + def create_task_message( + task_description: str, + sender_id: str, + receiver_id: str, + input_data: Optional[Dict[str, Any]] = None, + documents: Optional[List[Dict[str, Any]]] = None, + context_id: Optional[str] = None + ) -> AgentMessage: + """Create a task assignment message.""" + metadata = { + "task_type": "general", + "input_data": input_data or {}, + "priority": "normal", + "task_id": f"task_{uuid.uuid4()}" + } + + return AgentMessage( + content=task_description, + sender_id=sender_id, + receiver_id=receiver_id, + message_type="task", + metadata=metadata, + documents=documents, + context_id=context_id + ) + + @staticmethod + def create_result_message( + result_content: str, + sender_id: str, + receiver_id: str, + task_id: str, + output_data: Optional[Dict[str, Any]] = None, + result_format: str = "text", + documents: Optional[List[Dict[str, Any]]] = None, + context_id: Optional[str] = None + ) -> AgentMessage: + """Create a task result message.""" + metadata = { + "task_id": task_id, + "result_format": result_format, + "status": "completed", + "output_data": output_data or {} + } + + return AgentMessage( + content=result_content, + sender_id=sender_id, + receiver_id=receiver_id, + message_type="result", + metadata=metadata, + documents=documents, + context_id=context_id + ) + + @staticmethod + def create_error_message( + error_description: str, + sender_id: str, + receiver_id: Optional[str] = None, + error_type: str = "general", + error_details: Optional[Dict[str, Any]] = None, + context_id: Optional[str] = None + ) -> AgentMessage: + """Create an error message.""" + metadata = { + "error_type": error_type, + "error_details": error_details or {}, + "severity": "error" + } + + return AgentMessage( + content=error_description, + sender_id=sender_id, + receiver_id=receiver_id, + message_type="error", + metadata=metadata, + context_id=context_id + ) + + @staticmethod + def create_document_request_message( + document_description: str, + sender_id: str, + receiver_id: str, + filters: Optional[Dict[str, Any]] = None, + context_id: Optional[str] = None + ) -> AgentMessage: + """Create a document request message.""" + metadata = { + "request_type": "document", + "filters": filters or {}, + "request_id": f"req_{uuid.uuid4()}" + } + + return AgentMessage( + content=document_description, + sender_id=sender_id, + receiver_id=receiver_id, + message_type="request", + metadata=metadata, + context_id=context_id + ) + + @staticmethod + def create_status_update_message( + status_description: str, + sender_id: str, + receiver_id: Optional[str] = None, + status: str = "in_progress", + progress: float = 0.0, + context_id: Optional[str] = None + ) -> AgentMessage: + """Create a status update message.""" + metadata = { + "status": status, + "progress": progress, + "update_type": "status" + } + + return AgentMessage( + content=status_description, + sender_id=sender_id, + receiver_id=receiver_id, + message_type="status", + metadata=metadata, + context_id=context_id + ) + + @staticmethod + def 
convert_system_message_to_agent_message(system_message: Dict[str, Any], sender_id: str) -> AgentMessage:
+        """
+        Convert a system message to an agent message.
+
+        Args:
+            system_message: Message object from the workflow
+            sender_id: ID of the sending agent
+
+        Returns:
+            AgentMessage instance
+        """
+        # Extract basic information
+        content = system_message.get("content", "")
+        message_id = system_message.get("id", f"msg_{uuid.uuid4()}")
+        timestamp = system_message.get("started_at", datetime.now().isoformat())
+
+        # Create metadata
+        metadata = {
+            "agent_type": system_message.get("agent_type"),
+            "agent_name": system_message.get("agent_name"),
+            "workflow_id": system_message.get("workflow_id"),
+            "sequence_no": system_message.get("sequence_no"),
+            "result_format": system_message.get("result_format"),
+            "original_message_id": message_id
+        }
+
+        # Create agent message
+        agent_message = AgentMessage(
+            content=content,
+            sender_id=sender_id,
+            message_type="system",
+            metadata=metadata,
+            documents=system_message.get("documents", []),
+            context_id=system_message.get("workflow_id")
+        )
+
+        # Set original ID and timestamp
+        agent_message.id = message_id
+        agent_message.timestamp = timestamp
+
+        return agent_message
+
+    @staticmethod
+    def convert_agent_message_to_system_message(agent_message: AgentMessage) -> Dict[str, Any]:
+        """
+        Convert an agent message to a system message.
+
+        Args:
+            agent_message: The agent message to convert
+
+        Returns:
+            System message dictionary
+        """
+        message_data = agent_message.to_dict()
+        metadata = message_data.get("metadata", {})
+
+        # Create system message structure
+        system_message = {
+            "id": message_data.get("id", f"msg_{uuid.uuid4()}"),
+            "workflow_id": message_data.get("context_id"),
+            "started_at": message_data.get("timestamp", datetime.now().isoformat()),
+            "finished_at": datetime.now().isoformat(),
+            "sequence_no": metadata.get("sequence_no", 0),
+
+            "status": "completed",
+            "role": "assistant",
+
+            "data_stats": {
+                "processing_time": 0.0,
+                "token_count": 0,
+                "bytes_sent": 0,
+                "bytes_received": 0
+            },
+
+            "agent_type": metadata.get("agent_type"),
+            "agent_id": message_data.get("sender_id"),
+            "agent_name": metadata.get("agent_name"),
+            "result_format": metadata.get("result_format", "text"),
+
+            "content": message_data.get("content", ""),
+            "documents": message_data.get("documents", [])
+        }
+
+        # If this is a result message, add more metadata
+        if message_data.get("message_type") == "result":
+            system_message["output_data"] = metadata.get("output_data", {})
+            system_message["task_id"] = metadata.get("task_id")
+
+        return system_message
+
+# Factory function
+def get_agent_protocol():
+    """Get the agent communication protocol."""
+    return AgentCommunicationProtocol
\ No newline at end of file
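A short usage sketch for the protocol defined above; the agent IDs and payloads are illustrative:

from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol as ACP

# Moderator hands a task to the analyst
task = ACP.create_task_message(
    task_description="Extract all invoice totals from the attached PDF.",
    sender_id="moderator",
    receiver_id="analyst_agent",
    input_data={"currency": "EUR"},
)

# Analyst reports back, correlating via the generated task_id
result = ACP.create_result_message(
    result_content="Found 3 invoices totalling 4,180 EUR.",
    sender_id="analyst_agent",
    receiver_id="moderator",
    task_id=task.metadata["task_id"],
)

# Messages survive a JSON round trip unchanged
assert AgentMessage.from_json(result.to_json()).to_dict() == result.to_dict()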
diff --git a/gwserver/modules/agentservice_registry.py b/gwserver/modules/agentservice_registry.py
index 0e6853b6..a2149902 100644
--- a/gwserver/modules/agentservice_registry.py
+++ b/gwserver/modules/agentservice_registry.py
@@ -30,6 +30,9 @@ class AgentRegistry:
         if AgentRegistry._instance is not None:
             raise RuntimeError("Singleton instance already exists - use get_instance()")
         self.agents = {}
+        self.ai_service = None
+        self.document_handler = None
+        self.lucydom_interface = None
         self._load_agents()
 
     def _load_agents(self):
@@ -48,10 +51,7 @@ class AgentRegistry:
         for module_name in agent_modules:
             try:
                 # Import the module
-                try:
-                    module = importlib.import_module(f"modules.{module_name}")
-                except ImportError:
-                    module = importlib.import_module(module_name)
+                module = importlib.import_module(f"modules.{module_name}")
 
                 # Look for the agent class or a get_*_agent function
                 agent_type = module_name.split('_')[-1]
@@ -79,11 +79,33 @@ class AgentRegistry:
                     logger.warning(f"No agent class or getter function found in module {module_name}")
             except ImportError as e:
-                logger.warning(f"Module {module_name} could not be imported: {e}")
+                logger.error(f"Module {module_name} could not be imported: {e}")
             except Exception as e:
                 logger.error(f"Error loading agent from module {module_name}: {e}")
-    
-    def register_agent(self, agent: BaseAgent):
+    
+    def set_dependencies(self, ai_service=None, document_handler=None, lucydom_interface=None):
+        """
+        Set system dependencies for all agents.
+        
+        Args:
+            ai_service: AI service for text generation
+            document_handler: Document handler for document operations
+            lucydom_interface: LucyDOM interface for database access
+        """
+        self.ai_service = ai_service
+        self.document_handler = document_handler
+        self.lucydom_interface = lucydom_interface
+        
+        # Update dependencies for all registered agents
+        for agent_id, agent in self.agents.items():
+            if hasattr(agent, 'set_dependencies'):
+                agent.set_dependencies(
+                    ai_service=ai_service,
+                    document_handler=document_handler,
+                    lucydom_interface=lucydom_interface
+                )
+    
+    def register_agent(self, agent: 'BaseAgent'):
         """
         Register an agent in the registry.
 
@@ -91,10 +113,22 @@ class AgentRegistry:
             agent: The agent to register
         """
         agent_type = agent.type
+        agent_id = getattr(agent, 'id', agent_type)
+        
+        # Initialize enhanced agents with dependencies
+        if hasattr(agent, 'set_dependencies'):
+            agent.set_dependencies(
+                ai_service=self.ai_service,
+                document_handler=self.document_handler,
+                lucydom_interface=self.lucydom_interface
+            )
+        
         self.agents[agent_type] = agent
-        # Also register by ID
-        self.agents[agent.id] = agent
-        logger.debug(f"Agent '{agent.name}' (Type: {agent_type}) registered")
+        # Also register by ID if it's different from type
+        if agent_id != agent_type:
+            self.agents[agent_id] = agent
+        
+        logger.debug(f"Agent '{agent.name}' (Type: {agent_type}, ID: {agent_id}) registered")
 
     def get_agent(self, agent_identifier: str) -> Optional[BaseAgent]:
         """
@@ -199,22 +233,56 @@ class AgentRegistry:
         for agent in self.agents.values():
             if agent not in seen_agents:
                 # Get agent info
-                agent_info = agent.get_agent_info()
-                agent_id = agent_info["id"]
+                agent_id = getattr(agent, 'id', agent.type)
                 
-                # Extract capabilities
-                capabilities = agent_info.get("capabilities", "")
+                # Extract capabilities - check for get_capabilities method first
+                if hasattr(agent, 'get_capabilities') and callable(getattr(agent, 'get_capabilities')):
+                    capabilities = agent.get_capabilities()
+                else:
+                    # Fall back to string parsing
+                    capabilities_str = getattr(agent, 'capabilities', "")
+                    capabilities = [kw.strip().lower() for kw in capabilities_str.split(',') if kw.strip()]
                 
-                # Split capabilities into keywords
-                if capabilities:
-                    keywords = [kw.strip().lower() for kw in capabilities.split(',')]
-                    
-                    # Add each keyword to the mapping
-                    for keyword in keywords:
-                        if keyword not in capabilities_map:
-                            capabilities_map[keyword] = []
-                        capabilities_map[keyword].append(agent_id)
+                # Add each capability to the mapping
+                for capability in capabilities:
+                    if capability not in capabilities_map:
+                        capabilities_map[capability] = []
+                    if agent_id not in capabilities_map[capability]:
+                        capabilities_map[capability].append(agent_id)
                 
                 seen_agents.add(agent)
         
-        return capabilities_map
\ No newline at end of file
+        return capabilities_map
+    
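With the normalized capability map in place, callers can route work by capability rather than by hard-coded agent ID; get_agent_by_capability below builds on it. A small sketch (whether a match exists depends on which agents are actually registered):

from modules.agentservice_registry import AgentRegistry

registry = AgentRegistry.get_instance()

# capability keyword -> list of agent IDs, normalized to lower case
caps = registry.get_agent_capabilities()

agent = registry.get_agent_by_capability("data analysis")
if agent is not None:
    print(f"Routing to {agent.name} (type: {agent.type})")
else:
    print("No matching agent registered; fall back to the assistant.")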
+ def get_agent_by_capability(self, capability: str) -> Optional['BaseAgent']: + """ + Find an agent with a specific capability. + + Args: + capability: The required capability + + Returns: + Agent with the required capability, or None if not found + """ + # Create mapping of capabilities for faster lookup + capability_map = self.get_agent_capabilities() + + # Look for the capability (case-insensitive) + capability = capability.lower() + matching_agents = [] + + # Direct match + if capability in capability_map: + matching_agents = capability_map[capability] + else: + # Partial matches + for cap, agents in capability_map.items(): + if capability in cap or cap in capability: + matching_agents.extend(agents) + + # Return the first matching agent + if matching_agents: + agent_id = matching_agents[0] + return self.get_agent(agent_id) + + return None \ No newline at end of file diff --git a/gwserver/modules/agentservice_utils.py b/gwserver/modules/agentservice_utils.py index be462a61..141ce0fc 100644 --- a/gwserver/modules/agentservice_utils.py +++ b/gwserver/modules/agentservice_utils.py @@ -509,7 +509,8 @@ class LoggingUtils: "agents": "Agent Selection & Execution", "files": "File Processing", "summary": "Results Summary", - "error": "Error Handling" + "error": "Error Handling", + "code": "Code Execution", } def set_workflow_id(self, workflow_id: str): diff --git a/gwserver/modules/agentservice_workflow_execution.py b/gwserver/modules/agentservice_workflow_execution.py index a0cbb792..3fd4f1be 100644 --- a/gwserver/modules/agentservice_workflow_execution.py +++ b/gwserver/modules/agentservice_workflow_execution.py @@ -1,6 +1,6 @@ """ -Refactored workflow execution for the Agentservice. -Implements a structured workflow with clear separation of planning and execution phases. +Refactored architecture for the Agentservice multi-agent system. +This module defines the revised workflow execution with improved agent handovers. """ import os @@ -10,18 +10,12 @@ import uuid from datetime import datetime from typing import List, Dict, Any, Optional, Tuple, Union -# Import utility module (will be created) -from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils - -# Import for data extraction -from modules.agentservice_dataextraction import data_extraction - logger = logging.getLogger(__name__) class WorkflowExecution: """ - Handles the execution of workflows in a structured, multi-phase approach. - Separates planning from execution and provides better logging. + Handles the execution of workflows with improved agent collaboration. + Integrates planning and execution phases for better context awareness. 
""" def __init__(self, workflow_manager, workflow_id: str, mandate_id: int, user_id: int, ai_service, lucydom_interface): @@ -33,14 +27,25 @@ class WorkflowExecution: self.ai_service = ai_service self.lucydom_interface = lucydom_interface + # Import necessary modules + from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils + from modules.agentservice_registry import AgentRegistry + from modules.agentservice_filemanager import get_workflow_file_manager + # Initialize utilities self.workflow_utils = WorkflowUtils(workflow_id) self.message_utils = MessageUtils() self.logging_utils = LoggingUtils(workflow_id, self._add_log) + # Initialize agent registry + self.agent_registry = AgentRegistry.get_instance() + + # Initialize file manager + self.file_manager = get_workflow_file_manager(workflow_id, lucydom_interface) + async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: List[Dict[str, Any]] = None, is_user_input: bool = False): """ - Execute the workflow following the new structured approach. + Execute the workflow with integrated planning and agent selection. Args: message: The initiating message (prompt or user input) @@ -52,47 +57,23 @@ class WorkflowExecution: Dict with workflow status and result """ try: - # 1. Initialize the workflow (already done by the caller) + # 1. Initialize workflow logging self.logging_utils.info("Starting workflow execution", "workflow", "Workflow initialized") - # 2. Create a message with user input - user_message = self._create_message(workflow, message.get("role", "user")) - user_message["content"] = message.get("content", "") - - # Process files if provided - if files and len(files) > 0: - self.logging_utils.info(f"Processing {len(files)} files", "files", f"Processing files: {[f.get('name', 'unknown') for f in files]}") - await self._process_files(workflow, user_message, files) - - # Add the message to the workflow - if "messages" not in workflow: - workflow["messages"] = [] - workflow["messages"].append(user_message) - - # Save workflow state - self.workflow_manager._save_workflow(workflow) + # 2. Process user message and files + user_message = await self._process_user_message(workflow, message, files) self.logging_utils.info("User message processed", "workflow", "User input added to workflow") - # 3. Create work plan using AI - work_plan = await self._create_work_plan(workflow, user_message) - self.logging_utils.info(f"Created work plan with {len(work_plan)} activities", "planning", "Work plan created") + # 3. Create agent-aware work plan + work_plan = await self._create_agent_aware_work_plan(workflow, user_message) + self.logging_utils.info(f"Created agent-aware work plan with {len(work_plan)} activities", "planning") - # 4. Execute each activity in the work plan - results = [] - for i, activity in enumerate(work_plan, 1): - self.logging_utils.info(f"Starting activity {i}/{len(work_plan)}: {activity.get('title', 'Unnamed')}", - "execution", f"Activity: {activity.get('title', 'Unnamed')}") - - # Execute the activity - activity_result = await self._execute_activity(workflow, activity) - results.append(activity_result) - - # Save intermediate state - self.workflow_manager._save_workflow(workflow) - - # 5. Create summary for the user + # 4. Execute the activities in the work plan + results = await self._execute_work_plan(workflow, work_plan) + + # 5. 
Create summary summary = await self._create_summary(workflow, results) - self.logging_utils.info("Created workflow summary", "summary", "Workflow summary created") + self.logging_utils.info("Created workflow summary", "summary") # Set workflow status to completed workflow["status"] = "completed" @@ -108,7 +89,7 @@ class WorkflowExecution: } except Exception as e: - self.logging_utils.error(f"Workflow execution failed: {str(e)}", "error", f"Error: {str(e)}") + self.logging_utils.error(f"Workflow execution failed: {str(e)}", "error") workflow["status"] = "failed" self.workflow_manager._save_workflow(workflow) @@ -117,107 +98,111 @@ class WorkflowExecution: "status": "failed", "error": str(e) } - - async def _process_files(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]]): + + async def _process_user_message(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]] = None) -> Dict[str, Any]: """ - Process files and add them to the message. - Extracts text content where possible. + Process the user message and add it to the workflow. Args: workflow: The workflow object - message: The message to add files to - files: List of file metadata - """ - # Import necessary modules - from modules.agentservice_filemanager import get_file_manager - # Get the file manager instance - file_manager = get_file_manager() - - # Prepare file contexts - file_contexts = file_manager.prepare_file_contexts(files) - self.logging_utils.info(f"Prepared contexts for {len(file_contexts)} files", "files", "File contexts prepared") - - # Read file contents - file_contents = await file_manager.read_file_contents( - file_contexts, - self.lucydom_interface, - self.workflow_id, - self._add_log, - self.ai_service - ) - - # Add files to message - for file_id, content in file_contents.items(): - file_metadata = next((f for f in files if f.get('id') == file_id), {}) + message: The user message + files: Optional list of file metadata - file_data = { - "id": file_id, - "name": file_metadata.get('name', 'unnamed_file'), - "content_type": file_metadata.get('content_type'), - "type": file_metadata.get('type', "unknown"), - "content": content.get("content", "") if isinstance(content, dict) else content, - "size": file_metadata.get('size'), - "is_extracted": content.get("is_extracted", False) if isinstance(content, dict) else False - } - - self.logging_utils.info(f"Adding file {file_data['name']} to message", "files", f"Adding file: {file_data['name']}") - file_manager.add_file_to_message(message, file_data) - - async def _create_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]: + Returns: + The processed user message """ - Create a structured work plan based on the user's request. 
+ # Create a message with user input + user_message = self._create_message(workflow, message.get("role", "user")) + user_message["content"] = message.get("content", "") + + # Process files if provided + if files and len(files) > 0: + self.logging_utils.info(f"Processing {len(files)} files", "files") + + # Add files to message via file manager instead of _process_files + user_message = await self.file_manager.add_files_to_message( + user_message, + [f.get('id') for f in files], + self._add_log + ) + + # Add the message to the workflow + if "messages" not in workflow: + workflow["messages"] = [] + workflow["messages"].append(user_message) + + # Save workflow state + self.workflow_manager._save_workflow(workflow) + + return user_message + + async def _create_agent_aware_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]: + """ + Create an agent-aware work plan that integrates agent selection during planning. Args: workflow: The workflow object message: The initiating message Returns: - List of structured activities to execute + List of structured activities with agent assignments """ # Extract context information task = message.get("content", "") + + # Get all available agents and their capabilities + agent_infos = self.agent_registry.get_agent_infos() + + # Extract documents documents = message.get("documents", []) + document_info = [] + for doc in documents: + source = doc.get("source", {}) + document_info.append({ + "id": doc.get("id"), + "name": source.get("name", "unnamed"), + "type": source.get("type", "unknown"), + "content_type": source.get("content_type", "unknown") + }) - # Create the planning prompt + # Create the planning prompt with agent awareness plan_prompt = f""" - As an AI workflow manager, create a detailed work plan for the following task: +As an AI workflow manager, create a detailed agent-aware work plan for the following task: + +TASK: {task} + +AVAILABLE AGENTS: +{self._format_agent_info(agent_infos)} + +AVAILABLE DOCUMENTS: +{document_info if document_info else "No documents provided"} + +The work plan should include a structured list of activities. Each activity should have: +1. title - A short descriptive title for the activity +2. description - What needs to be done in this activity +3. assigned_agents - List of agent IDs that should handle this activity (can be multiple in sequence) +4. agent_prompts - Specific instructions for each agent (matched by index to assigned_agents) +5. document_requirements - Description of which documents are needed for this activity +6. expected_output - The expected output format and content +7. dependencies - List of previous activities this depends on (by index) + +IMPORTANT GUIDELINES: +- Each activity should have clear objectives and be assigned to the most appropriate agent(s) +- When multiple agents are assigned to an activity, specify the sequence and how outputs should flow between them +- Documents are processed on-demand, so each activity should specify which documents it requires +- Create a logical sequence where later activities can use outputs from earlier ones +- If no specialized agent is needed for a task, use the default "assistant" agent + +Return the work plan as a JSON array of activity objects, each with the above properties. +""" - TASK: {task} - - The work plan should include a structured list of activities. Each activity should have: - 1. title - A short descriptive title for the activity - 2. description - What needs to be done in this activity - 3. 
agent_prompt - A complete prompt to give to the AI agent(s) for this activity
-        4. data_prompt - A prompt describing what data will be needed for this activity
-        5. expected_format - The expected output format (e.g., "Text", "JSON", "Table", "FileList")
-        6. dependencies - List of previous activities this depends on (by index)
-        
-        Return the work plan as a JSON array of activity objects, each with the above properties.
-        The work plan should be logical, efficient, and comprehensively address the task.
-        """
-        
-        # Add information about available documents if present
-        if documents:
-            doc_info = []
-            for doc in documents:
-                source = doc.get("source", {})
-                doc_info.append({
-                    "name": source.get("name", "unnamed"),
-                    "type": source.get("type", "unknown"),
-                    "content_type": source.get("content_type", "unknown")
-                })
-            
-            plan_prompt += f"\n\nAvailable documents: {doc_info}"
-        
-        self.logging_utils.info("Requesting AI work plan", "planning", "Generating work plan")
+        self.logging_utils.info("Creating agent-aware work plan", "planning")
         
         # Call AI to generate work plan
         try:
             plan_response = await self.ai_service.call_api([{"role": "user", "content": plan_prompt}])
-            print("DEBUG prompt=",plan_prompt," Response=",plan_response)
-            
-            # Extract JSON plan (using a helper utility)
+            # Extract JSON plan
             import json
             import re
@@ -228,295 +213,245 @@ class WorkflowExecution:
             if json_match:
                 json_str = json_match.group(0)
                 work_plan = json.loads(json_str)
-                self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning", 
-                                        f"Work plan activities: {[activity.get('title', 'Unnamed') for activity in work_plan]}")
+                self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning")
                 return work_plan
             else:
-                self.logging_utils.warning("Could not extract JSON from AI response", "planning", 
-                                           "Fallback to default work plan")
+                self.logging_utils.warning("Could not extract JSON from AI response", "planning")
                 
                 # Fallback: Create a simple default work plan
                 return [{
                     "title": "Process Task",
                     "description": "Process the user's request directly",
-                    "agent_prompt": task,
-                    "data_prompt": "All available data is needed for this task",
-                    "expected_format": "Text",
+                    "assigned_agents": ["assistant"],
+                    "agent_prompts": [task],
+                    "document_requirements": "All available documents may be needed",
+                    "expected_output": "Text",
                     "dependencies": []
                 }]
                 
         except Exception as e:
-            self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning", f"Work plan error: {str(e)}")
+            self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning")
             
             # Return a minimal fallback plan
             return [{
                 "title": "Process Task (Error Recovery)",
                 "description": "Process the user's request after planning error",
-                "agent_prompt": task,
-                "data_prompt": "All available data is needed for this task",
-                "expected_format": "Text",
+                "assigned_agents": ["assistant"],
+                "agent_prompts": [task],
+                "document_requirements": "All available documents may be needed",
+                "expected_output": "Text",
                 "dependencies": []
             }]
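For reference, a plan that satisfies this schema might look as follows. The activities, agent IDs, and prompts are invented for illustration; dependencies are zero-based activity indices, matching the lookup in _execute_work_plan below:

example_work_plan = [
    {
        "title": "Collect figures",
        "description": "Pull revenue figures from the uploaded spreadsheet",
        "assigned_agents": ["analyst_agent"],
        "agent_prompts": ["Extract all revenue figures per quarter."],
        "document_requirements": "csv/excel files containing revenue data",
        "expected_output": "Table",
        "dependencies": []
    },
    {
        "title": "Write summary",
        "description": "Turn the figures into a short management summary",
        "assigned_agents": ["documentation_agent"],
        "agent_prompts": ["Summarize the extracted figures for management."],
        "document_requirements": "",
        "expected_output": "Text",
        "dependencies": [0]  # consumes the output of the first activity
    }
]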
-    
-    async def _execute_activity(self, workflow: Dict[str, Any], activity: Dict[str, Any]) -> Dict[str, Any]:
+    
+    async def _execute_work_plan(self, workflow: Dict[str, Any], work_plan: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """
-        Execute a single activity from the work plan.
+        Execute all activities in the work plan with proper agent handovers.
 
         Args:
             workflow: The workflow object
-            activity: The activity definition from the work plan
+            work_plan: The work plan with activities
 
         Returns:
-            Result of the activity execution
+            Results from all activities
         """
-        # Extract activity information
-        title = activity.get("title", "Unnamed Activity")
-        agent_prompt = activity.get("agent_prompt", "")
-        data_prompt = activity.get("data_prompt", "")
-        expected_format = activity.get("expected_format", "Text")
+        results = []
+        activity_outputs = {}  # Store outputs for dependency resolution
         
-        self.logging_utils.info(f"Executing activity: {title}", "execution", f"Activity: {title}, Format: {expected_format}")
-        
-        # 1. Determine which agents to use
-        agents_config = await self._select_agents(workflow, agent_prompt, expected_format)
-        self.logging_utils.info(f"Selected {len(agents_config)} agents for execution", "agents", 
-                                f"Agents: {[agent.get('agent_id', 'unknown') for agent in agents_config]}")
-        
-        # 2. Extract the necessary data
-        from modules.agentservice_registry import AgentRegistry
-        registry = AgentRegistry.get_instance()
-        
-        # If no agents were selected, use the moderator directly
-        if not agents_config:
-            self.logging_utils.info("No specific agents selected, using moderator", "agents", "Using moderator")
-            # Create a message with the moderator's response
-            moderator_message = self._create_message(workflow, "assistant")
-            moderator_message["content"] = f"No specialized agents needed for this task. Processing directly: {agent_prompt}"
-            moderator_message["agent_type"] = "moderator"
-            moderator_message["agent_id"] = "moderator"
-            moderator_message["agent_name"] = "Moderator"
+        for activity_index, activity in enumerate(work_plan):
+            # Extract activity info
+            title = activity.get("title", f"Activity {activity_index+1}")
+            description = activity.get("description", "")
+            assigned_agents = activity.get("assigned_agents", ["assistant"])
+            agent_prompts = activity.get("agent_prompts", [description])
+            doc_requirements = activity.get("document_requirements", "")
+            expected_output = activity.get("expected_output", "Text")
+            dependencies = activity.get("dependencies", [])
            
-            # Add message to workflow
-            workflow["messages"].append(moderator_message)
+            self.logging_utils.info(f"Starting activity: {title}", "execution")
            
-            # Direct AI call for simple result
-            result_content = await self.ai_service.call_api([
-                {"role": "system", "content": "You are a helpful assistant processing the user's request."},
-                {"role": "user", "content": agent_prompt}
-            ])
+            # Validate assigned_agents and agent_prompts
+            if len(assigned_agents) > len(agent_prompts):
+                # Duplicate the last prompt for additional agents
+                agent_prompts.extend([agent_prompts[-1]] * (len(assigned_agents) - len(agent_prompts)))
+            elif len(agent_prompts) > len(assigned_agents):
+                # Truncate excess prompts
+                agent_prompts = agent_prompts[:len(assigned_agents)]
            
-            # Create result message
-            result_message = self._create_message(workflow, "assistant")
-            result_message["content"] = result_content
-            result_message["agent_type"] = "assistant"
-            result_message["agent_id"] = "assistant"
-            result_message["agent_name"] = "AI Assistant"
-            result_message["result_format"] = "Text"
+            # Process dependencies first
+            dependency_context = {}
+            for dep_index in dependencies:
+                if dep_index < activity_index and dep_index in activity_outputs:
+                    dep_output = activity_outputs[dep_index]
+                    dependency_context[f"activity_{dep_index+1}"] = dep_output
            
-            # Add message to workflow
-            workflow["messages"].append(result_message)
-            
-            return
{ - "title": title, - "content": result_content, - "agent": "assistant", - "format": "Text" - } - - # 3. Execute the agents in sequence - last_result = None - for agent_config in agents_config: - agent_id = agent_config.get("agent_id") - agent_prompt = agent_config.get("prompt") - expected_format = agent_config.get("expected_format", "Text") - - # Get the agent from registry - agent = registry.get_agent(agent_id) - if not agent: - self.logging_utils.warning(f"Agent '{agent_id}' not found, skipping", "agents", f"Agent not found: {agent_id}") - continue - - # Incorporate previous result if available - if last_result: - agent_prompt = f"{agent_prompt}\n\nPrevious result: {last_result}" - - self.logging_utils.info(f"Executing agent: {agent_id}", "agents", f"Agent: {agent_id}, Format: {expected_format}") - - # Extract any needed data - if data_prompt: - # Get all messages from the workflow - workflow_messages = workflow.get("messages", []) - - # Extract data using the dataextraction module - extracted_data = await data_extraction( - prompt=data_prompt, - files=self._extract_files_from_workflow(workflow), - messages=workflow_messages, - ai_service=self.ai_service, - lucydom_interface=self.lucydom_interface, - workflow_id=self.workflow_id, - add_log_func=self._add_log - ) - - # Add the data context to the prompt + # Extract required documents if needed + document_content = "" + if doc_requirements: + extracted_data = await self._extract_required_documents(workflow, doc_requirements) if extracted_data and "extracted_content" in extracted_data: - data_summary = "\n\nExtracted data summary:\n" + # Format document content for the prompt + document_content = "\n\n=== EXTRACTED DOCUMENT CONTENT ===\n\n" for item in extracted_data.get("extracted_content", []): - data_summary += f"- {item.get('name', 'unnamed')}: {item.get('content', '')[:100]}...\n" - - agent_prompt += data_summary + doc_name = item.get("name", "Unnamed document") + doc_content = item.get("content", "No content available") + document_content += f"--- {doc_name} ---\n{doc_content}\n\n" - # Create the agent message + # Execute the activity with the assigned agents + activity_result = await self._execute_agent_sequence( + workflow, + assigned_agents, + agent_prompts, + document_content, + dependency_context, + expected_output + ) + + # Store the result + activity_outputs[activity_index] = activity_result + results.append({ + "title": title, + "description": description, + "agents": assigned_agents, + "result": activity_result.get("content", ""), + "output_format": activity_result.get("format", "Text") + }) + + self.logging_utils.info(f"Completed activity: {title}", "execution") + + # Save intermediate state + self.workflow_manager._save_workflow(workflow) + + return results + + async def _execute_agent_sequence( + self, + workflow: Dict[str, Any], + agent_ids: List[str], + prompts: List[str], + document_content: str, + dependency_context: Dict[str, Any], + expected_output: str + ) -> Dict[str, Any]: + """ + Execute a sequence of agents with proper handovers. 
+ + Args: + workflow: The workflow object + agent_ids: List of agent IDs to execute in sequence + prompts: List of prompts for each agent + document_content: Extracted document content + dependency_context: Context from dependent activities + expected_output: Expected output format + + Returns: + Result of the agent sequence execution + """ + context = { + "workflow_id": self.workflow_id, + "expected_format": expected_output, + "dependency_outputs": dependency_context + } + + last_result = None + last_documents = [] + + for i, agent_id in enumerate(agent_ids): + # Get the agent + agent = self.agent_registry.get_agent(agent_id) + if not agent: + self.logging_utils.warning(f"Agent '{agent_id}' not found, using assistant instead", "agents") + agent = self.agent_registry.get_agent("assistant") + if not agent: + # If assistant not found, create a minimal agent response + continue + + # Get the agent prompt + base_prompt = prompts[i] if i < len(prompts) else prompts[-1] + + # Enhance the prompt with context + enhanced_prompt = self._enhance_prompt( + base_prompt, + document_content, + dependency_context, + last_result.get("content", "") if last_result else "", + i > 0 # is_continuation flag + ) + + # Create the message for this agent agent_message = self._create_message(workflow, "user") - agent_message["content"] = agent_prompt - agent_message["workflow_id"] = self.workflow_id + agent_message["content"] = enhanced_prompt + + # Add any documents from previous agent if this is a continuation + if last_documents and i > 0: + agent_message["documents"] = last_documents + + # Log agent execution + self.logging_utils.info(f"Executing agent: {agent_id}", "agents") # Execute the agent - agent_response = await agent.process_message(agent_message, {"expected_format": expected_format}) + agent_response = await agent.process_message(agent_message, context) - # Process agent response - if agent_response: - # Create response message - response_message = self._create_message(workflow, "assistant") - response_message["content"] = agent_response.get("content", "") - response_message["agent_type"] = agent_id - response_message["agent_id"] = agent_id - response_message["agent_name"] = agent.name - response_message["result_format"] = agent_response.get("result_format", expected_format) - - # Add to workflow - workflow["messages"].append(response_message) - - # Update last result - last_result = agent_response.get("content", "") + # Create response message + response_message = self._create_message(workflow, "assistant") + response_message["content"] = agent_response.get("content", "") + response_message["agent_type"] = agent_id + response_message["agent_id"] = agent_id + response_message["agent_name"] = agent.name + response_message["result_format"] = agent_response.get("result_format", expected_output) + + # Capture documents from response + if "documents" in agent_response: + response_message["documents"] = agent_response["documents"] + last_documents = agent_response["documents"] + + # Add to workflow + workflow["messages"].append(response_message) + + # Update last result + last_result = { + "content": agent_response.get("content", ""), + "format": agent_response.get("result_format", expected_output), + "agent_id": agent_id, + "documents": agent_response.get("documents", []) + } - # Return the final result - return { - "title": title, - "content": last_result or "", - "agent": agent_config.get("agent_id", "unknown") if agents_config else "none", - "format": expected_format + return last_result or { + "content": 
"No agent response was generated.", + "format": "Text" } - async def _select_agents(self, workflow: Dict[str, Any], prompt: str, expected_format: str) -> List[Dict[str, Any]]: + async def _extract_required_documents(self, workflow: Dict[str, Any], doc_requirements: str) -> Dict[str, Any]: """ - Select appropriate agents for a given prompt and expected format. + Extract required documents based on requirements description. Args: workflow: The workflow object - prompt: The prompt to process - expected_format: The expected output format + doc_requirements: Description of document requirements Returns: - List of agent configurations (agent_id, prompt, expected_format) + Extracted document data """ - # Get available agents - from modules.agentservice_registry import AgentRegistry - registry = AgentRegistry.get_instance() + # Import for data extraction + from modules.agentservice_dataextraction import data_extraction - # Get all agents except user_agent - system_agents = {} - for agent_id, agent in registry.get_all_agents().items(): - if agent.type != "user" and agent_id not in system_agents: - system_agents[agent_id] = agent.get_agent_info() + # Get all files from the workflow + files = self.workflow_utils.get_files(workflow) - # Create agent selection prompt - selection_prompt = f""" - You are a workflow coordinator responsible for selecting appropriate agents for a task. + # Get all messages from the workflow + workflow_messages = workflow.get("messages", []) - TASK PROMPT: {prompt} + # Extract data using the dataextraction module + extracted_data = await data_extraction( + prompt=doc_requirements, + files=files, + messages=workflow_messages, + ai_service=self.ai_service, + lucydom_interface=self.lucydom_interface, + workflow_id=self.workflow_id, + add_log_func=self._add_log + ) - EXPECTED FORMAT: {expected_format} - - AVAILABLE AGENTS: - """ - - # Add agent descriptions - for agent_id, agent_info in system_agents.items(): - selection_prompt += f""" - - ID: {agent_id} - Name: {agent_info.get('name', '')} - Type: {agent_info.get('type', '')} - Description: {agent_info.get('description', '')} - Capabilities: {agent_info.get('capabilities', '')} - Result Format: {agent_info.get('result_format', 'Text')} - """ - - selection_prompt += """ - Based on the task and expected format, select the appropriate agent(s) to use. - - Return your selection as a JSON array with objects containing: - 1. agent_id: The ID of the selected agent - 2. prompt: A specific prompt tailored for this agent - 3. expected_format: The expected output format - - You can select multiple agents if needed, in which case they will be executed in sequence. - If no specialized agent is needed, return an empty array. 
- """ - - # Call AI to select agents - try: - selection_response = await self.ai_service.call_api([{"role": "user", "content": selection_prompt}]) - - # Extract JSON from response - import json - import re - - # Look for JSON array - json_pattern = r'\[\s*\{.*\}\s*\]' - json_match = re.search(json_pattern, selection_response, re.DOTALL) - - if json_match: - json_str = json_match.group(0) - selected_agents = json.loads(json_str) - - # Validate selections - valid_agents = [] - for agent_config in selected_agents: - if "agent_id" in agent_config and agent_config["agent_id"] in system_agents: - valid_agents.append(agent_config) - - return valid_agents - elif "[]" in selection_response: - # Empty array - no agents needed - return [] - else: - # Could not parse response, use default strategy - self.logging_utils.warning("Could not parse agent selection response", "agents", - "Falling back to default agent selection") - - # Simple heuristic for default agent selection based on expected format - if expected_format.lower() in ["file", "filelist", "document"]: - return [{ - "agent_id": "filecreator_agent", - "prompt": prompt, - "expected_format": expected_format - }] - elif expected_format.lower() in ["report", "analysis", "document"]: - return [{ - "agent_id": "documentation_agent", - "prompt": prompt, - "expected_format": expected_format - }] - elif "web" in prompt.lower() or "search" in prompt.lower(): - return [{ - "agent_id": "webcrawler_agent", - "prompt": prompt, - "expected_format": expected_format - }] - elif "analyze" in prompt.lower() or "data" in prompt.lower(): - return [{ - "agent_id": "analyst_agent", - "prompt": prompt, - "expected_format": expected_format - }] - else: - # No specific agent needed - return [] - - except Exception as e: - self.logging_utils.error(f"Error selecting agents: {str(e)}", "agents", f"Agent selection error: {str(e)}") - return [] # Empty array - use default processing - + return extracted_data + async def _create_summary(self, workflow: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]: """ Create a summary of the workflow results for the user. @@ -533,15 +468,17 @@ class WorkflowExecution: for i, result in enumerate(results, 1): title = result.get("title", f"Activity {i}") - content = result.get("content", "") - agent = result.get("agent", "unknown") + description = result.get("description", "") + content = result.get("result", "") + agents = ", ".join(result.get("agents", ["unknown"])) # Limit content length for the summary prompt content_preview = content[:500] + "..." 
if len(content) > 500 else content summary_prompt += f""" ACTIVITY {i}: {title} - Executed by: {agent} + Description: {description} + Executed by: {agents} {content_preview} @@ -573,10 +510,9 @@ class WorkflowExecution: workflow["messages"].append(summary_message) return summary_message - + def _create_message(self, workflow: Dict[str, Any], role: str) -> Dict[str, Any]: """Create a new message object for the workflow""" - # This is a utility function that should be moved to the utility module message_id = f"msg_{uuid.uuid4()}" current_time = datetime.now().isoformat() @@ -589,6 +525,7 @@ class WorkflowExecution: message = { "id": message_id, "workflow_id": self.workflow_id, + "parent_message_id": None, "started_at": current_time, "finished_at": None, "sequence_no": sequence_no, @@ -610,32 +547,72 @@ class WorkflowExecution: return message - def _extract_files_from_workflow(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]: - """Extract file information from all messages in the workflow""" - files = [] - - # Process all messages - for message in workflow.get("messages", []): - # Extract documents from the message - for doc in message.get("documents", []): - source = doc.get("source", {}) - - # Only include file documents - if source.get("type") == "file": - file_info = { - "id": source.get("id", ""), - "name": source.get("name", ""), - "type": source.get("content_type", ""), - "content_type": source.get("content_type", "") - } - - # Check if file is already in the list (avoid duplicates) - if not any(f.get("id") == file_info["id"] for f in files): - files.append(file_info) - - return files - def _add_log(self, workflow_id: str, message: str, log_type: str, agent_id: str = None, agent_name: str = None): """Add a log entry to the workflow""" - # This actually calls back to the workflow manager's log function - self.workflow_manager._add_log(workflow_id, message, log_type, agent_id, agent_name) \ No newline at end of file + # This calls back to the workflow manager's log function + self.workflow_manager._add_log(workflow_id, message, log_type, agent_id, agent_name) + + def _format_agent_info(self, agent_infos: List[Dict[str, Any]]) -> str: + """Format agent information for the planning prompt""" + formatted_info = "" + for agent in agent_infos: + formatted_info += f""" + - ID: {agent.get('id', 'unknown')} + Name: {agent.get('name', '')} + Type: {agent.get('type', '')} + Description: {agent.get('description', '')} + Capabilities: {agent.get('capabilities', '')} + Result Format: {agent.get('result_format', 'Text')} + """ + return formatted_info + + def _enhance_prompt( + self, + base_prompt: str, + document_content: str, + dependency_context: Dict[str, Any], + previous_result: str, + is_continuation: bool + ) -> str: + """ + Enhance a prompt with context information. 
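
For reference, the message skeleton that `_create_message` assembles can be reproduced standalone. This sketch mirrors the fields visible in the hunk; the sequence-number rule is an assumption, since that part of the function is elided here:

```python
import uuid
from datetime import datetime
from typing import Any, Dict

def create_message(workflow: Dict[str, Any], workflow_id: str, role: str) -> Dict[str, Any]:
    """Build a message dict matching the schema used by the workflow."""
    return {
        "id": f"msg_{uuid.uuid4()}",
        "workflow_id": workflow_id,
        "parent_message_id": None,  # field introduced by this patch
        "started_at": datetime.now().isoformat(),
        "finished_at": None,
        "sequence_no": len(workflow.get("messages", [])) + 1,  # assumed rule
        "status": "pending",
        "role": role,
        "documents": [],
        "content": "",
    }

wf = {"messages": []}
print(create_message(wf, "wf_demo", "assistant")["sequence_no"])  # 1
```
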
+ + Args: + base_prompt: The original prompt + document_content: Extracted document content + dependency_context: Context from dependent activities + previous_result: Result from previous agent in sequence + is_continuation: Flag indicating if this is a continuation + + Returns: + Enhanced prompt + """ + enhanced_prompt = base_prompt + + # Add continuation context if this is a continuation + if is_continuation and previous_result: + enhanced_prompt = f""" +{enhanced_prompt} + +=== PREVIOUS AGENT OUTPUT === +{previous_result} +""" + + # Add document content if available + if document_content: + enhanced_prompt += f"\n\n{document_content}" + + # Add dependency context if available + if dependency_context: + dependency_section = "\n\n=== OUTPUTS FROM PREVIOUS ACTIVITIES ===\n\n" + for name, value in dependency_context.items(): + if isinstance(value, dict) and "content" in value: + # Extract content if it's in the standard format + dependency_section += f"--- {name} ---\n{value['content']}\n\n" + else: + # Use the value directly + dependency_section += f"--- {name} ---\n{str(value)}\n\n" + + enhanced_prompt += dependency_section + + return enhanced_prompt \ No newline at end of file diff --git a/gwserver/modules/agentservice_workflow_manager.py b/gwserver/modules/agentservice_workflow_manager.py index 4183fe08..157ca53c 100644 --- a/gwserver/modules/agentservice_workflow_manager.py +++ b/gwserver/modules/agentservice_workflow_manager.py @@ -12,8 +12,37 @@ from typing import List, Dict, Any, Optional, Tuple, Union logger = logging.getLogger(__name__) class WorkflowManager: - # Previous code is in the first part - + + def __init__(self, mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None): + """Initialize the WorkflowManager.""" + self.mandate_id = mandate_id + self.user_id = user_id + self.ai_service = ai_service + self.lucydom_interface = lucydom_interface + + # Cache for workflows + self.workflows = {} + + # Directory for results + self.results_dir = os.path.join("results", "workflows") + os.makedirs(self.results_dir, exist_ok=True) + + # Initialize document handler + from modules.agentservice_document_handler import get_document_handler + self.document_handler = get_document_handler( + lucydom_interface=lucydom_interface, + ai_service=ai_service + ) + + # Initialize agent registry with dependencies + from modules.agentservice_registry import AgentRegistry + registry = AgentRegistry.get_instance() + registry.set_dependencies( + ai_service=ai_service, + document_handler=self.document_handler, + lucydom_interface=lucydom_interface + ) + async def list_workflows(self, mandate_id: int = None, user_id: int = None) -> List[Dict[str, Any]]: """ List all available workflows. @@ -106,7 +135,193 @@ class WorkflowManager: except Exception as e: logger.error(f"Error listing workflows: {str(e)}") return [] - + + async def execute_workflow(self, message: Dict[str, Any], files: List[Dict[str, Any]] = None) -> Dict[str, Any]: + """ + Execute a workflow with the given message and files. 
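
`_enhance_prompt` layers context in a fixed order: continuation output first, then extracted document text, then dependency outputs. A compact usage sketch of that assembly, with illustrative values:

```python
base_prompt = "Summarise the quarterly figures."
previous_result = "Raw totals: Q1=10, Q2=12"
document_content = "=== DOCUMENT: data.csv ===\nname,age\nJohn,30"
dependency_context = {"activity_1": {"content": "CSV parsed: 4 rows"},
                      "activity_2": "plot saved to out.png"}

# Continuation context goes first
prompt = f"{base_prompt}\n\n=== PREVIOUS AGENT OUTPUT ===\n{previous_result}\n"
# Then extracted document content
prompt += f"\n\n{document_content}"
# Then outputs from dependent activities
prompt += "\n\n=== OUTPUTS FROM PREVIOUS ACTIVITIES ===\n\n"
for name, value in dependency_context.items():
    body = value["content"] if isinstance(value, dict) and "content" in value else str(value)
    prompt += f"--- {name} ---\n{body}\n\n"
print(prompt)
```
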
+ + Args: + message: Input message (prompt) + files: Optional list of file metadata + + Returns: + Workflow execution result + """ + # Generate workflow ID + workflow_id = f"wf_{uuid.uuid4()}" + + # Initialize the workflow + workflow = self._initialize_workflow(workflow_id) + + # Capture start time + start_time = datetime.now() + + try: + # NEW: Create WorkflowExecution with document handler + from modules.agentservice_workflow_execution import WorkflowExecution + execution = WorkflowExecution( + workflow_manager=self, + workflow_id=workflow_id, + mandate_id=self.mandate_id, + user_id=self.user_id, + ai_service=self.ai_service, + lucydom_interface=self.lucydom_interface + ) + + # Set the document handler's workflow ID + self.document_handler.set_workflow_id(workflow_id) + + # Execute the workflow + result = await execution.execute(message, workflow, files) + + # Calculate duration + duration = (datetime.now() - start_time).total_seconds() + + # Update workflow stats + workflow["data_stats"]["total_processing_time"] = duration + workflow["completed_at"] = datetime.now().isoformat() + + # Save final state + self._save_workflow(workflow) + + return result + + except Exception as e: + logger.error(f"Error executing workflow: {str(e)}", exc_info=True) + + # Update workflow status + workflow["status"] = "failed" + workflow["last_activity"] = datetime.now().isoformat() + self._add_log(workflow, f"Workflow execution failed: {str(e)}", "error") + + # Save failed state + self._save_workflow(workflow) + + return { + "workflow_id": workflow_id, + "status": "failed", + "error": str(e) + } + + def _save_workflow(self, workflow: Dict[str, Any]) -> bool: + """ + Save workflow state to database and/or file. + Enhanced to handle structured documents. + + Args: + workflow: The workflow object to save + + Returns: + True if saved successfully, False otherwise + """ + try: + workflow_id = workflow.get("id") + + # Update in-memory cache + self.workflows[workflow_id] = workflow + + # Update in database if available + if self.lucydom_interface: + # NEW: Enhanced document handling for database persistence + # Create a copy of the workflow for database storage + db_workflow = workflow.copy() + + # Process messages to ensure documents are properly formatted + if "messages" in db_workflow: + for i, message in enumerate(db_workflow["messages"]): + # ensure large document contents are truncated for database storage + if "documents" in message: + for j, doc in enumerate(message["documents"]): + if "contents" in doc: + for k, content in enumerate(doc["contents"]): + if content.get("type") == "text" and "text" in content: + # limit text size for database storage + text = content["text"] + if len(text) > 1000: # Reasonable size for preview + db_workflow["messages"][i]["documents"][j]["contents"][k]["text"] = \ + text[:1000] + "... 
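
`_save_workflow` trims oversized extracted text before database persistence, while the JSON file backup keeps the full payload. One caveat: the hunk copies the workflow with `workflow.copy()`, which is shallow, so mutating the nested `contents` dicts also touches the original object that is written to file afterwards. A sketch of the truncation step using `copy.deepcopy` to keep the two copies independent:

```python
import copy

TEXT_PREVIEW_LIMIT = 1000  # cap used in _save_workflow

def truncate_for_storage(workflow: dict) -> dict:
    """Return a DB-safe copy with long extracted text shortened."""
    db_workflow = copy.deepcopy(workflow)
    for message in db_workflow.get("messages", []):
        for doc in message.get("documents", []):
            for content in doc.get("contents", []):
                text = content.get("text", "")
                if content.get("type") == "text" and len(text) > TEXT_PREVIEW_LIMIT:
                    content["text"] = text[:TEXT_PREVIEW_LIMIT] + "... [truncated for storage]"
    return db_workflow
```
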
[truncated for storage]" + + # Save to database + try: + self.lucydom_interface.save_workflow_state(db_workflow) + logger.info(f"Workflow {workflow_id} saved to database") + except Exception as db_error: + logger.error(f"Error saving workflow to database: {str(db_error)}") + # Continue to file saving even if database fails + + # Save to file (always do this as backup) + import json + workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json") + + with open(workflow_path, 'w', encoding='utf-8') as f: + json.dump(workflow, f, indent=2, ensure_ascii=False) + + logger.info(f"Workflow {workflow_id} saved to file: {workflow_path}") + return True + + except Exception as e: + logger.error(f"Error saving workflow state: {str(e)}") + return False + + async def load_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]: + """ + Load a workflow by ID. + Enhanced to ensure document handler is properly configured. + + Args: + workflow_id: ID of the workflow to load + + Returns: + The workflow object or None if not found + """ + # Check memory cache first + if workflow_id in self.workflows: + workflow = self.workflows[workflow_id] + + # NEW: Configure document handler for this workflow + self.document_handler.set_workflow_id(workflow_id) + + return workflow + + # Try to load from database + if self.lucydom_interface: + try: + workflow = self.lucydom_interface.load_workflow_state(workflow_id) + if workflow: + # Cache in memory + self.workflows[workflow_id] = workflow + + # NEW: Configure document handler for this workflow + self.document_handler.set_workflow_id(workflow_id) + + logger.info(f"Workflow {workflow_id} loaded from database") + return workflow + except Exception as e: + logger.error(f"Error loading workflow from database: {str(e)}") + + # Try to load from file + workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json") + + if os.path.exists(workflow_path): + try: + import json + with open(workflow_path, 'r', encoding='utf-8') as f: + workflow = json.load(f) + + # Cache in memory + self.workflows[workflow_id] = workflow + + # NEW: Configure document handler for this workflow + self.document_handler.set_workflow_id(workflow_id) + + logger.info(f"Workflow {workflow_id} loaded from file: {workflow_path}") + return workflow + except Exception as e: + logger.error(f"Error loading workflow from file: {str(e)}") + + logger.warning(f"Workflow {workflow_id} not found") + return None + async def delete_workflow(self, workflow_id: str) -> bool: """ Delete a workflow. @@ -425,15 +640,16 @@ class WorkflowManager: return workflow.get("messages", []) if workflow else None # Factory function for WorkflowManager -def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service = None): +def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None): """ Get a WorkflowManager instance for the specified context. - Reuses existing instances. + Reuses existing instances and updates dependencies. 
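
`load_workflow` resolves a workflow through three tiers: in-memory cache, then database, then JSON file, caching and re-binding the document handler on every hit. The cascade, reduced to its core (the `db` parameter stands in for `lucydom_interface`, and the try/except wrapping is omitted):

```python
import json
import os
from typing import Optional

def load_workflow(workflow_id: str, cache: dict, db, results_dir: str) -> Optional[dict]:
    """Memory -> database -> file lookup, caching whatever is found."""
    if workflow_id in cache:
        return cache[workflow_id]
    if db is not None:
        workflow = db.load_workflow_state(workflow_id)
        if workflow:
            cache[workflow_id] = workflow
            return workflow
    path = os.path.join(results_dir, f"workflow_{workflow_id}.json")
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as f:
            workflow = json.load(f)
        cache[workflow_id] = workflow
        return workflow
    return None

print(load_workflow("wf_missing", {}, None, "results/workflows"))  # None
```
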
Args: mandate_id: Mandate ID user_id: User ID ai_service: AI service + lucydom_interface: LucyDOM interface Returns: WorkflowManager instance @@ -442,8 +658,9 @@ def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service context_key = f"{mandate_id}_{user_id}" - # LucyDOM interface for database access - lucydom_interface = get_lucydom_interface(mandate_id, user_id) + # Get LucyDOM interface if not provided + if not lucydom_interface: + lucydom_interface = get_lucydom_interface(mandate_id, user_id) if context_key not in _workflow_managers: _workflow_managers[context_key] = WorkflowManager( @@ -453,9 +670,18 @@ def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service lucydom_interface ) - # Update services if changed + # Update services if provided if ai_service is not None: _workflow_managers[context_key].ai_service = ai_service + + # NEW: Update document handler's AI service + if hasattr(_workflow_managers[context_key], 'document_handler'): + _workflow_managers[context_key].document_handler.set_ai_service(ai_service) + + # NEW: Update agent registry dependencies + from modules.agentservice_registry import AgentRegistry + registry = AgentRegistry.get_instance() + registry.set_dependencies(ai_service=ai_service) return _workflow_managers[context_key] diff --git a/gwserver/old_modules_copy/agentservice_agent_coder.py b/gwserver/old_modules_copy/agentservice_agent_coder.py new file mode 100644 index 00000000..4bedebaa --- /dev/null +++ b/gwserver/old_modules_copy/agentservice_agent_coder.py @@ -0,0 +1,500 @@ +""" +Simplified Coder Agent for developing and executing Python code. +This agent uses the CodeExecutor from the helper module to execute code. +""" + +import logging +import json +import re +import uuid +import traceback +from datetime import datetime +from typing import List, Dict, Any, Optional, Tuple + +from modules.agentservice_base import BaseAgent +from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils +from connectors.connector_aichat_openai import ChatService + +logger = logging.getLogger(__name__) + +class CoderAgent(BaseAgent): + """Agent for developing and executing Python code""" + + def __init__(self): + """Initialize the coder agent with proper type and capabilities""" + super().__init__() + + # Agent metadata + self.id = "coder" + self.type = "coder" + self.name = "Python Code Agent" + self.description = "Develops and executes Python code" + self.capabilities = "code_development,data_processing,file_processing,automation" + self.result_format = "python_code" + + # Init utilities + self.file_utils = FileUtils() + self.message_utils = MessageUtils() + + # Executor settings + self.executor_timeout = 60 # seconds + self.executor_memory_limit = 512 # MB + + # AI service settings + self.ai_temperature = 0.2 # Lower temperature for more deterministic code generation + self.ai_max_tokens = 2000 # Enough tokens for complex code + + def get_agent_info(self) -> Dict[str, Any]: + """Get agent information for agent registry""" + return { + "id": self.id, + "type": self.type, + "name": self.name, + "description": self.description, + "capabilities": self.capabilities, + "result_format": self.result_format, + "metadata": { + "timeout": self.executor_timeout, + "memory_limit": self.executor_memory_limit + } + } + + async def process_message(self, message: Dict[str, Any], + workflow: Dict[str, Any], + context: Dict[str, Any] = None, + log_func=None) -> Dict[str, Any]: + """ + Processes a message to develop 
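
`get_workflow_manager` keeps one manager per `mandate_id`/`user_id` pair and refreshes injected services on later calls. The caching pattern in isolation, with the class trimmed to what the pattern needs:

```python
from typing import Dict, Optional

class WorkflowManager:
    def __init__(self, mandate_id: Optional[int], user_id: Optional[int], ai_service=None):
        self.mandate_id = mandate_id
        self.user_id = user_id
        self.ai_service = ai_service

_workflow_managers: Dict[str, WorkflowManager] = {}

def get_workflow_manager(mandate_id=None, user_id=None, ai_service=None) -> WorkflowManager:
    key = f"{mandate_id}_{user_id}"
    if key not in _workflow_managers:
        _workflow_managers[key] = WorkflowManager(mandate_id, user_id, ai_service)
    if ai_service is not None:
        _workflow_managers[key].ai_service = ai_service  # refresh injected service
    return _workflow_managers[key]

assert get_workflow_manager(1, 2) is get_workflow_manager(1, 2)
```
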
and execute Python code. + + Args: + message: The message to process + workflow: The current workflow + context: Additional context information + log_func: Function for workflow logging + + Returns: + Response message + """ + # Initialize logging + workflow_id = workflow.get("id") + logging_utils = LoggingUtils(workflow_id, log_func) + logging_utils.info(f"CoderAgent starting processing", "agents") + + # Create response message + response = self.message_utils.create_message(workflow_id, role="assistant") + response["agent_type"] = self.type + response["agent_name"] = self.name + response["parent_message_id"] = message.get("id") + response["documents"] = [] + + try: + # Check if user directly provided code + content = message.get("content", "") + documents = message.get("documents", []) + + # Extract code from message content + code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content) + code_to_execute = None + + if code_blocks: + # Use the first code block found + code_to_execute = code_blocks[0] + # Clean the code to remove any markdown formatting + code_to_execute = self._clean_code(code_to_execute) + logging_utils.info(f"Code extracted from message ({len(code_to_execute)} characters)", "agents") + else: + # Generate code based on the message content using AI + logging_utils.info("No code found in message, generating new code with AI", "agents") + + # Generate code using AI + code_to_execute, requirements = await self._generate_code_from_prompt(content, documents) + if not code_to_execute: + logging_utils.warning("AI could not generate code", "agents") + response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions." + self.message_utils.finalize_message(response) + return response + logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents") + + # Execute the code + if code_to_execute: + logging_utils.info("Executing code", "execution") + + # Prepare execution context + execution_context = { + "workflow_id": workflow_id, + "documents": documents, + "message": message, + "log_func": log_func + } + + # Add log_func to execution context + execution_context["log_func"] = log_func + + # Execute code + result = await self._execute_code(code_to_execute, requirements, execution_context) + + # Prepare response + if result.get("success", False): + # Code execution successful + output = result.get("output", "") + execution_result = result.get("result") + logging_utils.info("Code executed successfully", "execution") + + # Format response content + response_content = f"## Code executed successfully\n\n" + + # Include the executed code + response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n" + + # Include the output if available + if output: + response_content += f"### Output\n\n```\n{output}\n```\n\n" + + # Include the execution result if available + if execution_result: + result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result) + response_content += f"### Result\n\n```\n{result_str}\n```\n\n" + + response["content"] = response_content + + # Process any files created by the code + if isinstance(execution_result, dict) and "created_files" in execution_result: + created_files = execution_result.get("created_files", []) + for file_info in created_files: + file_id = file_info.get("id") + if file_id: + logging_utils.info(f"Adding created file {file_info.get('name', file_id)} to documents", "files") + # Add 
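
The coder agent first checks the user message for fenced code before falling back to AI generation. The extraction regex from the hunk, shown standalone:

```python
import re

content = "Please run this:\n```python\nprint('hello')\n```\nThanks!"
code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content)
if code_blocks:
    print(code_blocks[0])  # -> print('hello')
else:
    print("no code found; would fall back to AI generation")
```
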
file document to the response + doc = { + "id": f"doc_{uuid.uuid4()}", + "source": file_info, + "type": "file" + } + response["documents"].append(doc) + else: + # Code execution failed + error = result.get("error", "Unknown error") + logging_utils.error(f"Error during code execution: {error}", "execution") + + # Format error response + response_content = f"## Error during code execution\n\n" + response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n" + response_content += f"### Error\n\n```\n{error}\n```\n\n" + + # Add recommendation based on error + response_content += get_error_recommendation(error) + + response["content"] = response_content + else: + # No code to execute + response["content"] = "I couldn't find or generate executable code. Please provide Python code or explain your requirements more clearly." + + # Finalize response + self.message_utils.finalize_message(response) + + # Log success + logging_utils.info("CoderAgent has successfully processed the request", "agents") + + return response + + except Exception as e: + error_msg = f"Error during processing by the CoderAgent: {str(e)}" + logging_utils.error(error_msg, "error") + + # Create error response + response["content"] = f"## Processing Error\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```" + self.message_utils.finalize_message(response) + + return response + + def _clean_code(self, code: str) -> str: + """ + Clean up code by removing markdown code block markers and handling other formatting issues. + + Args: + code: The code string to clean + + Returns: + Cleaned code string + """ + import re + + # Remove code block markers if present + code = re.sub(r'^```(?:python)?\s*', '', code) + code = re.sub(r'```\s*$', '', code) + + # Fix potential string literal issues + lines = code.split('\n') + fixed_lines = [] + in_string = False + string_delimiter = None + + for line in lines: + # Very basic string literal parsing - not perfect but helps with common cases + if in_string: + # We're in a multi-line string, check if it ends + if string_delimiter in line and not line.endswith('\\'): + in_string = False + else: + # Check for unclosed string literals + for delimiter in ['"', "'"]: + count = line.count(delimiter) + # If odd number of delimiters and not escaped + if count % 2 == 1 and not line.endswith('\\'): + in_string = True + string_delimiter = delimiter + break + + fixed_lines.append(line) + + # If we ended with an unclosed string, add a closing delimiter + if in_string: + fixed_lines[-1] += string_delimiter + logger.warning(f"Fixed unclosed string literal in code") + + return '\n'.join(fixed_lines) + + async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]]) -> Tuple[str, List[str]]: + """ + Generate Python code from a prompt using AI service. 
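
`_clean_code` strips leftover fence markers and appends a closing quote when its deliberately rough heuristic detects an unclosed string literal. A quick demonstration of the fence-stripping half:

```python
import re

def strip_fences(code: str) -> str:
    """Remove a leading ```python marker and a trailing ``` marker."""
    code = re.sub(r'^```(?:python)?\s*', '', code)
    code = re.sub(r'```\s*$', '', code)
    return code

print(strip_fences("```python\nprint('ok')\n```"))  # -> print('ok')
```
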
+ + Args: + prompt: The prompt to generate code from + documents: Documents associated with the prompt + + Returns: + Tuple of (generated Python code, required packages) + """ + try: + # Initialize AI service + chat_service = ChatService() + + # Prepare a prompt for code generation + ai_prompt = f"""Generate Python code to solve the following task: + {prompt} + + Available documents: + """ + # Add information about available documents + if documents: + for i, doc in enumerate(documents): + source = doc.get("source", {}) + doc_name = source.get("name", f"Document {i+1}") + doc_type = source.get("content_type", "unknown") + doc_id = source.get("id", "") + + ai_prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id})\n" + + ai_prompt += """ +IMPORTANT REQUIREMENTS: +1. Your code MUST define a 'result' variable that captures the output of your code. +The execution framework specifically looks for this variable. +2. Write only executable Python code in the Python section. +3. Do not include any text explanations or markdown outside of code comments (#). +4. All explanations should be within Python comments only. +5. Make your code complete and self-contained. +6. For CSV processing, include proper error handling. + +Return your response in the following format: + +## requirements.txt +# Each required package on its own line +pandas +numpy +matplotlib + +## python +import pandas as pd +import numpy as np + +# Load and process data +def process_data(file_path): + try: + # Read the CSV file + df = pd.read_csv(file_path) + return df + except Exception as e: + print(f"Error: {e}") + return None + +# Main processing logic +data = process_data('data.csv') + +# Analyze data +if data is not None: + summary = data.describe() + print("Data summary:") + print(summary) + + # IMPORTANT: Define result variable to return data + result = { + "summary": summary.to_dict(), + "columns": list(data.columns), + "row_count": len(data) + } +else: + # Always define a result, even in error cases + result = {"error": "Failed to process data"} + """ + + # Create messages for the API + messages = [ + {"role": "system", "content": "You are a Python code generator. Generate executable Python code following the specified format with requirements.txt and code sections. 
The code must be well-commented, include error handling, and define a 'result' variable to capture output."}, + {"role": "user", "content": ai_prompt} + ] + + # Call the API + logging.info(f"Calling AI API to generate code") + generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens) + + # Extract requirements.txt content + requirements_match = re.search(r'## requirements.txt\s*([\s\S]*?)(?=##|\Z)', generated_content) + requirements = [] + if requirements_match: + requirements_text = requirements_match.group(1).strip() + # Filter out markdown formatting and invalid characters + for line in requirements_text.split('\n'): + line = line.strip() + # Skip empty lines, comments, and markdown formatting + if not line or line.startswith('#') or line.startswith('`') or line.endswith('`') or '```' in line: + continue + requirements.append(line) + + # Extract Python code + code_match = re.search(r'## python\s*([\s\S]*?)(?=##|\Z)', generated_content) + if code_match: + code = code_match.group(1).strip() + else: + # Fallback to legacy code block extraction + code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', generated_content) + code = code_blocks[0].strip() if code_blocks else generated_content.strip() + + # Clean the code to remove any markdown formatting + code = self._clean_code(code) + + return code, requirements + + except Exception as e: + logging.error(f"Error generating code with AI: {str(e)}", exc_info=True) + # Return basic error handling code and no requirements + error_str = str(e).replace('"', '\\"') + return f""" +# Error during code generation +print(f"An error occurred during code generation: {error_str}") +# Return an error result +result = {{"error": "Code generation failed", "message": "{error_str}"}} +""", [] + + + async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]: + """ + Execute Python code using the CodeExecutor. 
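
The generation path expects the model to answer with a `## requirements.txt` section followed by a `## python` section, and filters markdown noise out of the requirements list. A standalone sketch of that parse; the sample response is illustrative:

```python
import re

response = """## requirements.txt
pandas

## python
import pandas as pd
result = {"ok": True}
"""

req_match = re.search(r'## requirements.txt\s*([\s\S]*?)(?=##|\Z)', response)
requirements = []
if req_match:
    for line in req_match.group(1).splitlines():
        line = line.strip()
        # Skip blanks, comments, and markdown formatting
        if line and not line.startswith(('#', '`')) and '```' not in line:
            requirements.append(line)

code_match = re.search(r'## python\s*([\s\S]*?)(?=##|\Z)', response)
code = code_match.group(1).strip() if code_match else ""
print(requirements)          # ['pandas']
print(code.splitlines()[0])  # import pandas as pd
```
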
+ + Args: + code: The Python code to execute + requirements: List of required packages + context: Additional context for execution + + Returns: + Result of code execution + """ + # Get workflow ID and set up logging + workflow_id = context.get("workflow_id", "") if context else "" + logging_utils = None + if "log_func" in context and workflow_id: + logging_utils = LoggingUtils(workflow_id, context.get("log_func")) + + if logging_utils: + logging_utils.info("Executing Python code", "execution") + if requirements: + logging_utils.info(f"Required packages: {', '.join(requirements)}", "execution") + + try: + # List of blocked packages for security + blocked_packages = [ + "cryptography", "flask", "django", "tornado", # Security risks + "tensorflow", "pytorch", "scikit-learn" # Resource intensive + ] + + # Initialize CodeExecutor with requirements and workflow_id for persistence + executor = CodeExecutor( + workflow_id=workflow_id, + timeout=self.executor_timeout, + max_memory_mb=self.executor_memory_limit, + requirements=requirements, + blocked_packages=blocked_packages + ) + + # Prepare input data for the code + input_data = {"context": context, "workflow_id": workflow_id} + + # Add file references if available + if context and "documents" in context: + input_data["files"] = [ + { + "id": doc.get("source", {}).get("id", ""), + "name": doc.get("source", {}).get("name", ""), + "type": doc.get("source", {}).get("content_type", "") + } + for doc in context.get("documents", []) + if doc.get("source", {}).get("type") == "file" + ] + + # Execute the code + result = executor.execute_code(code, input_data) + + # Log the execution results + if logging_utils: + if result.get("success", False): + logging_utils.info("Code executed successfully", "execution") + + # Log a preview of the output + output = result.get("output", "") + if output: + preview = output[:1000] + "..." if len(output) > 1000 else output + logging_utils.info(f"Output preview: {preview}", "execution") + + # Log a preview of the result + execution_result = result.get("result") + if execution_result: + if isinstance(execution_result, (dict, list)): + result_str = json.dumps(execution_result, indent=2) + preview = result_str[:1000] + "..." if len(result_str) > 1000 else result_str + else: + str_result = str(execution_result) + preview = str_result[:1000] + "..." 
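
Executed snippets must define a `result` variable, and they receive workflow context through the `input_data` dict, including a `files` list derived from document sources. A sketch of a snippet honoring that contract; the stand-in `input_data` mimics what `_execute_code` builds, and how `CodeExecutor` injects it into the sandbox is assumed here:

```python
# Stand-in for the dict that _execute_code passes to the sandbox.
input_data = {
    "workflow_id": "wf_demo",
    "files": [{"id": "file_1", "name": "data.csv", "type": "text/csv"}],
}

# Body of a snippet honoring the executor contract:
files = input_data.get("files", [])
result = {  # the framework specifically looks for this variable
    "file_count": len(files),
    "files": [{"name": f["name"], "type": f["type"]} for f in files],
}
print(result)
```
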
if len(str_result) > 1000 else str_result + + logging_utils.info(f"Result preview: {preview}", "execution") + else: + # Log error information + error = result.get("error", "Unknown error") + logging_utils.error(f"Error during code execution: {error}", "execution") + + # Only clean up non-persistent environments + if not executor.is_persistent: + executor.cleanup() + + return result + + except Exception as e: + error_message = f"Error during code execution: {str(e)}\n{traceback.format_exc()}" + if logging_utils: + logging_utils.error(error_message, "error") + + return { + "success": False, + "output": "", + "error": error_message, + "result": None + } + + +# Singleton instance +_coder_agent = None + +def get_coder_agent(): + """Returns a singleton instance of the Coder Agent""" + global _coder_agent + if _coder_agent is None: + _coder_agent = CoderAgent() + return _coder_agent \ No newline at end of file diff --git a/gwserver/test.py b/gwserver/test.py index 72413d56..4f9a30d5 100644 --- a/gwserver/test.py +++ b/gwserver/test.py @@ -12,10 +12,11 @@ import uuid from datetime import datetime from typing import Dict, Any, List, Optional -# Configure logging +# Konfiguration des Loggers logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + level=logging.DEBUG, + #format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', + format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)] ) @@ -130,7 +131,7 @@ async def run_test_workflow(): # Create a test message test_message = { "role": "user", - "content": "Please analyze the CSV file and give me a summary of the data." + "content": "Please analyze the CSV file and give me a summary of the data. The ages of the people in the table are by year 2025. In which year the age of all people in the table as a sum is 200 years? Can you please add additional 10 datasets to the table." } # Add a sample CSV file diff --git a/gwserver/workflow_test_result.json b/gwserver/workflow_test_result.json index 67466319..d83adf12 100644 --- a/gwserver/workflow_test_result.json +++ b/gwserver/workflow_test_result.json @@ -1,11 +1,12 @@ { - "workflow_id": "wf_20250411233433", + "workflow_id": "wf_20250414200154", "status": "completed", "messages": [ { - "id": "msg_71988f72-f0dc-431f-a3f1-6cfe84cc339b", - "workflow_id": "wf_20250411233433", - "started_at": "2025-04-11T23:34:33.805887", + "id": "msg_f40d3472-69f0-4b68-8c64-a6a4fca9a653", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:01:57.968766", "finished_at": null, "sequence_no": 1, "status": "pending", @@ -18,31 +19,33 @@ }, "documents": [ { - "id": 8, + "id": "doc_8ac210d3-5bb7-487d-b7c6-6e8dc3edf6cc", "source": { "type": "file", - "id": 8, + "id": "file_cc30a810-e582-4e10-99f5-cf7fdc0aa49f", "name": "data.csv", "content_type": "application/vnd.ms-excel", "size": 78, - "upload_date": "2025-04-11T23:34:33.809888" + "upload_date": "2025-04-14T20:01:57.970773" }, "contents": [ { "type": "text", "text": "name,age,location\nJohn,30,New York\nAlice,25,London\nBob,35,Paris\nEmma,28,Berlin", - "is_extracted": true + "is_extracted": true, + "extraction_context": null } ] } ], - "content": "Please analyze the CSV file and give me a summary of the data.", + "content": "Please analyze the CSV file and give me a summary of the data. The ages of the people in the table are by year 2025. In which year the age of all people in the table as a sum is 200 years? 
Can you please add additional 10 datasets to the table.", "agent_type": null }, { - "id": "msg_2bcd999f-0ac7-4ccc-8f4b-0b1aa55fa189", - "workflow_id": "wf_20250411233433", - "started_at": "2025-04-11T23:35:25.915648", + "id": "msg_c9a32211-0ef1-4ffb-9360-bc9afe33f4e1", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:03:10.956604", "finished_at": null, "sequence_no": 2, "status": "pending", @@ -54,16 +57,17 @@ "bytes_received": 0 }, "documents": [], - "content": "## Fehler bei der Codeausf\u00fchrung\n\n### Ausgef\u00fchrter Code\n\n```python\nimport pandas as pd\nimport asyncio\n\n# Import the necessary helper functions\nfrom helper_functions import load_file, process_csv\n\nasync def load_and_process_csv(file_id):\n try:\n # Load the CSV file content asynchronously\n csv_content = await load_file(file_id, encoding='utf-8')\n \n # Process the CSV content using pandas\n df = process_csv(csv_content)\n \n # Create a summary of the DataFrame\n summary = {\n 'columns': df.columns.tolist(),\n 'head': df.head().to_dict(orient='records'),\n 'description': df.describe().to_dict()\n }\n \n # Prepare the result dictionary\n result = {\n 'status': 'success',\n 'summary': summary\n }\n \n except Exception as e:\n # Handle any exceptions that occur\n result = {\n 'status': 'error',\n 'message': str(e)\n }\n \n return result\n\n# Example usage\n# Assuming 'data.csv' has a file_id of '12345'\nfile_id = '12345'\nresult = asyncio.run(load_and_process_csv(file_id))\nprint(result)\n```\n\n### Fehler\n\n```\nFehler bei der Installation der erforderlichen Pakete: Fehler bei der Paketinstallation: error: subprocess-exited-with-error\n \n Getting requirements to build wheel did not run successfully.\n exit code: 1\n \n [15 lines of output]\n The 'sklearn' PyPI package is deprecated, use 'scikit-learn'\n rather than 'sklearn' for pip commands.\n \n Here is how to fix this error in the main use cases:\n - use 'pip install scikit-learn' rather than 'pip install sklearn'\n - replace 'sklearn' by 'scikit-learn' in your pip requirements files\n (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)\n - if the 'sklearn' package is used by one of your dependencies,\n it would be great if you take some time to track which package uses\n 'sklearn' instead of 'scikit-learn' and report it to their issue tracker\n - as a last resort, set the environment variable\n SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error\n \n More information is available at\n https://github.com/scikit-learn/sklearn-pypi-package\n [end of output]\n \n note: This error originates from a subprocess, and is likely not a problem with pip.\nerror: subprocess-exited-with-error\n\nGetting requirements to build wheel did not run successfully.\nexit code: 1\n\nSee above for output.\n\nnote: This error originates from a subprocess, and is likely not a problem with pip.\n\n[notice] A new release of pip is available: 23.2.1 -> 25.0.1\n[notice] To update, run: C:\\Users\\pmots\\AppData\\Local\\Temp\\ai_code_exec_x1z8_0nq\\venv\\Scripts\\python.exe -m pip install --upgrade pip\n\n```\n\n\n### Empfehlung\nUm den Fehler zu beheben:\n1. \u00dcberpr\u00fcfen Sie die genaue Fehlermeldung\n2. Vereinfachen Sie den Code und testen Sie schrittweise\n3. Stellen Sie sicher, dass alle ben\u00f6tigten Daten korrekt geladen werden\n4. 
Verwenden Sie try/except-Bl\u00f6cke f\u00fcr fehleranf\u00e4llige Operationen\n", + "content": "## Code executed successfully\n\n### Executed Code\n\n```python\n# REQUIREMENTS: pandas\n\nimport pandas as pd\nimport os\n\n# Define the path to the CSV file\nfile_path = './input_data/data.csv'\n\n# Initialize the result variable\nresult = {}\n\ntry:\n # Check if the file exists\n if os.path.exists(file_path):\n # Load the CSV file into a DataFrame\n df = pd.read_csv(file_path)\n \n # Convert the DataFrame to a dictionary\n result = df.to_dict(orient='records')\n else:\n result = {\"error\": \"File not found\"}\nexcept Exception as e:\n # Handle any exceptions that occur during file reading or processing\n result = {\"error\": str(e)}\n\n# Output the result\nprint(result)\n```\n\n### Output\n\n```\n[{'name': 'John', 'age': 30, 'location': 'New York'}, {'name': 'Alice', 'age': 25, 'location': 'London'}, {'name': 'Bob', 'age': 35, 'location': 'Paris'}, {'name': 'Emma', 'age': 28, 'location': 'Berlin'}]\n\n```\n\n", "agent_type": "coder", "agent_id": "coder", "agent_name": "Python Code Agent", - "result_format": "DataFrame" + "result_format": "AnalysisReport containing the data structure and initial insights." }, { - "id": "msg_54268450-4dd3-4cbe-8156-0fd067bb3b77", - "workflow_id": "wf_20250411233433", - "started_at": "2025-04-11T23:35:35.822670", + "id": "msg_92c40d8d-c9d3-4e62-bc68-c1c31bccbc26", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:03:10.957618", "finished_at": null, "sequence_no": 3, "status": "pending", @@ -75,16 +79,17 @@ "bytes_received": 0 }, "documents": [], - "content": "To analyze the provided data summary, let's break down the information:\n\n1. **Number of Rows and Columns:**\n - The data consists of three columns: `name`, `age`, and `location`.\n - Based on the sample data provided, there are at least four rows (John, Alice, Bob, Emma). However, the ellipsis (`...`) indicates that there may be more rows in the actual dataset.\n\n2. 
**Data Types of Each Column:**\n - `name`: This column contains text data, so the data type is likely `string` or `object` in pandas.\n - `age`: This column contains numerical data, specifically integers, so the data type is likely `int`.\n - `location`: This column contains text data, so the data type is likely `string` or `object` in pandas.\n\n### Summary:\n- **Columns**: 3 (`name`, `age`, `location`)\n- **Rows**: At least 4 (potentially more)\n- **Data Types**:\n - `name`: String/Object\n - `age`: Integer\n - `location`: String/Object\n\n### Detail Analysis:\n- The dataset appears to be a simple demographic dataset with basic information about individuals.\n- The `age` column is numerical, which allows for statistical analysis such as calculating the average age, age distribution, etc.\n- The `location` and `name` columns are categorical, which can be used for grouping or filtering the data.\n\n### Recommendations:\n- **Data Quality Check**: Ensure there are no missing values or inconsistencies in the `age` column, as it is crucial for numerical analysis.\n- **Data Enrichment**: If possible, add more demographic details such as gender or occupation to enhance the dataset's analytical potential.\n- **Visualization**: Create visualizations such as histograms for age distribution and bar charts for the frequency of locations to better understand the dataset's composition.\n- **Further Analysis**: Consider segmenting the data by location to identify any regional trends or patterns in age distribution.\n\nIf you have access to the full dataset, loading it into a pandas DataFrame would allow for a more comprehensive analysis and verification of these initial insights.", - "agent_type": "analyzer", - "agent_id": "analyzer", - "agent_name": "Datenanalyst", + "content": "## Data Analysis Report\n\nNo data documents were provided for analysis. Please attach CSV, Excel, or other data files for me to analyze.", + "agent_type": "analyst_agent", + "agent_id": "analyst_agent", + "agent_name": "Data Analyst", "result_format": "AnalysisReport" }, { - "id": "msg_342c327a-dc1f-4d5b-b54e-30de0042b17e", - "workflow_id": "wf_20250411233433", - "started_at": "2025-04-11T23:35:45.975065", + "id": "msg_c2c52c9f-0c69-4037-9d9a-5d120ccf0770", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:03:18.029940", "finished_at": null, "sequence_no": 4, "status": "pending", @@ -96,16 +101,17 @@ "bytes_received": 0 }, "documents": [], - "content": "To analyze the DataFrame for missing values and provide a summary of their distribution, we need to perform the following steps:\n\n1. **Check for Missing Values**: Identify any missing values in the dataset.\n2. **Summarize Missing Values**: Provide a summary of where these missing values occur.\n\nLet's assume we have loaded the data into a DataFrame named `df`. 
Here is how you can perform the analysis:\n\n### Step 1: Check for Missing Values\n\n```python\nimport pandas as pd\n\n# Load the data into a DataFrame\ndata = {\n 'name': ['John', 'Alice', 'Bob', 'Emma'],\n 'age': [30, 25, 35, 28],\n 'location': ['New York', 'London', 'Paris', 'Berlin']\n}\ndf = pd.DataFrame(data)\n\n# Check for missing values\nmissing_values = df.isnull().sum()\n```\n\n### Step 2: Summarize Missing Values\n\n```python\n# Summarize missing values\nmissing_summary = missing_values[missing_values > 0]\n\n# Print summary\nprint(\"Summary of Missing Values:\")\nprint(missing_summary)\n```\n\n### Summary\n\n- **Missing Values**: Based on the provided data snippet, there are no missing values in the columns `name`, `age`, and `location`.\n- **Distribution**: Since there are no missing values, there is no distribution to report.\n\n### Detail Analysis\n\n- **Data Completeness**: The dataset appears to be complete with no missing entries in the provided columns.\n- **Potential Issues**: If the dataset were larger, it would be important to check for missing values in other parts of the dataset not shown in the snippet.\n\n### Recommendations\n\n1. **Data Validation**: Ensure that the data collection process is robust to prevent missing values.\n2. **Handling Missing Data**: If missing values are detected in a larger dataset, consider strategies such as imputation or removal, depending on the context and significance of the missing data.\n3. **Regular Checks**: Implement regular checks for data quality to maintain the integrity of the dataset.\n\nIf you have a larger dataset or specific areas you want to focus on, please provide more details for a more tailored analysis.", - "agent_type": "analyzer", - "agent_id": "analyzer", - "agent_name": "Datenanalyst", - "result_format": "AnalysisReport" + "content": "## Code executed successfully\n\n### Executed Code\n\n```python\n# REQUIREMENTS: pandas\n\nimport pandas as pd\n\n# Load the data\nfile_path = './input_data/data.csv'\ntry:\n data = pd.read_csv(file_path)\nexcept FileNotFoundError:\n result = {\"error\": \"File not found\"}\n print(result)\n exit()\n\n# Calculate the sum of ages\ntotal_age = data['age'].sum()\n\n# Initialize variables\nyear = 2025\nage_sum = 0\n\n# Calculate the year when the sum of ages reaches 200\nwhile age_sum < 200:\n age_sum += total_age\n if age_sum >= 200:\n break\n year += 1\n\n# Store the result\nresult = {\"year\": year, \"age_sum\": age_sum}\n\n# Output the result\nprint(result)\n```\n\n### Output\n\n```\n{'year': 2026, 'age_sum': np.float64(236.0)}\n\n```\n\n", + "agent_type": "coder", + "agent_id": "coder", + "agent_name": "Python Code Agent", + "result_format": "AnalysisReport detailing the year when the sum of ages is 200." }, { - "id": "msg_8bcb2566-cc56-4a8f-827a-fa64ad7274c9", - "workflow_id": "wf_20250411233433", - "started_at": "2025-04-11T23:35:53.968709", + "id": "msg_4124c5b0-1a43-48e4-af9f-6db183956f14", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:03:18.029940", "finished_at": null, "sequence_no": 5, "status": "pending", @@ -117,16 +123,17 @@ "bytes_received": 0 }, "documents": [], - "content": "To generate descriptive statistics for the provided dataset, we will focus on the numerical column, which is \"age\". The other columns, \"name\" and \"location\", are categorical and do not lend themselves to the same type of statistical analysis. 
Here is a structured table presenting the descriptive statistics for the \"age\" column:\n\n| Statistic | Age |\n|--------------------|------|\n| Count | 4 |\n| Mean | 29.5 |\n| Median | 29.0 |\n| Standard Deviation | 4.2 |\n| Minimum | 25 |\n| 25th Percentile | 27.0 |\n| 50th Percentile | 29.0 |\n| 75th Percentile | 31.5 |\n| Maximum | 35 |\n\n### Summary\n- **Count**: There are 4 entries in the \"age\" column.\n- **Mean**: The average age is 29.5 years.\n- **Median**: The middle value when ages are ordered is 29 years.\n- **Standard Deviation**: The spread of ages around the mean is approximately 4.2 years.\n- **Quartiles**: The ages are distributed with the 25th percentile at 27 years, the median (50th percentile) at 29 years, and the 75th percentile at 31.5 years.\n\n### Detail Analysis\n- The ages range from 25 to 35 years, indicating a relatively young group.\n- The standard deviation suggests moderate variability in the ages.\n- The quartiles show that half of the individuals are between 27 and 31.5 years old.\n\n### Recommendations\n- If further analysis is needed on age-related trends, consider increasing the sample size for more robust insights.\n- For categorical data like \"location\", consider analyzing the frequency distribution or cross-tabulations with other variables if available.\n- Ensure data quality by checking for any missing or outlier values in larger datasets.", - "agent_type": "analyzer", - "agent_id": "analyzer", - "agent_name": "Datenanalyst", + "content": "## Data Analysis Report\n\nNo data documents were provided for analysis. Please attach CSV, Excel, or other data files for me to analyze.", + "agent_type": "analyst_agent", + "agent_id": "analyst_agent", + "agent_name": "Data Analyst", "result_format": "AnalysisReport" }, { - "id": "msg_e4cb8e47-7fa2-4b6d-981b-18161c86f0e0", - "workflow_id": "wf_20250411233433", - "started_at": "2025-04-11T23:36:01.195923", + "id": "msg_22728edd-a903-454f-9957-453d258bcceb", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:03:28.462151", "finished_at": null, "sequence_no": 6, "status": "pending", @@ -138,16 +145,17 @@ "bytes_received": 0 }, "documents": [], - "content": "To analyze the DataFrame and identify the unique values and their counts for each categorical column, we will focus on the 'name' and 'location' columns, as 'age' is typically a numerical column. 
Here is how the analysis can be structured:\n\n### Data Analysis\n\n#### Unique Values and Counts for Categorical Columns\n\n| Column | Unique Values | Count of Each Unique Value |\n|----------|---------------|----------------------------|\n| name | John | 1 |\n| | Alice | 1 |\n| | Bob | 1 |\n| | Emma | 1 |\n| location | New York | 1 |\n| | London | 1 |\n| | Paris | 1 |\n| | Berlin | 1 |\n\n### Summary\n\n- **Name Column**: Each name in the dataset is unique, with a count of 1 for each individual.\n- **Location Column**: Each location is also unique, with a count of 1 for each city.\n\n### Detail Analysis\n\n- The dataset contains four unique names and four unique locations.\n- There is a one-to-one relationship between names and locations, indicating that each individual is associated with a distinct city.\n\n### Recommendations\n\n- If the dataset is intended to be larger or more diverse, consider adding more entries to increase the variety of names and locations.\n- Ensure that the dataset remains balanced if used for demographic analysis, by potentially adding more entries from different locations or with different names.\n- If further analysis is required, consider exploring additional attributes or expanding the dataset to include more individuals and locations.", - "agent_type": "analyzer", - "agent_id": "analyzer", - "agent_name": "Datenanalyst", - "result_format": "AnalysisReport" + "content": "## Code executed successfully\n\n### Executed Code\n\n```python\n# REQUIREMENTS: pandas,numpy\n\nimport pandas as pd\nimport numpy as np\nimport os\n\n# Load the original dataset\noriginal_data_path = './input_data/data.csv'\ntry:\n original_data = pd.read_csv(original_data_path)\nexcept FileNotFoundError:\n raise Exception(f\"File not found: {original_data_path}\")\n\n# Function to generate random names\ndef generate_random_name():\n first_names = ['John', 'Alice', 'Bob', 'Emma', 'Mike', 'Sara', 'Tom', 'Lucy', 'Jake', 'Lily']\n last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Rodriguez', 'Martinez']\n return np.random.choice(first_names) + \" \" + np.random.choice(last_names)\n\n# Function to generate random ages\ndef generate_random_age():\n return np.random.randint(20, 60)\n\n# Function to generate random locations\ndef generate_random_location():\n locations = ['New York', 'London', 'Paris', 'Berlin', 'Tokyo', 'Sydney', 'Moscow', 'Toronto', 'Dubai', 'Rome']\n return np.random.choice(locations)\n\n# Generate 10 additional datasets\nadditional_datasets = []\nfor _ in range(10):\n new_data = original_data.copy()\n new_data['name'] = [generate_random_name() for _ in range(len(new_data))]\n new_data['age'] = [generate_random_age() for _ in range(len(new_data))]\n new_data['location'] = [generate_random_location() for _ in range(len(new_data))]\n additional_datasets.append(new_data)\n\n# Store the result in a dictionary\nresult = {'datasets': [df.to_dict(orient='records') for df in additional_datasets]}\n\n# Output the result\nprint(result)\n```\n\n### Output\n\n```\n{'datasets': [[{'name': 'Bob Williams', 'age': 49, 'location': np.str_('New York')}, {'name': 'John Jones', 'age': 27, 'location': np.str_('Sydney')}, {'name': 'Jake Martinez', 'age': 55, 'location': np.str_('Tokyo')}, {'name': 'Emma Johnson', 'age': 28, 'location': np.str_('Moscow')}, {'name': 'Jake Rodriguez', 'age': 55, 'location': np.str_('Berlin')}], [{'name': 'John Johnson', 'age': 58, 'location': np.str_('New York')}, {'name': 'John Jones', 'age': 27, 'location': 
np.str_('Paris')}, {'name': 'Sara Garcia', 'age': 56, 'location': np.str_('Sydney')}, {'name': 'Sara Rodriguez', 'age': 20, 'location': np.str_('New York')}, {'name': 'Lucy Garcia', 'age': 58, 'location': np.str_('Berlin')}], [{'name': 'Lily Johnson', 'age': 58, 'location': np.str_('Rome')}, {'name': 'Sara Rodriguez', 'age': 52, 'location': np.str_('Sydney')}, {'name': 'Sara Davis', 'age': 43, 'location': np.str_('Moscow')}, {'name': 'Alice Johnson', 'age': 43, 'location': np.str_('Berlin')}, {'name': 'Bob Miller', 'age': 35, 'location': np.str_('London')}], [{'name': 'Mike Brown', 'age': 44, 'location': np.str_('Tokyo')}, {'name': 'Jake Miller', 'age': 49, 'location': np.str_('Paris')}, {'name': 'Sara Rodriguez', 'age': 25, 'location': np.str_('London')}, {'name': 'Jake Johnson', 'age': 22, 'location': np.str_('New York')}, {'name': 'Emma Rodriguez', 'age': 22, 'location': np.str_('Dubai')}], [{'name': 'Emma Jones', 'age': 25, 'location': np.str_('Sydney')}, {'name': 'Sara Martinez', 'age': 59, 'location': np.str_('Toronto')}, {'name': 'Lily Rodriguez', 'age': 50, 'location': np.str_('Dubai')}, {'name': 'Emma Davis', 'age': 24, 'location': np.str_('Moscow')}, {'name': 'Jake Davis', 'age': 52, 'location': np.str_('Berlin')}], [{'name': 'Emma Smith', 'age': 24, 'location': np.str_('Toronto')}, {'name': 'Jake Rodriguez', 'age': 35, 'location': np.str_('Tokyo')}, {'name': 'Alice Garcia', 'age': 46, 'location': np.str_('Sydney')}, {'name': 'John Davis', 'age': 37, 'location': np.str_('New York')}, {'name': 'John Johnson', 'age': 50, 'location': np.str_('London')}], [{'name': 'Lily Jones', 'age': 46, 'location': np.str_('London')}, {'name': 'Alice Garcia', 'age': 42, 'location': np.str_('New York')}, {'name': 'John Garcia', 'age': 20, 'location': np.str_('Moscow')}, {'name': 'Sara Smith', 'age': 40, 'location': np.str_('Moscow')}, {'name': 'Jake Garcia', 'age': 32, 'location': np.str_('Sydney')}], [{'name': 'Jake Garcia', 'age': 41, 'location': np.str_('Tokyo')}, {'name': 'Bob Garcia', 'age': 41, 'location': np.str_('New York')}, {'name': 'Alice Rodriguez', 'age': 53, 'location': np.str_('Rome')}, {'name': 'Lucy Jones', 'age': 48, 'location': np.str_('Paris')}, {'name': 'Bob Williams', 'age': 55, 'location': np.str_('New York')}], [{'name': 'Lily Smith', 'age': 35, 'location': np.str_('Rome')}, {'name': 'Sara Garcia', 'age': 36, 'location': np.str_('New York')}, {'name': 'Jake Miller', 'age': 39, 'location': np.str_('Dubai')}, {'name': 'John Garcia', 'age': 35, 'location': np.str_('Sydney')}, {'name': 'Alice Williams', 'age': 56, 'location': np.str_('New York')}], [{'name': 'Lucy Martinez', 'age': 54, 'location': np.str_('Dubai')}, {'name': 'Lucy Williams', 'age': 52, 'location': np.str_('Toronto')}, {'name': 'Lucy Williams', 'age': 57, 'location': np.str_('Tokyo')}, {'name': 'Alice Williams', 'age': 41, 'location': np.str_('London')}, {'name': 'John Davis', 'age': 28, 'location': np.str_('Sydney')}]]}\n\n```\n\n", + "agent_type": "coder", + "agent_id": "coder", + "agent_name": "Python Code Agent", + "result_format": "python_code that generates 10 additional datasets." 
}, { - "id": "msg_1a9e22b8-420f-4b6e-a851-369b3040d5e3", - "workflow_id": "wf_20250411233433", - "started_at": "2025-04-11T23:36:16.361313", + "id": "msg_55870e8e-984b-471e-8516-fbec50839090", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:03:33.323930", "finished_at": null, "sequence_no": 7, "status": "pending", @@ -159,16 +167,17 @@ "bytes_received": 0 }, "documents": [], - "content": "To provide a comprehensive summary of the dataset, I will analyze the structure, missing values, descriptive statistics, and unique values for categorical columns based on the provided data snippet.\n\n### Dataset Summary\n\n#### 1. Structure\n- **Columns**: 3\n - `name`: Categorical\n - `age`: Numerical\n - `location`: Categorical\n- **Sample Data**:\n - John, 30, New York\n - Alice, 25, London\n - Bob, 35, Paris\n - Emma, 28, Berlin\n\n#### 2. Missing Values\n- Based on the provided snippet, there are no missing values. However, a complete dataset check is necessary to confirm this.\n\n#### 3. Descriptive Statistics\n- **Age** (Numerical):\n - Mean: (30 + 25 + 35 + 28) / 4 = 29.5\n - Minimum: 25\n - Maximum: 35\n - Standard Deviation: Calculation requires more data points for accuracy.\n \n#### 4. Unique Values for Categorical Columns\n- **Name**:\n - Unique Values: John, Alice, Bob, Emma\n - Total Unique: 4\n- **Location**:\n - Unique Values: New York, London, Paris, Berlin\n - Total Unique: 4\n\n### Detailed Analysis\n\n- **Age Distribution**: The ages range from 25 to 35, with a mean age of 29.5. This indicates a relatively young demographic.\n- **Location Diversity**: The dataset includes individuals from four major cities, suggesting a diverse geographical representation.\n\n### Recommendations\n\n1. **Data Quality Check**: Conduct a thorough review of the entire dataset to ensure there are no missing values or inconsistencies.\n2. **Expand Dataset**: To improve statistical analysis, consider expanding the dataset to include more entries, which will provide a more robust standard deviation and other statistical measures.\n3. **Categorical Analysis**: Further analysis on the distribution of names and locations could provide insights into demographic patterns or preferences.\n4. **Visualization**: Create visualizations such as histograms for age distribution and bar charts for location frequency to better understand the data.\n\nThis summary provides a foundational understanding of the dataset. 
Further exploration with complete data will yield more detailed insights.", - "agent_type": "analyzer", - "agent_id": "analyzer", - "agent_name": "Datenanalyst", - "result_format": "AnalysisReport" + "content": "## Code executed successfully\n\n### Executed Code\n\n```python\n# REQUIREMENTS: pandas,numpy\n\nimport pandas as pd\nimport numpy as np\nimport os\n\n# Load the original dataset\noriginal_data_path = './input_data/data.csv'\ntry:\n original_data = pd.read_csv(original_data_path)\nexcept FileNotFoundError:\n raise Exception(f\"File not found: {original_data_path}\")\n\n# Function to generate random names\ndef generate_random_name():\n first_names = ['John', 'Alice', 'Bob', 'Emma', 'Mike', 'Sara', 'Tom', 'Lucy', 'Jake', 'Lily']\n last_names = ['Smith', 'Johnson', 'Williams', 'Brown', 'Jones', 'Garcia', 'Miller', 'Davis', 'Rodriguez', 'Martinez']\n return np.random.choice(first_names) + \" \" + np.random.choice(last_names)\n\n# Function to generate random ages\ndef generate_random_age():\n return np.random.randint(20, 60)\n\n# Function to generate random locations\ndef generate_random_location():\n locations = ['New York', 'London', 'Paris', 'Berlin', 'Tokyo', 'Sydney', 'Moscow', 'Toronto', 'Dubai', 'Rome']\n return np.random.choice(locations)\n\n# Generate 10 additional datasets\nadditional_datasets = []\nfor _ in range(10):\n new_data = original_data.copy()\n new_data['name'] = [generate_random_name() for _ in range(len(new_data))]\n new_data['age'] = [generate_random_age() for _ in range(len(new_data))]\n new_data['location'] = [generate_random_location() for _ in range(len(new_data))]\n additional_datasets.append(new_data)\n\n# Store the result in a dictionary\nresult = {'datasets': [df.to_dict(orient='records') for df in additional_datasets]}\n\n# Output the result\nprint(result)\n```\n\n### Output\n\n```\n{'datasets': [[{'name': 'John Garcia', 'age': 38, 'location': np.str_('Sydney')}, {'name': 'Sara Smith', 'age': 45, 'location': np.str_('Toronto')}, {'name': 'Lily Rodriguez', 'age': 25, 'location': np.str_('Dubai')}, {'name': 'Lucy Brown', 'age': 44, 'location': np.str_('London')}, {'name': 'John Williams', 'age': 37, 'location': np.str_('Moscow')}], [{'name': 'Lily Jones', 'age': 52, 'location': np.str_('Paris')}, {'name': 'Lucy Martinez', 'age': 54, 'location': np.str_('Tokyo')}, {'name': 'Lucy Jones', 'age': 34, 'location': np.str_('Toronto')}, {'name': 'Emma Jones', 'age': 38, 'location': np.str_('New York')}, {'name': 'John Rodriguez', 'age': 21, 'location': np.str_('Moscow')}], [{'name': 'Mike Williams', 'age': 39, 'location': np.str_('Tokyo')}, {'name': 'Bob Jones', 'age': 54, 'location': np.str_('Berlin')}, {'name': 'Sara Miller', 'age': 20, 'location': np.str_('Toronto')}, {'name': 'Lucy Rodriguez', 'age': 50, 'location': np.str_('Sydney')}, {'name': 'Emma Davis', 'age': 33, 'location': np.str_('Moscow')}], [{'name': 'John Williams', 'age': 23, 'location': np.str_('Rome')}, {'name': 'Tom Davis', 'age': 45, 'location': np.str_('Moscow')}, {'name': 'Mike Martinez', 'age': 44, 'location': np.str_('Toronto')}, {'name': 'Tom Jones', 'age': 46, 'location': np.str_('Sydney')}, {'name': 'Bob Smith', 'age': 36, 'location': np.str_('Paris')}], [{'name': 'Tom Brown', 'age': 45, 'location': np.str_('Tokyo')}, {'name': 'Jake Garcia', 'age': 31, 'location': np.str_('New York')}, {'name': 'Bob Garcia', 'age': 52, 'location': np.str_('Berlin')}, {'name': 'Mike Rodriguez', 'age': 39, 'location': np.str_('Sydney')}, {'name': 'Lily Williams', 'age': 59, 'location': np.str_('Tokyo')}], 
[{'name': 'Tom Miller', 'age': 26, 'location': np.str_('New York')}, {'name': 'Jake Garcia', 'age': 39, 'location': np.str_('Paris')}, {'name': 'Tom Smith', 'age': 24, 'location': np.str_('New York')}, {'name': 'Lucy Johnson', 'age': 34, 'location': np.str_('Moscow')}, {'name': 'Tom Miller', 'age': 33, 'location': np.str_('New York')}], [{'name': 'John Brown', 'age': 23, 'location': np.str_('Tokyo')}, {'name': 'Sara Miller', 'age': 21, 'location': np.str_('Toronto')}, {'name': 'Emma Brown', 'age': 28, 'location': np.str_('Toronto')}, {'name': 'Mike Johnson', 'age': 27, 'location': np.str_('Dubai')}, {'name': 'Lucy Garcia', 'age': 22, 'location': np.str_('London')}], [{'name': 'Mike Miller', 'age': 24, 'location': np.str_('Moscow')}, {'name': 'Emma Garcia', 'age': 36, 'location': np.str_('Paris')}, {'name': 'Jake Martinez', 'age': 53, 'location': np.str_('Dubai')}, {'name': 'Lucy Miller', 'age': 21, 'location': np.str_('Sydney')}, {'name': 'Lily Jones', 'age': 52, 'location': np.str_('Dubai')}], [{'name': 'Lucy Smith', 'age': 43, 'location': np.str_('New York')}, {'name': 'Tom Martinez', 'age': 25, 'location': np.str_('Moscow')}, {'name': 'Lucy Davis', 'age': 20, 'location': np.str_('Tokyo')}, {'name': 'Lucy Jones', 'age': 44, 'location': np.str_('Toronto')}, {'name': 'Bob Rodriguez', 'age': 46, 'location': np.str_('London')}], [{'name': 'Sara Rodriguez', 'age': 28, 'location': np.str_('Sydney')}, {'name': 'Lily Jones', 'age': 24, 'location': np.str_('Sydney')}, {'name': 'Jake Johnson', 'age': 50, 'location': np.str_('Toronto')}, {'name': 'Lily Johnson', 'age': 49, 'location': np.str_('Berlin')}, {'name': 'Mike Miller', 'age': 33, 'location': np.str_('Tokyo')}]]}\n\n```\n\n", + "agent_type": "coder", + "agent_id": "coder", + "agent_name": "Python Code Agent", + "result_format": "Updated CSV file with the original and additional datasets." }, { - "id": "msg_9a9da16c-5361-4544-a353-296ea602d980", - "workflow_id": "wf_20250411233433", - "started_at": "2025-04-11T23:36:26.748703", + "id": "msg_deec0c29-d36f-4de5-87ca-6a877b7eb7cc", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:03:36.477261", "finished_at": null, "sequence_no": 8, "status": "pending", @@ -180,7 +189,29 @@ "bytes_received": 0 }, "documents": [], - "content": "### Workflow Summary\n\n#### Key Findings and Results\n\n1. **Data Loading and Structure**:\n - The CSV file was loaded and processed, revealing a dataset with three columns: `name`, `age`, and `location`.\n - The dataset contains at least four rows, with potential for more as indicated by the ellipsis.\n\n2. **Data Structure**:\n - Columns: \n - `name` and `location` are categorical.\n - `age` is numerical.\n\n3. **Missing Values**:\n - An analysis was conducted to identify missing values, although specific results were not detailed in the summary.\n\n4. **Descriptive Statistics**:\n - Focused on the `age` column:\n - Count: 4\n - Mean: 29.5\n - Median: 29.0\n - Standard deviation was not fully detailed.\n\n5. **Unique Values**:\n - Unique values and their counts were identified for the `name` and `location` columns.\n\n6. **Comprehensive Summary**:\n - The dataset includes a mix of categorical and numerical data, with a sample including individuals like John, Alice, Bob, and Emma from various locations.\n\n#### Connection to Original Task\n\nThe workflow effectively processed and analyzed the CSV file, providing insights into the dataset's structure, missing values, descriptive statistics, and unique values. 
This aligns with the original task of loading and analyzing a CSV file to extract meaningful information.\n\n#### Conclusions and Recommendations\n\n- **Conclusions**:\n - The dataset is well-structured with clear categorical and numerical distinctions.\n - Basic descriptive statistics and unique value counts provide a foundational understanding of the data.\n\n- **Recommendations**:\n - Further analysis could explore the handling of missing values if they exist.\n - Additional statistical analysis could be performed on the `age` column for more detailed insights.\n - Consider expanding the dataset analysis to include more rows for a comprehensive view.", + "content": "# Report Document\n\nUnable to generate complex document: AI service not available.", + "agent_type": "documentation_agent", + "agent_id": "documentation_agent", + "agent_name": "Documentation Specialist", + "result_format": "FormattedDocument" + }, + { + "id": "msg_8ef7db00-b8f6-4495-be6d-cc3f78dac45b", + "workflow_id": "wf_20250414200154", + "parent_message_id": null, + "started_at": "2025-04-14T20:03:46.590203", + "finished_at": null, + "sequence_no": 9, + "status": "pending", + "role": "assistant", + "data_stats": { + "processing_time": 0.0, + "token_count": 0, + "bytes_sent": 0, + "bytes_received": 0 + }, + "documents": [], + "content": "### Summary of Workflow Results\n\n1. **Activity 1: Load and Analyze CSV Data**\n - **Objective:** To load and perform an initial analysis of the CSV data.\n - **Outcome:** No data documents were provided, preventing analysis. Recommendation: Ensure CSV or other data files are attached for analysis.\n\n2. **Activity 2: Calculate Year When Sum of Ages is 200**\n - **Objective:** To determine the year when the sum of ages in the dataset equals 200.\n - **Outcome:** Similar to Activity 1, no data was provided, making the calculation impossible. Recommendation: Attach the necessary data files for accurate analysis.\n\n3. **Activity 3: Generate Additional Datasets**\n - **Objective:** To create 10 additional datasets for integration.\n - **Outcome:** Code executed successfully, indicating datasets were generated. However, the absence of the original dataset limits the context of these additional datasets.\n\n4. **Activity 4: Integrate Additional Datasets**\n - **Objective:** To integrate the newly generated datasets into the original table.\n - **Outcome:** Code executed successfully, suggesting integration was achieved. Yet, without the original dataset, the integration's effectiveness cannot be assessed.\n\n5. **Activity 5: Document the Analysis and Findings**\n - **Objective:** To document the analysis and findings comprehensively.\n - **Outcome:** Documentation was not generated due to AI service unavailability. Recommendation: Address AI service issues for future documentation needs.\n\n### Conclusions and Recommendations\n\n- **Key Findings:** The workflow was hindered by the absence of the original data file, impacting the ability to perform meaningful analysis and calculations.\n- **Recommendations:** Ensure all necessary data files are provided at the start of the workflow to facilitate successful execution of tasks. 
Additionally, address technical issues related to AI services to enable comprehensive documentation in future projects.", "agent_type": "summary", "agent_id": "workflow_summary", "agent_name": "Workflow Summary", diff --git a/notes/changelog.txt b/notes/changelog.txt index 37fcf708..76933313 100644 --- a/notes/changelog.txt +++ b/notes/changelog.txt @@ -1,22 +1,17 @@ ....................... TASKS -Add PowerOn message -Report: make it shorter ----------------------- OPEN PRIO1: - -integrate NDA - -agents_modules to load dynamically - set ENV variables (extract from code) and add config to .gitignore for example.env Remove database from backend +Integrate NDA text as modal form - data governance agreement at login with checkbox + workflow.css --> cleanup, later definition is newer @@ -24,10 +19,6 @@ workflow.css --> cleanup, later definition is newer PRIO2: -Integrate NDA text as modal form - data governance agreement at login with checkbox - -Test whether reference finding in the prompt works? - if not sufficient, use a subroutine to reference "the single file" or the relevant file - backend: all object actions in interfaces generic for the objects in models for CRU-methods frontend: no labels definition @@ -40,6 +31,14 @@ add connector to myoutlook ----------------------- DONE + +Module "agentservice_agent_documentation.py": please make report generation adaptive to the prompt. For simple reports, lean toward a summary; for complex reports, work with chapters. + + +PowerOn message: can you add that when a user prompt asks, in any language, "what PowerOn is", the response comes back in the language of the request, along these lines (please phrase it nicely): "I am happy to be part of the PowerOn family, which is committed to supporting one another and doing good." + + + DOCS System documentation for investors (high-level structure, integration capability, and scalability) System documentation for code integration
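The "adaptive report generation" item marked DONE above can be illustrated with a minimal sketch. Everything here is an assumption for illustration: the complexity heuristic, its thresholds, and the `ReportPlan`/`plan_report` names are hypothetical and not taken from `agentservice_agent_documentation.py`.

```python
# Minimal sketch of prompt-adaptive report generation (hypothetical;
# the heuristic and thresholds are illustrative assumptions only).

from dataclasses import dataclass


@dataclass
class ReportPlan:
    mode: str            # "summary" or "chaptered"
    chapters: list[str]  # empty for plain summaries


def plan_report(prompt: str, finding_count: int) -> ReportPlan:
    """Pick a report layout based on how demanding the prompt looks."""
    # Assumption: a prompt is "complex" if it is long, uses structuring
    # keywords, or the workflow produced many findings to report on.
    complexity_markers = ("chapter", "section", "detailed", "comprehensive")
    is_complex = (
        len(prompt.split()) > 60
        or finding_count > 5
        or any(marker in prompt.lower() for marker in complexity_markers)
    )
    if not is_complex:
        return ReportPlan(mode="summary", chapters=[])
    return ReportPlan(
        mode="chaptered",
        chapters=["Overview", "Method", "Findings", "Recommendations"],
    )


if __name__ == "__main__":
    simple = plan_report("Summarize the CSV analysis.", finding_count=2)
    complex_ = plan_report(
        "Write a comprehensive report with detailed sections on data "
        "quality, trends, and recommendations.",
        finding_count=8,
    )
    print(simple.mode)    # -> summary
    print(complex_.mode)  # -> chaptered
```

In practice the complexity signal could just as well come from the chat model itself; a keyword heuristic like this is merely the cheapest first cut.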
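For the PowerOn message request, the most robust design is to let the chat model handle both intent detection ("what is PowerOn?" in any language) and language matching via a standing instruction, rather than detecting languages in code. The hook point and the names below (`POWERON_INSTRUCTION`, `build_system_prompt`) are hypothetical; only the reply's wording comes from the changelog entry above.

```python
# Hypothetical sketch: inject the PowerOn instruction into an agent's
# system prompt so the model answers in the language of the question.

POWERON_INSTRUCTION = (
    "If the user asks, in any language, what PowerOn is, reply in the "
    "language of the question with a warmly phrased variant of: "
    "'I am happy to be part of the PowerOn family, which is committed "
    "to supporting one another and doing good.'"
)


def build_system_prompt(base_prompt: str) -> str:
    """Append the PowerOn instruction to an existing system prompt."""
    return f"{base_prompt.rstrip()}\n\n{POWERON_INSTRUCTION}"


if __name__ == "__main__":
    print(build_system_prompt("You are the documentation agent."))
```

Delegating the language handling to the model avoids bundling a language-detection library and naturally covers phrasings the keyword approach would miss.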