3 level agents
This commit is contained in:
parent
a1c1f04922
commit
61acf4950b
56 changed files with 14532 additions and 1717 deletions
|
|
@ -82,5 +82,17 @@
|
|||
"hash": "c17eb7cc2ed742ddeada7a9548bc5e7c943ed68456c3612acb3c0a94809e5c65",
|
||||
"upload_date": "2025-04-04T18:33:15.676504",
|
||||
"id": 7
|
||||
},
|
||||
{
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "data.csv",
|
||||
"type": "document",
|
||||
"content_type": "application/vnd.ms-excel",
|
||||
"size": 78,
|
||||
"path": "./_uploads\\1\\file_7980d2d6-53c8-48ed-930c-fa5dd114ce15_data.csv",
|
||||
"hash": "8ba6e6b67fe69411e4eb3962180591aaa67778fa96b990b6df79efbc398bce31",
|
||||
"upload_date": "2025-04-11T10:18:31.414800",
|
||||
"id": 8
|
||||
}
|
||||
]
|
||||
190
gwserver/_old_bk_modules/agentservice_agent_analyst.py
Normal file
190
gwserver/_old_bk_modules/agentservice_agent_analyst.py
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
"""
|
||||
Datenanalyst-Agent für die Analyse und Interpretation von Daten.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from connectors.connector_aichat_openai import ChatService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AnalystAgent(BaseAgent):
    """Agent that analyses and interprets data.

    Builds a German-language system prompt tailored to the file types
    found in the message context, sends the conversation to the OpenAI
    chat service and returns the analysis as an assistant message.
    """

    # Shared singleton instance, managed by get_instance().
    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the singleton instance, creating it on first access."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def __init__(self):
        """Initialise the data-analyst agent with its static metadata."""
        super().__init__()
        self.id = "analyst_agent"
        self.name = "Datenanalyst"
        self.type = "analyzer"
        self.description = "Analysiert und interpretiert Daten"
        self.capabilities = "Datenanalyse, Mustererkennung, Statistik und Bewertung"
        self.instructions = """
Du bist der Datenanalyseagent. Deine Aufgabe:
1. Vorliegende Daten untersuchen und interpretieren
2. Erkenntnisse aus Informationen gewinnen
3. Trends identifizieren und Zusammenhänge prüfen
4. Daten visualisieren und Konzepte erklären
5. Datenqualität bewerten und Handlungsempfehlungen geben
"""
        self.result_format = "AnalysisReport"

    def get_prompt(self, message_context: Dict[str, Any]) -> str:
        """Generate a customised prompt for the data analyst.

        Args:
            message_context: Message context; an optional "documents"
                list is used to tailor the prompt to the file types.

        Returns:
            Formatted prompt for the data analyst, surrounding
            whitespace stripped.
        """
        # Base prompt: agent identity, instructions and expected output.
        prompt = f"""
Du bist {self.name}, ein {self.type} Agent.

{self.description}

Fähigkeiten: {self.capabilities}

{self.instructions}

Analysiere die vorliegenden Daten. Präsentiere klar strukturierte Ergebnisse
mit einer Zusammenfassung, Detailanalyse und Handlungsempfehlungen.

Formatiere mit [STATUS: ERGEBNIS/TEILWEISE/PLAN] am Ende.
"""

        # Append short, file-type-specific instructions.
        document_types = self._get_document_types(message_context)

        if "csv" in document_types or "excel" in document_types:
            prompt += "\nTABELLENDATEN: Identifiziere wichtige Spalten, Korrelationen und Trends."

        if "pdf" in document_types or "doc" in document_types:
            prompt += "\nTEXTDATEN: Extrahiere zentrale Fakten und Schlüsselthemen."

        if "image" in document_types:
            prompt += "\nBILDDATEN: Beschreibe und interpretiere dargestellte Informationen."

        return prompt.strip()

    def _get_document_types(self, message_context: Dict[str, Any]) -> List[str]:
        """Extract coarse file-type tags from the documents in the context.

        Args:
            message_context: Message context with an optional
                "documents" list; each document may carry a "source"
                dict with "name" and "content_type".

        Returns:
            List of tags out of {"csv", "excel", "pdf", "doc", "image"},
            one entry per recognised document (duplicates possible).
        """
        document_types = []

        documents = message_context.get("documents", [])

        for doc in documents:
            source = doc.get("source", {})
            name = source.get("name", "").lower()
            content_type = source.get("content_type", "").lower()

            # Classify by file extension first, then by MIME type.
            if name.endswith(".csv") or "csv" in content_type:
                document_types.append("csv")
            elif name.endswith((".xls", ".xlsx")) or "excel" in content_type or "spreadsheet" in content_type:
                document_types.append("excel")
            elif name.endswith(".pdf") or "pdf" in content_type:
                document_types.append("pdf")
            elif name.endswith((".doc", ".docx")) or "word" in content_type:
                document_types.append("doc")
            elif name.endswith((".jpg", ".jpeg", ".png", ".gif")) or "image" in content_type:
                document_types.append("image")

        return document_types

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """Process a message and perform a data analysis.

        Args:
            message: Message to process; its "content" key is sent to
                the model as the user turn.
            context: Optional extra context. "documents" feeds the
                prompt, "history" items are appended to the API call,
                and the extracted status is written back to
                context["status"].

        Returns:
            Assistant response dict with the analysis (keys "role",
            "content", "agent_type", "result_format"), or an error
            response if anything fails.
        """
        try:
            # Build the prompt from the documents in the context.
            message_context = {"documents": context.get("documents", [])} if context else {}
            prompt = self.get_prompt(message_context)

            chat_service = ChatService()
            try:
                # Prepare the messages for the API.
                messages = [
                    {"role": "system", "content": prompt},
                    {"role": "user", "content": message.get("content", "")}
                ]

                # NOTE(review): history items are appended AFTER the current
                # user message, so the model sees them out of chronological
                # order — confirm whether this ordering is intended.
                if context and "history" in context:
                    for history_item in context["history"]:
                        messages.append({
                            "role": history_item.get("role", "user"),
                            "content": history_item.get("content", "")
                        })

                response_content = await chat_service.call_api(messages)
            finally:
                # Fix: always release the connection, even when call_api
                # raises (the original only closed it on success).
                await chat_service.close()

            # Build the response object.
            analysis_response = {
                "role": "assistant",
                "content": response_content,
                "agent_type": self.type
            }

            # Split the [STATUS: ...] marker off the model output.
            content, status = self.extract_status(analysis_response["content"])
            analysis_response["content"] = content

            # Propagate the status to the caller via the shared context.
            if context is not None:
                context["status"] = status
            analysis_response["result_format"] = self.result_format
            return analysis_response

        except Exception as e:
            logger.error(f"Fehler bei der Verarbeitung der Anfrage: {str(e)}", exc_info=True)

            # Return a best-effort error answer instead of raising.
            return {
                "role": "assistant",
                "content": f"Bei der Datenanalyse ist ein Fehler aufgetreten: {str(e)}",
                "agent_type": self.type
            }
|
||||
|
||||
# Module-level singleton, created lazily by get_analyst_agent().
_analyst_agent = None


def get_analyst_agent():
    """Return the module-wide AnalystAgent singleton, creating it on first call."""
    global _analyst_agent
    if _analyst_agent is None:
        _analyst_agent = AnalystAgent()
    return _analyst_agent
|
||||
426
gwserver/_old_bk_modules/agentservice_agent_coder.py
Normal file
426
gwserver/_old_bk_modules/agentservice_agent_coder.py
Normal file
|
|
@ -0,0 +1,426 @@
|
|||
"""
|
||||
Erweiterter Coder-Agent für die Entwicklung und Ausführung von Python-Code (Fortsetzung).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
from typing import List, Dict, Any, Optional
|
||||
import asyncio
|
||||
import re
|
||||
import traceback
|
||||
from datetime import datetime
|
||||
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from modules.lucydom_interface import get_lucydom_interface
|
||||
from modules.agentservice_code_executor import CodeExecutor
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CoderAgent(BaseAgent):
    """Extended agent for developing and executing Python code.

    Wraps user-supplied code with a set of helper "system functions"
    (file access, CSV/PDF/image helpers) and runs the combined source
    in a sandboxed CodeExecutor with module, package, time and memory
    restrictions.
    """

    async def _execute_code(self, code: str, lucydom_interface, context: Dict[str, Any] = None) -> Dict[str, Any]:
        """Execute Python code with the CodeExecutor.

        Args:
            code: The Python code to execute.
            lucydom_interface: Interface for database access; referenced
                by the generated helper functions via a global.
            context: Optional extra context; "workflow_id" and
                "documents" are forwarded to the executed code.

        Returns:
            Executor result dict with keys "success", "output", "error"
            and "result".
        """
        try:
            # Prepend the helper functions the user code may call.
            system_functions_code = self._prepare_system_functions(lucydom_interface)
            enhanced_code = system_functions_code + "\n\n" + code

            # Project modules the sandboxed code is allowed to import.
            available_modules = [
                "modules.lucydom_interface",
                "modules.lucydom_model",
                "modules.agentservice_filehandling"
            ]

            # None means: all packages allowed except those explicitly blocked.
            allowed_packages = None

            # Packages rejected for security or resource reasons.
            blocked_packages = [
                "cryptography", "flask", "django", "tornado",  # security risks
                "tensorflow", "pytorch", "scikit-learn"  # resource intensive
            ]

            executor = CodeExecutor(
                app_modules=available_modules,
                timeout=60,  # 60 second timeout
                max_memory_mb=512,  # 512MB memory limit
                allowed_packages=allowed_packages,
                blocked_packages=blocked_packages
            )

            try:
                # Input data handed to the executed code.
                input_data = {
                    "context": context,
                    "workflow_id": context.get("workflow_id", "") if context else "",
                }

                # Attach references to the files from the context.
                if context and "documents" in context:
                    file_refs = []
                    for doc in context.get("documents", []):
                        source = doc.get("source", {})
                        if source.get("type") == "file":
                            file_refs.append({
                                "id": source.get("id", ""),
                                "name": source.get("name", ""),
                                "type": source.get("content_type", "")
                            })
                    input_data["files"] = file_refs

                # Execute the combined code.
                result = executor.execute_code(enhanced_code, input_data)

                # Log the outcome of the execution.
                if result.get("success", False):
                    # Fix: plain string — the original used an f-string with
                    # no placeholders.
                    logger.info("Code erfolgreich ausgeführt")
                    output = result.get("output", "")
                    if output:
                        # Fix: the original conditional spanned the whole log
                        # argument, so short outputs lost the "Ausgabe:" prefix.
                        preview = f"{output[:200]}..." if len(output) > 200 else output
                        logger.debug(f"Ausgabe: {preview}")
                else:
                    logger.error(f"Fehler bei der Codeausführung: {result.get('error', 'Unbekannter Fehler')}")

                return result
            finally:
                # Release sandbox resources.
                executor.cleanup()

        except Exception as e:
            logger.error(f"Fehler bei der Codeausführung: {str(e)}", exc_info=True)

            return {
                "success": False,
                "output": "",
                "error": f"Fehler bei der Ausführung: {str(e)}\n{traceback.format_exc()}",
                "result": None
            }

    def _prepare_system_functions(self, lucydom_interface) -> str:
        """Prepare the helper functions prepended to every executed code block.

        Args:
            lucydom_interface: Interface for database access. The
                generated code expects an object of this name to be
                available as a global at execution time.

        Returns:
            Python source code (as a string) defining the system
            functions.
        """
        # NOTE(review): the generated update_file() below writes a temp file
        # it never reads (save_uploaded_file receives `content` directly).
        # The template is kept verbatim; confirm before changing generated code.
        system_functions_code = """
# Systemfunktionen für den Code

async def load_file(file_id, encoding=None):
    \"\"\"
    Lädt eine Datei aus der Datenbank anhand ihrer ID.

    Args:
        file_id: ID der zu ladenden Datei
        encoding: Optionale Kodierung (Standard: None für binäre Daten)

    Returns:
        Binäre Daten oder dekodierter String, je nach Encoding-Parameter
    \"\"\"
    try:
        # lucydom_interface wird über Globals zur Verfügung gestellt
        global lucydom_interface

        if not lucydom_interface:
            raise ValueError("LucyDOM-Interface nicht verfügbar")

        # Dateiinhalt asynchron laden
        file_content = await lucydom_interface.read_file_content(file_id)

        if not file_content:
            raise ValueError(f"Datei mit ID {file_id} nicht gefunden")

        # Wenn Encoding angegeben ist, String zurückgeben
        if encoding:
            return file_content.decode(encoding)

        # Andernfalls binäre Daten zurückgeben
        return file_content
    except Exception as e:
        print(f"Fehler beim Laden der Datei {file_id}: {str(e)}")
        raise


def save_file(content, file_name, content_type=None):
    \"\"\"
    Speichert Daten als Datei in der Datenbank.

    Args:
        content: Zu speichernde Daten (String oder Bytes)
        file_name: Name der Datei
        content_type: MIME-Typ der Datei (z.B. 'text/csv')

    Returns:
        Metadaten der gespeicherten Datei inkl. ID
    \"\"\"
    try:
        # lucydom_interface wird über Globals zur Verfügung gestellt
        global lucydom_interface

        if not lucydom_interface:
            raise ValueError("LucyDOM-Interface nicht verfügbar")

        # Wenn der Inhalt ein String ist, in Bytes konvertieren
        if isinstance(content, str):
            content = content.encode('utf-8')

        # Datei speichern
        file_meta = lucydom_interface.save_uploaded_file(content, file_name)

        # Wenn content_type angegeben ist, Datei-Metadaten aktualisieren
        if content_type and "id" in file_meta:
            update_data = {"content_type": content_type}
            lucydom_interface.update_file(file_meta["id"], update_data)
            file_meta["content_type"] = content_type

        return file_meta
    except Exception as e:
        print(f"Fehler beim Speichern der Datei {file_name}: {str(e)}")
        raise


def update_file(file_id, content, update_metadata=None):
    \"\"\"
    Aktualisiert eine bestehende Datei in der Datenbank.

    Args:
        file_id: ID der zu aktualisierenden Datei
        content: Neue Inhalte für die Datei (String oder Bytes)
        update_metadata: Optionale Metadaten-Updates

    Returns:
        Aktualisierte Metadaten der Datei
    \"\"\"
    try:
        # lucydom_interface wird über Globals zur Verfügung gestellt
        global lucydom_interface

        if not lucydom_interface:
            raise ValueError("LucyDOM-Interface nicht verfügbar")

        # Wenn der Inhalt ein String ist, in Bytes konvertieren
        if isinstance(content, str):
            content = content.encode('utf-8')

        # Temporäre Datei erstellen
        import tempfile
        import os

        temp_file = tempfile.NamedTemporaryFile(delete=False)
        temp_file.write(content)
        temp_file.close()

        # Bestehende Datei abrufen
        file_meta = lucydom_interface.get_file(file_id)

        if not file_meta:
            raise ValueError(f"Datei mit ID {file_id} nicht gefunden")

        # Datei mit neuen Inhalten aktualisieren
        with open(temp_file.name, 'rb') as f:
            updated_meta = lucydom_interface.save_uploaded_file(content, file_meta.get("name", "updated_file"))

        # Temporäre Datei löschen
        os.unlink(temp_file.name)

        # Metadaten aktualisieren
        if update_metadata and "id" in updated_meta:
            lucydom_interface.update_file(updated_meta["id"], update_metadata)
            updated_meta.update(update_metadata)

        return updated_meta
    except Exception as e:
        print(f"Fehler beim Aktualisieren der Datei {file_id}: {str(e)}")
        raise


def get_file_metadata(file_id):
    \"\"\"
    Ruft die Metadaten einer Datei ab.

    Args:
        file_id: ID der Datei

    Returns:
        Metadaten der Datei als Dictionary
    \"\"\"
    try:
        # lucydom_interface wird über Globals zur Verfügung gestellt
        global lucydom_interface

        if not lucydom_interface:
            raise ValueError("LucyDOM-Interface nicht verfügbar")

        # Datei-Metadaten abrufen
        file_meta = lucydom_interface.get_file(file_id)

        if not file_meta:
            raise ValueError(f"Datei mit ID {file_id} nicht gefunden")

        return file_meta
    except Exception as e:
        print(f"Fehler beim Abrufen der Metadaten für Datei {file_id}: {str(e)}")
        raise


def process_csv(content, operations=None):
    \"\"\"
    Verarbeitet CSV-Daten mit Pandas.

    Args:
        content: CSV-Daten als String oder Bytes
        operations: Liste von Operationen, die auf den Daten ausgeführt werden sollen
            [{"type": "filter", "column": "Name", "value": "Max"},
             {"type": "groupby", "column": "Category"}]

    Returns:
        Ergebnis der Verarbeitung als Dictionary
    \"\"\"
    try:
        import pandas as pd
        import io

        # Wenn der Inhalt Bytes ist, in String konvertieren
        if isinstance(content, bytes):
            content = content.decode('utf-8')

        # CSV in DataFrame laden
        df = pd.read_csv(io.StringIO(content))

        # Wenn Operationen angegeben sind, diese durchführen
        if operations:
            for op in operations:
                op_type = op.get("type", "").lower()

                if op_type == "filter" and "column" in op and "value" in op:
                    df = df[df[op["column"]] == op["value"]]

                elif op_type == "groupby" and "column" in op:
                    groupby_column = op["column"]
                    agg_column = op.get("aggregate_column")
                    agg_func = op.get("aggregate_function", "count")

                    if agg_column:
                        df = df.groupby(groupby_column).agg({agg_column: agg_func}).reset_index()
                    else:
                        df = df.groupby(groupby_column).size().reset_index(name='count')

        # Ergebnis zurückgeben
        return {
            "data": df.to_dict('records'),
            "columns": df.columns.tolist(),
            "shape": df.shape
        }
    except Exception as e:
        print(f"Fehler bei der CSV-Verarbeitung: {str(e)}")
        raise


def extract_text_from_pdf(pdf_data):
    \"\"\"
    Extrahiert Text aus einem PDF-Dokument.

    Args:
        pdf_data: PDF-Daten als Bytes

    Returns:
        Extrahierter Text aus dem PDF
    \"\"\"
    try:
        # Versuche PyPDF2 zu verwenden
        try:
            from PyPDF2 import PdfReader
            from io import BytesIO

            reader = PdfReader(BytesIO(pdf_data))
            text = ""

            for page in reader.pages:
                text += page.extract_text() + "\\n\\n"

            return text
        except ImportError:
            # Fallback auf pymupdf, falls PyPDF2 nicht verfügbar ist
            try:
                import fitz  # PyMuPDF
                from io import BytesIO

                doc = fitz.open("pdf", pdf_data)
                text = ""

                for page in doc:
                    text += page.get_text() + "\\n\\n"

                return text
            except ImportError:
                return "PDF-Extraktion fehlgeschlagen: Weder PyPDF2 noch PyMuPDF sind installiert"
    except Exception as e:
        print(f"Fehler bei der PDF-Extraktion: {str(e)}")
        return f"Fehler bei der PDF-Extraktion: {str(e)}"


def analyze_image(image_data, analysis_type="description"):
    \"\"\"
    Analysiert ein Bild (KI-basiert, falls verfügbar).

    Args:
        image_data: Bilddaten als Bytes
        analysis_type: Art der Analyse: 'description', 'objects', 'text'

    Returns:
        Ergebnis der Bildanalyse
    \"\"\"
    # Hinweis: Diese Funktion simuliert eine Bildanalyse,
    # da die echte KI-Analyse eine async-Funktion erfordern würde
    try:
        # Bildgröße ermitteln
        from io import BytesIO
        from PIL import Image

        image = Image.open(BytesIO(image_data))
        width, height = image.size
        format_name = image.format

        # Simulierte Analyse basierend auf dem Bildtyp
        analysis_result = {
            "image_info": {
                "width": width,
                "height": height,
                "format": format_name,
                "size_bytes": len(image_data)
            },
            "analysis_type": analysis_type,
            "analysis_result": f"Simulierte Bildanalyse für ein {format_name}-Bild ({width}x{height}px)"
        }

        return analysis_result
    except Exception as e:
        print(f"Fehler bei der Bildanalyse: {str(e)}")
        return {"error": str(e)}


# lucydom_interface global verfügbar machen
import asyncio
"""

        return system_functions_code
|
||||
|
||||
# Module-level singleton, created lazily by get_coder_agent().
_coder_agent = None


def get_coder_agent():
    """Return the module-wide CoderAgent singleton, creating it on first call."""
    global _coder_agent
    if _coder_agent is None:
        _coder_agent = CoderAgent()
    return _coder_agent
|
||||
422
gwserver/_old_bk_modules/agentservice_agent_documentation.py
Normal file
422
gwserver/_old_bk_modules/agentservice_agent_documentation.py
Normal file
|
|
@ -0,0 +1,422 @@
|
|||
"""
|
||||
Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten.
|
||||
Verwendet einen strukturierten mehrstufigen Prozess zur Erstellung hochwertiger Dokumentation.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from connectors.connector_aichat_openai import ChatService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentationAgent(BaseAgent):
|
||||
"""Agent für die Erstellung von Dokumentation und strukturierten Inhalten"""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
def get_instance(cls):
    """Return the process-wide singleton instance, creating it lazily."""
    instance = cls._instance
    if instance is None:
        instance = cls()
        cls._instance = instance
    return instance
|
||||
|
||||
def __init__(self):
    """Initialise the documentation agent with its static metadata."""
    super().__init__()
    # Identity and advertised capabilities of this agent.
    self.id = "documentation_agent"
    self.name = "Dokumentation"
    self.type = "documentation"
    self.description = "Erstellt Dokumentation und strukturierte Inhalte"
    self.capabilities = "Berichte, Dokumentationen"
    self.instructions = """
Du bist der Dokumentations-Agent. Deine Aufgabe:
1. Komplexe Informationen in klare, strukturierte Dokumente umsetzen
2. Verschiedene Dokumentformate erstellen
3. Informationen aus verschiedenen Quellen strukturieren
4. Technische Konzepte verständlich erklären
5. Konsistente Formatierung sicherstellen
"""
    # Chat service is initialised later; None until then.
    self.chat_service = None
    self.result_format = "FormattedDocument"
|
||||
|
||||
def get_base_prompt(self, document_type: str = "") -> str:
    """Build the base system prompt for the documentation agent.

    Args:
        document_type: Optional type of the document to create; when
            given, type-specific instructions are appended.

    Returns:
        The assembled prompt, stripped of surrounding whitespace.
    """
    sections = f"""
Du bist {self.name}, ein {self.type} Agent.

{self.description}

Fähigkeiten: {self.capabilities}

{self.instructions}
"""
    # Append document-type-specific guidance when a type was supplied.
    if document_type:
        sections += self._get_document_type_instructions(document_type)
    return sections.strip()
|
||||
|
||||
def _get_document_type_instructions(self, document_type: str) -> str:
|
||||
"""
|
||||
Gibt spezifische Anweisungen für einen bestimmten Dokumenttyp zurück.
|
||||
|
||||
Args:
|
||||
document_type: Typ des Dokuments
|
||||
|
||||
Returns:
|
||||
Spezifische Anweisungen für den Dokumenttyp
|
||||
"""
|
||||
document_type = document_type.lower()
|
||||
|
||||
if "handbuch" in document_type or "anleitung" in document_type or "guide" in document_type:
|
||||
return "\n\nHANDBUCH: Beginne mit Zweckbeschreibung, strukturiere in logische Schritte, verwende direkte Anweisungen."
|
||||
elif "bericht" in document_type or "report" in document_type:
|
||||
return "\n\nBERICHT: Beginne mit Executive Summary, strukturiere in thematische Abschnitte, halte professionellen Ton."
|
||||
elif "prozess" in document_type or "process" in document_type:
|
||||
return "\n\nPROZESS: Beschreibe Zweck, Ziele, Beteiligte, sequenzielle Schritte, Inputs/Outputs und Verantwortlichkeiten."
|
||||
elif "präsentation" in document_type or "presentation" in document_type:
|
||||
return "\n\nPRÄSENTATION: Klare Hauptpunkte, visuelle Elemente, Einleitung-Hauptteil-Schluss Struktur."
|
||||
else:
|
||||
return "\n\nDOKUMENT: Erstelle ein gut strukturiertes Dokument mit klarer Gliederung und präziser Sprache."
|
||||
|
||||
def _detect_document_type(self, message: str) -> str:
|
||||
"""
|
||||
Erkennt den Dokumenttyp aus der Nachricht.
|
||||
|
||||
Args:
|
||||
message: Nachricht des Benutzers
|
||||
|
||||
Returns:
|
||||
Erkannter Dokumenttyp
|
||||
"""
|
||||
message = message.lower()
|
||||
|
||||
if "handbuch" in message or "anleitung" in message or "guide" in message:
|
||||
return "handbuch"
|
||||
elif "bericht" in message or "report" in message:
|
||||
return "bericht"
|
||||
elif "prozess" in message or "process" in message or "ablauf" in message:
|
||||
return "prozess"
|
||||
elif "präsentation" in message or "presentation" in message or "folien" in message:
|
||||
return "präsentation"
|
||||
else:
|
||||
return "dokument"
|
||||
|
||||
async def generate_title(self, task: str, document_type: str) -> str:
    """Generate a concise, professional title for the document.

    Args:
        task: The task/request the document is based on.
        document_type: Type of the document.

    Returns:
        The generated title, cleaned of quotes, heading markers,
        dashes and surrounding whitespace.
    """
    request = f"""
Erstelle einen prägnanten, professionellen Titel für folgendes {document_type.capitalize()}:

AUFTRAG: {task}

Gib NUR den Titel zurück, ohne weitere Erklärungen oder Formatierungen.
"""

    raw_title = await self.chat_service.call_api([
        {"role": "system", "content": "Du bist ein Experte für die Erstellung von Dokumenttiteln."},
        {"role": "user", "content": request},
    ])

    # Strip quotes, markdown heading symbols and whitespace from the reply.
    return raw_title.strip('"\'#*- \n\t')
|
||||
|
||||
async def generate_summary(self, task: str, document_type: str, title: str) -> str:
    """Generate a concise summary for the document.

    Args:
        task: The task/request the document is based on.
        document_type: Type of the document.
        title: Title of the document.

    Returns:
        The generated summary, stripped of surrounding whitespace.
    """
    request = f"""
Erstelle eine prägnante Zusammenfassung für folgendes Dokument:

TITEL: {title}
TYP: {document_type.capitalize()}
AUFTRAG: {task}

Die Zusammenfassung soll einen Überblick über den Zweck und die Hauptinhalte des Dokuments geben.
Sie sollte etwa 3-5 Sätze umfassen und als eigenständiger Abschnitt funktionieren.
"""

    reply = await self.chat_service.call_api([
        {"role": "system", "content": "Du bist ein Experte für die Erstellung prägnanter Dokumentzusammenfassungen."},
        {"role": "user", "content": request},
    ])

    return reply.strip()
|
||||
|
||||
async def generate_toc_with_prompts(self, task: str, document_type: str, title: str, summary: str) -> Dict[str, str]:
    """Generate a table of contents plus a writing prompt per chapter.

    Args:
        task: The task/request the document is based on.
        document_type: Type of the document.
        title: Title of the document.
        summary: Summary of the document.

    Returns:
        Mapping of chapter title -> chapter prompt; a small static
        fallback TOC when the model reply is not valid JSON.
    """
    import json
    import re

    request = f"""
Erstelle ein strukturiertes Inhaltsverzeichnis für folgendes Dokument:

TITEL: {title}
TYP: {document_type.capitalize()}
AUFTRAG: {task}
ZUSAMMENFASSUNG: {summary}

Für jedes Kapitel gib auch einen kurzen Prompt an, der beschreibt, was in diesem Kapitel behandelt werden soll.
Formatiere deine Antwort als JSON-Objekt mit folgendem Format:
{{
    "Kapitel 1: Titel": "Prompt für Kapitel 1",
    "Kapitel 2: Titel": "Prompt für Kapitel 2",
    ...
}}

Beschränke dich auf 5-7 sinnvolle Kapitel, die das Thema umfassend behandeln.
"""

    toc_response = await self.chat_service.call_api([
        {"role": "system", "content": "Du bist ein Experte für die Strukturierung von Dokumenten und die Erstellung von Inhaltsverzeichnissen."},
        {"role": "user", "content": request},
    ])

    # Strip optional markdown code fences around the JSON payload.
    toc_response = re.sub(r'```json\s*|\s*```', '', toc_response)

    try:
        return json.loads(toc_response)
    except json.JSONDecodeError as e:
        logger.error(f"Fehler beim Parsen des Inhaltsverzeichnisses: {str(e)}")
        logger.error(f"Rohe Antwort: {toc_response}")
        # Emergency fallback so document generation can continue.
        return {
            "1. Einleitung": "Einführung in das Thema und Überblick",
            "2. Hauptteil": "Hauptinhalte des Dokuments",
            "3. Schlussfolgerung": "Zusammenfassung und nächste Schritte"
        }
|
||||
|
||||
async def generate_chapter_content(self, chapter_title: str, chapter_prompt: str,
                                   task: str, document_type: str, title: str, summary: str) -> str:
    """Generate the content for a single chapter.

    Args:
        chapter_title: Title of the chapter.
        chapter_prompt: Writing prompt for the chapter.
        task: The task/request the document is based on.
        document_type: Type of the document.
        title: Title of the document.
        summary: Summary of the document (kept for interface
            compatibility; not used in the request text).

    Returns:
        The generated chapter content, stripped of surrounding
        whitespace.
    """
    request = f"""
Erstelle detaillierten Inhalt für folgendes Kapitel eines {document_type}s:

DOKUMENT-TITEL: {title}
AUFGABE: {task}
KAPITEL: {chapter_title}
ANWEISUNG FÜR DIESES KAPITEL: {chapter_prompt}

Der Inhalt sollte detailliert, informativ und gut strukturiert sein.
Verwende bei Bedarf Unterüberschriften, Aufzählungen und Tabellen zur besseren Strukturierung.
Der Inhalt sollte direkt mit dem Kapiteltext beginnen, ohne den Kapiteltitel zu wiederholen.
"""

    reply = await self.chat_service.call_api([
        {"role": "system", "content": "Du bist ein Experte für die Erstellung hochwertiger Dokumentationsinhalte."},
        {"role": "user", "content": request},
    ])

    return reply.strip()
|
||||
|
||||
def _format_final_document(self, title: str, summary: str, toc: Dict[str, str], chapter_contents: Dict[str, str]) -> str:
    """Assemble the final Markdown document from its generated parts.

    Args:
        title: Document title.
        summary: Document summary.
        toc: Table of contents (chapter titles as keys).
        chapter_contents: Chapter titles mapped to their generated content.

    Returns:
        The fully formatted Markdown document.
    """
    def strip_numbering(chapter: str) -> str:
        # "3. Analyse" -> "Analyse"; titles without a leading number pass
        # through unchanged. Factored out: the original duplicated this
        # logic in both loops below.
        if chapter.strip()[:1].isdigit() and '. ' in chapter:
            return chapter.split('. ', 1)[1]
        return chapter

    # Title.
    doc = f"# {title}\n\n"

    # Summary section.
    doc += f"## Zusammenfassung\n\n{summary}\n\n"

    # Table of contents (re-numbered sequentially).
    doc += "## Inhaltsverzeichnis\n\n"
    for idx, chapter in enumerate(toc, 1):
        doc += f"{idx}. {strip_numbering(chapter)}\n"
    doc += "\n"

    # Chapter contents (headings re-numbered to match the TOC).
    for idx, (chapter, content) in enumerate(chapter_contents.items(), 1):
        doc += f"## {idx}. {strip_numbering(chapter)}\n\n{content}\n\n"

    # Footer metadata.
    doc += "---\n\n"
    doc += f"**Erstellt durch:** {self.name}\n"

    return doc
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
    """Process a message and build documentation in a structured pipeline.

    Pipeline: detect document type -> title -> summary -> TOC with
    per-chapter prompts -> chapter contents (sequentially) -> assembled
    document. The chat service is created lazily and closed at the end of
    every call (including on failure).

    Args:
        message: The message to process; its "content" is the task text.
        context: Optional extra context. May override the task via
            context["task"]; receives the extracted status under
            context["status"].

    Returns:
        An assistant message dict with the generated documentation, or an
        error reply if anything fails (this method does not raise).
    """
    try:
        # Lazily create the chat service on first use.
        if self.chat_service is None:
            self.chat_service = ChatService()

        # Extract the task; an explicit context["task"] takes precedence
        # over the raw message content.
        task = message.get("content", "")
        if context and "task" in context:
            task = context["task"]

        # Detect the document type from the task text.
        document_type = self._detect_document_type(task)

        logger.info(f"Starte Dokumentationserstellung für Typ: {document_type}")

        # Step 1: generate the title.
        title = await self.generate_title(task, document_type)
        logger.info(f"Titel generiert: {title}")

        # Step 2: generate the summary.
        summary = await self.generate_summary(task, document_type, title)
        logger.info("Zusammenfassung generiert")

        # Step 3: generate the table of contents with per-chapter prompts.
        toc_with_prompts = await self.generate_toc_with_prompts(task, document_type, title, summary)
        logger.info(f"Inhaltsverzeichnis mit {len(toc_with_prompts)} Kapiteln generiert")

        # Step 4: generate each chapter's content, one chapter at a time.
        chapter_contents = {}
        for chapter_title, chapter_prompt in toc_with_prompts.items():
            logger.info(f"Generiere Inhalt für Kapitel: {chapter_title}")
            content = await self.generate_chapter_content(
                chapter_title, chapter_prompt, task, document_type, title, summary
            )
            chapter_contents[chapter_title] = content

        # Step 5: merge everything into the final document.
        final_document = self._format_final_document(title, summary, toc_with_prompts, chapter_contents)
        logger.info(f"Dokument fertiggestellt mit {len(final_document)} Zeichen")

        # Step 6: build the response. The trailing status marker is parsed
        # back out by extract_status right below.
        documentation_response = {
            "role": "assistant",
            "content": f"{final_document}\n\n[STATUS: ERGEBNIS]",
            "agent_type": self.type
        }

        # Extract the status from the response and strip the marker.
        content, status = self.extract_status(documentation_response["content"])
        documentation_response["content"] = content

        # Propagate the status into the caller-provided context, if any.
        if context is not None:
            context["status"] = status

        # Close and drop the chat service so the next call starts fresh.
        await self.chat_service.close()
        self.chat_service = None
        documentation_response["result_format"] = self.result_format

        return documentation_response

    except Exception as e:
        logger.error(f"Fehler bei der Dokumentationserstellung: {str(e)}", exc_info=True)

        # Best-effort cleanup of the chat service on failure.
        if self.chat_service:
            try:
                await self.chat_service.close()
            except:  # deliberate best-effort close; errors here are ignored
                pass
        self.chat_service = None

        # Return an error reply instead of raising.
        return {
            "role": "assistant",
            "content": f"Bei der Erstellung der Dokumentation ist ein Fehler aufgetreten: {str(e)}",
            "agent_type": self.type
        }
|
||||
|
||||
# Module-level singleton holder for the documentation agent.
_documentation_agent = None


def get_documentation_agent():
    """Return the process-wide DocumentationAgent, creating it on first use."""
    global _documentation_agent
    if _documentation_agent is not None:
        return _documentation_agent
    _documentation_agent = DocumentationAgent()
    return _documentation_agent
|
||||
168
gwserver/_old_bk_modules/agentservice_agent_filecreator.py
Normal file
168
gwserver/_old_bk_modules/agentservice_agent_filecreator.py
Normal file
|
|
@ -0,0 +1,168 @@
|
|||
"""
|
||||
Filecreator-Agent für die Erstellung von Dateien mit Inhalten und deren Speicherung in der Datenbank (Fortsetzung).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import base64
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from modules.agentservice_base import BaseAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class FilecreatorAgent(BaseAgent):
    """Agent for creating files with content and storing them in the database.

    NOTE: this chunk is a continuation; the rest of the implementation
    lives elsewhere.
    """

    # (Previous implementation goes here)

    def _extract_file_params(self, message_content: str) -> Dict[str, Any]:
        """Parse file parameters (name, content, MIME type) from a message.

        Recognized line-oriented directives: ``DATEINAME:``/``FILENAME:``,
        ``TYP:``/``TYPE:`` and ``INHALT:``/``CONTENT:`` (everything after
        the content marker becomes the file body).

        Args:
            message_content: Raw message text.

        Returns:
            Dict with keys "name", "content" and "type" (MIME type).
        """
        # Defaults. "type" is intentionally NOT pre-set here: the original
        # initialized it to "text/plain", which made the extension-based
        # fallback at the bottom ('if "type" not in file_params') dead code.
        file_params = {
            "name": "document.txt",
            "content": "",
        }

        # Simple line-oriented heuristic for extracting the directives.
        lines = message_content.split('\n')
        content_lines = []
        is_content_section = False

        for line in lines:
            line = line.strip()

            # File-name directive.
            if line.startswith("DATEINAME:") or line.startswith("FILENAME:"):
                file_params["name"] = line.split(":", 1)[1].strip()

            # Explicit file-type directive: map to a MIME type and make the
            # file extension match.
            elif line.startswith("TYP:") or line.startswith("TYPE:"):
                file_type = line.split(":", 1)[1].strip().lower()

                if file_type in ["text", "txt", "plain"]:
                    file_params["type"] = "text/plain"
                    if not file_params["name"].endswith(".txt"):
                        file_params["name"] += ".txt"

                elif file_type in ["markdown", "md"]:
                    file_params["type"] = "text/markdown"
                    if not file_params["name"].endswith(".md"):
                        file_params["name"] += ".md"

                elif file_type in ["csv"]:
                    file_params["type"] = "text/csv"
                    if not file_params["name"].endswith(".csv"):
                        file_params["name"] += ".csv"

                elif file_type in ["json"]:
                    file_params["type"] = "application/json"
                    if not file_params["name"].endswith(".json"):
                        file_params["name"] += ".json"

                elif file_type in ["html"]:
                    file_params["type"] = "text/html"
                    if not file_params["name"].endswith(".html"):
                        file_params["name"] += ".html"

            # Start of the content section; subsequent lines are the body.
            elif line == "INHALT:" or line == "CONTENT:":
                is_content_section = True
                continue

            elif is_content_section:
                content_lines.append(line)

        # No content marker found: fall back to the whole message, dropping
        # the first/last lines, which may be instructions.
        if not content_lines and not is_content_section:
            if len(lines) > 4:
                content_lines = lines[2:-2]
            else:
                content_lines = lines

        # Assemble the content.
        file_params["content"] = "\n".join(content_lines)

        # Derive the MIME type from the file extension when no explicit
        # TYP:/TYPE: directive set one (this branch is now reachable).
        if "type" not in file_params:
            file_extension = file_params["name"].split(".")[-1].lower() if "." in file_params["name"] else ""
            if file_extension == "md":
                file_params["type"] = "text/markdown"
            elif file_extension == "csv":
                file_params["type"] = "text/csv"
            elif file_extension == "json":
                file_params["type"] = "application/json"
            elif file_extension == "html":
                file_params["type"] = "text/html"
            else:
                file_params["type"] = "text/plain"

        return file_params

    async def _create_and_save_file(self, file_params: Dict[str, Any], lucydom_interface) -> Tuple[str, str, str]:
        """Create a file from *file_params* and persist it via LucyDOM.

        Args:
            file_params: File parameters ("name", "content", "type").
            lucydom_interface: Interface used for database access.

        Returns:
            Tuple of (file_id, file_name, file_type).

        Raises:
            ValueError: If no interface is available or saving fails.
        """
        if not lucydom_interface:
            raise ValueError("Kein LucyDOM-Interface verfügbar für die Dateispeicherung")

        # Unpack the file parameters.
        file_name = file_params.get("name", "document.txt")
        file_content = file_params.get("content", "")
        content_type = file_params.get("type", "text/plain")

        # Coarse file category derived from the MIME type.
        file_type = "document"  # default category
        if content_type.startswith("image/"):
            file_type = "image"

        # Encode the textual content for binary storage.
        file_data = file_content.encode('utf-8')

        # Persist via the LucyDOM interface.
        try:
            file_meta = lucydom_interface.save_uploaded_file(file_data, file_name)

            if not file_meta or "id" not in file_meta:
                raise ValueError("Fehler beim Speichern der Datei")

            file_id = file_meta["id"]

            # Update category and MIME-type metadata if necessary.
            update_data = {"type": file_type, "content_type": content_type}
            lucydom_interface.update_file(file_id, update_data)

            return file_id, file_name, file_type

        except Exception as e:
            logger.error(f"Fehler beim Speichern der Datei {file_name}: {str(e)}")
            # Chain the original cause for easier debugging.
            raise ValueError(f"Fehler beim Speichern der Datei: {str(e)}") from e
|
||||
|
||||
|
||||
# Module-level singleton holder for the file creator agent.
_filecreator_agent = None


def get_filecreator_agent():
    """Return the process-wide FilecreatorAgent, creating it on first use."""
    global _filecreator_agent
    if _filecreator_agent is not None:
        return _filecreator_agent
    _filecreator_agent = FilecreatorAgent()
    return _filecreator_agent
|
||||
175
gwserver/_old_bk_modules/agentservice_agent_sharepoint.py
Normal file
175
gwserver/_old_bk_modules/agentservice_agent_sharepoint.py
Normal file
|
|
@ -0,0 +1,175 @@
|
|||
"""
|
||||
Sharepoint-Agent für die Interaktion mit Sharepoint-Ressourcen und Dokumenten.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
from modules.agentservice_base import BaseAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SharepointAgent(BaseAgent):
    """Agent for accessing and working with SharePoint resources.

    NOTE(review): process_message below returns a hard-coded sample answer;
    no real SharePoint connector is wired up in this version.
    """

    # Singleton instance holder.
    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the singleton instance, creating it on first access."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def __init__(self):
        """Initialize the SharePoint agent's identity and prompt texts."""
        super().__init__()
        self.id = "sharepoint_agent"
        self.name = "SharePoint-Agent"
        self.type = "sharepoint"
        self.description = "Zugriff auf und Arbeit mit SharePoint-Ressourcen"
        self.capabilities = "Suche und Abruf von Dokumenten aus SharePoint, Dokumentenverwaltung, Metadaten-Extraktion und Integration von SharePoint-Inhalten"
        # Runtime prompt text -- kept in German on purpose.
        self.instructions = """
Du bist der SharePoint-Agent, ein Spezialist für die Interaktion mit Microsoft SharePoint. Deine Aufgabe ist es:

1. SharePoint-Dokumente und -Ressourcen zu durchsuchen und abzurufen
2. Metadaten aus SharePoint-Dokumenten zu extrahieren und zu analysieren
3. Strukturierte Informationen aus SharePoint-Bibliotheken zu sammeln
4. Dokumente basierend auf Metadaten zu filtern und zu organisieren
5. Inhalte aus verschiedenen SharePoint-Quellen zu integrieren und zusammenzuführen
6. Informationen aus SharePoint-Listen und -Dokumentbibliotheken zu extrahieren
7. Zusammenfassungen und Analysen von SharePoint-Inhalten zu erstellen

Bei der Darstellung deiner Ergebnisse:
- Strukturiere die Informationen klar und übersichtlich
- Gib den Ursprung und die Metadaten der Dokumente an
- Zeige Beziehungen zwischen verschiedenen Dokumenten und Ressourcen auf
- Hebe wichtige Erkenntnisse und Muster hervor
- Biete Kontext und Relevanz für die gefundenen Informationen
"""
        self.result_format = "DocumentList"

    def get_prompt(self, message_context: Dict[str, Any]) -> str:
        """Build the full prompt for the SharePoint agent.

        Combines the BaseAgent prompt, SharePoint-specific interaction
        guidelines and (if present) the task from the message context.

        Args:
            message_context: Message context; may contain a "task" entry.

        Returns:
            The assembled prompt string, stripped.
        """
        # Base prompt from BaseAgent.
        base_prompt = super().get_prompt(message_context)

        # Additional guidelines for SharePoint interaction (runtime prompt
        # text, kept in German on purpose).
        sharepoint_instructions = """
SHAREPOINT-INTERAKTIONS-RICHTLINIEN:

1. Präzisiere die Suchkriterien für SharePoint-Ressourcen
2. Identifiziere relevante Bibliotheken, Listen und Standorte
3. Definiere benötigte Metadaten und Inhalte
4. Berücksichtige Berechtigungsanforderungen
5. Priorisiere aktuelle und relevante Dokumente
6. Stelle eine strukturierte Darstellung der Ergebnisse sicher

Für eine gute SharePoint-Integration:
- Gib detaillierte Pfade und Standorte an
- Berücksichtige verschiedene Dokumenttypen und Formate
- Zeige Metadaten und Dokumenteigenschaften
- Biete Kontext zu den gefundenen Ressourcen
- Berücksichtige Versionsinformationen
"""

        # Task from the context, if any.
        task = message_context.get("task", "")
        task_instructions = f"\nSHAREPOINT-AUFTRAG:\n{task}\n" if task else ""

        # Assemble the complete prompt.
        complete_prompt = f"{base_prompt}\n\n{sharepoint_instructions}\n{task_instructions}"

        return complete_prompt.strip()

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """Process a message and interact with SharePoint.

        Args:
            message: The message to process.
            context: Optional extra context; receives the extracted status
                under context["status"].

        Returns:
            An assistant message dict with SharePoint content.
            NOTE(review): currently a canned placeholder response -- the
            final implementation is meant to use a SharePoint connector.
        """
        # This is where the real SharePoint interaction would happen.
        # In the final implementation a SharePoint connector would be used.

        # As a placeholder, return a hard-coded sample answer.
        sharepoint_response = {
            "role": "assistant",
            "content": f"""Ich habe als {self.name} die SharePoint-Ressourcen durchsucht und folgende Ergebnisse gefunden:

## SharePoint-Suchergebnisse

Basierend auf deiner Anfrage habe ich folgende relevante Dokumente identifiziert:

### Dokumente
1. **Projektplan_2025.docx** (Letzte Änderung: 15.03.2025)
   - Standort: Projekte/Strategische Planung
   - Autor: Maria Schmidt
   - Schlüsselinhalt: Zeitplan für Q2-Q4 2025, Ressourcenplanung, Meilensteine

2. **Marktanalyse_Q1_2025.pptx** (Letzte Änderung: 22.02.2025)
   - Standort: Marketing/Marktforschung
   - Autor: Thomas Müller
   - Schlüsselinhalt: Aktuelle Markttrends, Wettbewerbsanalyse, Chancen und Risiken

3. **Budgetplanung_2025.xlsx** (Letzte Änderung: 01.03.2025)
   - Standort: Finanzen/Planung
   - Autor: Sarah Weber
   - Schlüsselinhalt: Detaillierte Budgetaufschlüsselung nach Abteilungen und Quartalen

### SharePoint-Listen
1. **Projektstatusliste**
   - 12 Einträge mit relevanten Projektstatusinformationen
   - Letzte Aktualisierung: 25.03.2025

## Zusammenfassung der Inhalte

Die gefundenen Dokumente zeigen übereinstimmend, dass:
- Der Fokus im Jahr 2025 auf der Expansion in neue Märkte liegt
- Das Budget für Forschung und Entwicklung um 15% erhöht wurde
- Drei neue Hauptprojekte im zweiten Quartal starten werden

## Empfehlungen

Basierend auf den gefundenen Informationen empfehle ich:
1. Die Projektpläne für Q2 mit besonderem Fokus auf die neuen Hauptprojekte zu prüfen
2. Die Ressourcenzuweisung entsprechend der Budgeterhöhung anzupassen
3. Die Marktanalyse als Grundlage für die Expansionsstrategie zu verwenden

Die Dokumente sind alle aktuell und wurden von den verantwortlichen Fachabteilungen erstellt.

[STATUS: ERGEBNIS]""",
            "agent_type": self.type
        }

        # Extract the status from the response and strip the marker.
        content, status = self.extract_status(sharepoint_response["content"])
        sharepoint_response["content"] = content

        # Propagate the status into the caller-provided context, if any.
        if context is not None:
            context["status"] = status
        sharepoint_response["result_format"] = self.result_format

        return sharepoint_response
|
||||
|
||||
# Module-level singleton holder for the SharePoint agent.
_sharepoint_agent = None


def get_sharepoint_agent():
    """Return the process-wide SharepointAgent, creating it on first use."""
    global _sharepoint_agent
    if _sharepoint_agent is not None:
        return _sharepoint_agent
    _sharepoint_agent = SharepointAgent()
    return _sharepoint_agent
|
||||
512
gwserver/_old_bk_modules/agentservice_agent_webcrawler.py
Normal file
512
gwserver/_old_bk_modules/agentservice_agent_webcrawler.py
Normal file
|
|
@ -0,0 +1,512 @@
|
|||
"""
|
||||
WebCrawler-Agent für die Recherche und Beschaffung von Informationen aus dem Web.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
import urllib
|
||||
from urllib.parse import quote_plus, unquote
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from connectors.connector_aichat_openai import ChatService
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class WebcrawlerAgent(BaseAgent):
|
||||
"""Agent für Web-Recherche und Informationsbeschaffung"""
|
||||
|
||||
_instance = None

# Shared chat service used for query planning and result summarization.
# NOTE(review): created at class-definition time and shared by all instances.
chat_service = ChatService()

# INIT --> should go to config
max_url = 3     # max number of direct URLs the strategy prompt may extract
max_key = 3     # max number of search key phrases the strategy prompt may generate

max_result = 3  # max number of search-engine results processed per key phrase

timeout = 10    # request timeout in seconds (NOTE(review): read_url hard-codes 10 instead of reading this)
# Browser-like request headers (used for search requests; read_url defines its own).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language': 'en-US,en;q=0.5',
    'Referer': 'https://www.google.com/',
    'DNT': '1',
    'Connection': 'keep-alive',
    'Upgrade-Insecure-Requests': '1',
}
max_urls = 10  # NOTE(review): appears unused in this chunk -- confirm against the full file
max_content_length = 100000  # cap for scraped content size -- TODO confirm where this is enforced
|
||||
|
||||
|
||||
@classmethod
def get_instance(cls):
    """Return the process-wide singleton, creating it on first access."""
    instance = cls._instance
    if instance is None:
        instance = cls()
        cls._instance = instance
    return instance
|
||||
|
||||
def __init__(self):
    """Set up the web crawler agent's identity, capabilities and result format."""
    super().__init__()
    # Identity within the agent framework.
    self.id = "webcrawler_agent"
    self.type = "scraper"
    self.name = "Webscraper"
    # Self-description texts surfaced to the orchestrator.
    self.description = "Recherchiert Informationen im Web"
    self.capabilities = (
        "Informationsrecherche, Datenbeschaffung aus dem Web, "
        "Quellenbewertung und Zusammenführung von Online-Informationen"
    )
    self.instructions = ""
    self.result_format = "SearchResults"
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
    """Run the web research for *message* and wrap it as an assistant reply.

    On failure an error reply is returned instead of raising.

    Args:
        message: The message to process; its "content" holds the task.
        context: Optional extra context; receives the extracted status
            under context["status"].

    Returns:
        An assistant message dict with the research result.
    """
    try:
        # Execute the whole search/summarize pipeline and await its result.
        research_text = await self.get_web_query(message)

        reply = {
            "role": "assistant",
            "content": f"{research_text} [STATUS: ERGEBNIS]",
            "agent_type": self.type,
        }

        # Split the status marker back off the content.
        stripped_content, status = self.extract_status(reply["content"])
        reply["content"] = stripped_content

        # Propagate the status into the caller-provided context, if any.
        if context is not None:
            context["status"] = status
        reply["result_format"] = self.result_format

        return reply

    except Exception as e:
        logger.error(f"Fehler bei der Web-Recherche: {str(e)}", exc_info=True)

        # Return an error reply instead of raising.
        return {
            "role": "assistant",
            "content": f"Bei der Web-Recherche ist ein Fehler aufgetreten: {str(e)}",
            "agent_type": self.type,
        }
|
||||
|
||||
async def get_web_query(self, message_context: Dict[str, Any]) -> str:
    """Run the full web research pipeline for one message.

    Steps: derive the research prompt, execute the search/scrape run
    (run_web_query), summarize each scraped result via the chat service,
    then prepend an overall summary across all results.

    Args:
        message_context: Message dict; its "content" holds the task.

    Returns:
        Overall summary followed by the numbered per-result details.
    """
    prompt = await self.get_prompt(message_context)
    result_json = await self.run_web_query(prompt)
    result_data = ""
    summary_src = ""

    logger.info(f"Web analysis prompt '{prompt}' delivers {len(result_json)} results.")
    if isinstance(result_json, list):
        for i, result in enumerate(result_json, 1):

            # Per-result summarization prompt (runtime text; single quotes
            # are stripped from the task to keep the literal well-formed).
            web_answer_instructions = f"""
Fass das Resultat gemäss dem Auftrag zusammen in maximal rund 2000 Zeichen. Auftrag = '{prompt.replace("'","")}'
Fasse die wichtigsten Erkenntnisse zusammen und setze sie in Bezug zur ursprünglichen Anfrage. Die Einleitung kannst Du weglassen.
Achte darauf, nur relevante und qualitativ hochwertige Informationen zu extrahieren, welche einen Bezug zum Auftrag haben, und übersichtlich zu präsentieren. Vermittle ein ausgewogenes Bild der recherchierten Informationen.

Dies ist das Resultat:
{result['data']}
"""

            # Summarize the scraped page content relative to the task.
            content_text = await self.chat_service.call_api(
                messages=[
                    {
                        "role": "system",
                        "content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."
                    },
                    {
                        "role": "user",
                        "content": web_answer_instructions
                    }
                ]
            )
            # Append a numbered result block; collect the per-result summary
            # as input for the overall summary below.
            result_data += f"\n\n[{i}] {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\nContent: {content_text}"
            summary_src += f"\n{content_text}"
    else:
        result_data = "no data received"

    logger.info(f"Web analysis result sent {len(result_data)}B")

    # Overall summary across all per-result summaries, if any were collected.
    summary = ""
    if len(summary_src) > 1:
        # NOTE(review): typo "Rechercheergbnissen" kept verbatim -- it is
        # runtime prompt text.
        summary = await self.chat_service.call_api(
            messages=[
                {
                    "role": "system",
                    "content": "Du erstellst prägnante Zusammenfassungen von Rechercheergbnissen."
                },
                {
                    "role": "user",
                    "content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src}\n"
                }
            ]
        )
    result = f"{summary}\n\n{result_data}"
    return result
|
||||
|
||||
|
||||
async def get_prompt(self, message_context: Dict[str, Any]) -> str:
    """Return the research task: the stripped "content" field of the message."""
    return message_context.get("content", "").strip()
|
||||
|
||||
|
||||
async def run_web_query(self, prompt: str) -> List[Dict]:
    """Plan and execute a web research run for *prompt*.

    Asks the chat model for a research strategy as JSON (direct URLs plus
    search key phrases), then hands it to scrape_json.

    Args:
        prompt: The research task text.

    Returns:
        List of scraped result dicts; empty list on empty prompt or when
        the strategy JSON cannot be parsed.
    """
    if prompt == "":
        return []

    # Strategy-planning prompt (runtime text, intentionally in English).
    ptext = f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.

'url': A list of maximum {self.max_url} specific URLs extracted from the task string.

'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.

Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
"""

    content_text = await self.chat_service.call_api(
        messages=[
            {
                "role": "system",
                "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
            },
            {
                "role": "user",
                "content": ptext
            }]
    )
    # Remove markdown formatting if present (the model may wrap the JSON
    # in a ```json fence despite the instructions).
    if content_text.startswith("```json"):
        # Find the end of the JSON block.
        end_marker = "```"
        end_index = content_text.rfind(end_marker)
        if end_index != -1:
            # Extract the JSON content without the markdown markers.
            content_text = content_text[7:end_index].strip()

    # Parse the strategy JSON and execute it.
    try:
        # NOTE(review): logged before parsing -- the JSON may still be invalid.
        logger.info(f"Valid json received: {str(content_text)}")
        pjson = json.loads(content_text)
        # Hand the parsed strategy dict to the scraper.
        result_json = await self.scrape_json(pjson)
        return result_json
    except json.JSONDecodeError as e:
        logger.error(f"Failed to parse JSON: {e}")
        logger.error(f"Cleaned content: {content_text[:100]}...")
        return []
|
||||
|
||||
|
||||
|
||||
async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
    """Scrape web content according to a research strategy.

    Args:
        research_strategy: Dict with:
            - 'skey': list of search key phrases to run through the search
              engine
            - 'url': list of direct URLs to scrape

    Returns:
        List of result dicts (title/url/snippet/data). On malformed input
        a dict with an 'error' key is returned instead (kept as-is for
        backward compatibility with the original error contract).
    """
    logger.info("Starting JSON-based web scraping")
    results = []

    # Validate input structure.
    if not isinstance(research_strategy, dict):
        logger.error("Invalid research_strategy format: not a dictionary")
        return {"error": "Invalid research_strategy format: not a dictionary"}

    keys = research_strategy.get("skey", [])
    direct_urls = research_strategy.get("url", [])

    if not isinstance(keys, list) or not isinstance(direct_urls, list):
        logger.error("Invalid research_strategy format: keys, or url is not a list")
        return {"error": "Invalid research_strategy format: keys, or url is not a list"}

    # Run each key phrase through the search engine.
    for keyword in keys:
        logger.info(f"Processing keyword: {keyword}")
        found_results = self.search_web(keyword)  # list of dicts: title, url, snippet, data
        logger.info(f"... {len(found_results)} results found")
        results.extend(found_results)

    # Track already-scraped URLs so direct URLs are not fetched twice.
    # (Fixes the original `if url in results` check, which compared a URL
    # string against a list of result dicts and therefore never matched.)
    seen_urls = {r.get('url') for r in results}

    # Scrape the direct URLs.
    logger.info(f"Processing {len(direct_urls)} direct URLs")
    for url in direct_urls:
        if url in seen_urls:
            logger.info(f"Skipping already scraped URL: {url}")
            continue
        soup = self.read_url(url)

        # Extract a title from the page when possible; fall back to the
        # first <h1>, then to a placeholder.
        if isinstance(soup, BeautifulSoup):
            title_tag = soup.find('title')
            title = title_tag.text.strip() if title_tag else "No title"

            if title == "No title":
                h1_tag = soup.find('h1')
                if h1_tag:
                    title = h1_tag.text.strip()
        else:
            # read_url failed to deliver a parseable page.
            title = "Error fetching page"

        # Pass the extracted title along. (The original passed the literal
        # "No title" here, discarding the title it had just extracted.)
        results.append(self.parse_result(soup, title, url))
        seen_urls.add(url)
    logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
    return results
|
||||
|
||||
|
||||
def search_web(self, query: str) -> List[Dict]:
    """Search DuckDuckGo's HTML endpoint for *query* and scrape the hits.

    For each hit the target page is fetched as well, so every returned
    dict carries the full page HTML in 'data'. At most self.max_result
    results are returned.

    Args:
        query: Search phrase.

    Returns:
        List of dicts with keys 'title', 'url', 'snippet', 'data'.
    """
    formatted_query = quote_plus(query)
    url = f"https://html.duckduckgo.com/html/?q={formatted_query}"

    search_results_soup = self.read_url(url)
    if not search_results_soup or search_results_soup.select('.result') is None or len(search_results_soup.select('.result')) == 0:
        logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
        return []

    # Extract search results.
    results = []

    # Find all result containers.
    result_elements = search_results_soup.select('.result')

    for result in result_elements:
        # Extract the title.
        title_element = result.select_one('.result__a')
        title = title_element.text.strip() if title_element else 'No title'

        # Extract the URL. DuckDuckGo wraps target URLs in a redirect; the
        # real URL is carried in the href's 'q' parameter.
        # NOTE(review): only the '/d.js?q=' redirect shape is handled --
        # confirm this still matches DuckDuckGo's current markup.
        url_element = title_element.get('href') if title_element else ''
        extracted_url = 'No URL'

        if url_element:
            # Extract the actual URL from DuckDuckGo's redirect.
            if url_element.startswith('/d.js?q='):
                start = url_element.find('?q=') + 3  # skip '?q='
                end = url_element.find('&', start) if '&' in url_element[start:] else None
                extracted_url = unquote(url_element[start:end])

                # Ensure the URL carries a protocol prefix.
                if not extracted_url.startswith(('http://', 'https://')):
                    if not extracted_url.startswith('//'):
                        extracted_url = 'https://' + extracted_url
                    else:
                        extracted_url = 'https:' + extracted_url
            else:
                extracted_url = url_element

        # Snippet straight from the search results page.
        snippet_element = result.select_one('.result__snippet')
        snippet = snippet_element.text.strip() if snippet_element else 'No description'

        # Fetch the actual target page for the 'data' field.
        target_page_soup = self.read_url(extracted_url)

        results.append({
            'title': title,
            'url': extracted_url,
            'snippet': snippet,
            'data': str(target_page_soup) if isinstance(target_page_soup, BeautifulSoup) else "Error fetching page"
        })

        # Stop once the configured result limit is reached.
        if len(results) >= self.max_result:
            break

    return results
|
||||
|
||||
|
||||
def read_url(self, url: str) -> BeautifulSoup:
    """
    Fetch a URL and return a BeautifulSoup parser for its content.

    Retries with increasing back-off while the server answers with
    HTTP 202 (accepted, result not ready yet).  On any error an empty
    BeautifulSoup document is returned so callers never have to handle
    exceptions themselves.

    Args:
        url: The URL to read.

    Returns:
        BeautifulSoup object with the page content, or an empty
        document on failure.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml',
        'Accept-Language': 'en-US,en;q=0.9',
    }

    try:
        import time

        # Initial request.
        response = requests.get(url, headers=headers, timeout=10)

        # Poll while the server reports 202 (result not ready yet).
        if response.status_code == 202:
            # Up to four retries with growing intervals
            # (the old comment claimed three but the list had four entries).
            backoff_times = [0.5, 1.0, 2.0, 5.0]

            for wait_time in backoff_times:
                time.sleep(wait_time)  # wait with increasing delay
                response = requests.get(url, headers=headers, timeout=10)

                # Stop polling as soon as the status changes.
                if response.status_code != 202:
                    break

        # Raise for any remaining error status.
        response.raise_for_status()

        # Parse the HTML.
        return BeautifulSoup(response.text, 'html.parser')

    except Exception as e:
        # Log instead of failing silently, then return an empty document
        # so the caller's parsing code keeps working.
        logger.warning(f"read_url failed for {url}: {e}")
        return BeautifulSoup("<html><body></body></html>", 'html.parser')
|
||||
|
||||
|
||||
def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
    """Build a result record (title, url, snippet, prettified HTML) from a parsed page."""
    # Extract the snippet/description; fall back to a placeholder when absent.
    node = data.select_one('.result__snippet')
    if node is not None:
        description = node.text.strip()
    else:
        description = 'No description'

    return {
        'title': title,
        'url': url,
        'snippet': description,
        'data': data.prettify(),
    }
|
||||
|
||||
|
||||
def _old_scrape_url(self, url: str) -> str:
    """
    Fetch *url* and return its main textual content.

    Strips scripts/styles/navigation, tries a list of common content
    container selectors, falls back to the body text, removes very short
    lines and truncates to ``self.max_content_length``.

    Args:
        url: The page to scrape.

    Returns:
        The cleaned main text, or an error marker string on failure.
    """
    try:
        logger.info(f"Requesting URL: {url}")
        response = requests.get(url, headers=self.headers, timeout=self.timeout)
        response.raise_for_status()

        soup = BeautifulSoup(response.text, 'html.parser')

        # Drop non-content elements before extracting text.
        for element in soup.select('script, style, meta, noscript, iframe, nav, footer, header, aside'):
            element.extract()

        main_content = ""

        # Common content containers, tried in order.
        content_selectors = [
            'main', '#main', '.main',
            'article', '.article',
            '#content', '.content',
            '.post', '#post',
            '.entry-content', '.post-content',
            '.page-content', '.article-content'
        ]

        for selector in content_selectors:
            elements = soup.select(selector)
            if elements:
                main_content = elements[0].get_text(separator='\n', strip=True)
                logger.info(f"Found content using selector: {selector}")
                break

        # Fall back to the body text.  Guard against pages without a
        # <body> tag — the original code crashed with AttributeError here.
        if not main_content:
            if soup.body is not None:
                main_content = soup.body.get_text(separator='\n', strip=True)
            else:
                main_content = soup.get_text(separator='\n', strip=True)
            logger.info("Using body text as no main content container found")

        # Keep only reasonably long lines to filter out menu/boilerplate noise.
        lines = [line.strip() for line in main_content.split('\n') if len(line.strip()) > 15]
        main_content = '\n'.join(lines)

        # Truncate overly long content.
        if len(main_content) > self.max_content_length:
            main_content = main_content[:self.max_content_length] + "...\n[Inhalt gekürzt]"

        return main_content.strip()

    except Exception as e:
        logger.error(f"Fehler beim Scrapen von {url}: {str(e)}")
        return f"[Fehler beim Scrapen von {url}: {str(e)}]"
|
||||
|
||||
|
||||
def _old_extract_urls_from_search_results(self, html_content: str) -> List[str]:
    """
    Extract result URLs from a search engine results page.

    Handles Google redirect links, generic absolute links (Bing) and
    Yahoo result anchors; falls back to scanning all anchors when none
    of the engine-specific patterns matched.

    Args:
        html_content: HTML content of the search results page.

    Returns:
        Up to ``self.max_urls`` extracted URLs, in page order.
    """
    soup = BeautifulSoup(html_content, 'html.parser')
    urls: List[str] = []
    excluded_domains = getattr(self, 'excluded_domains', [])

    def _add(url: str) -> None:
        # Accept only new absolute http(s) links outside excluded domains.
        # NOTE: the original applied the excluded-domain filter only to the
        # Bing and fallback branches; it is now applied consistently.
        if (url.startswith('http')
                and url not in urls
                and not any(domain in url for domain in excluded_domains)):
            urls.append(url)

    # Google wraps targets in /url?q=<target>&... redirect links.
    for a_tag in soup.select('a[href^="/url?"]'):
        href = a_tag.get('href', '')
        if '/url?q=' in href:
            target = urllib.parse.unquote(href.split('/url?q=')[1].split('&')[0])
            _add(target)

    # Bing (and most engines): plain absolute links.
    for a_tag in soup.select('a[href^="http"]'):
        _add(a_tag.get('href', ''))

    # Yahoo result anchors.
    for a_tag in soup.select('a.d-ib'):
        _add(a_tag.get('href', ''))

    # Generic fallback if nothing matched so far.
    if not urls:
        for a_tag in soup.find_all('a', href=True):
            _add(a_tag['href'])

    # Limit the number of results.
    return urls[:self.max_urls]
|
||||
|
||||
|
||||
|
||||
|
||||
# Lazily created module-level singleton.
_webcrawler_agent = None


def get_webcrawler_agent():
    """Return the module-wide WebcrawlerAgent singleton, creating it on first use."""
    global _webcrawler_agent
    if _webcrawler_agent is not None:
        return _webcrawler_agent
    _webcrawler_agent = WebcrawlerAgent()
    return _webcrawler_agent
|
||||
124
gwserver/_old_bk_modules/agentservice_base.py
Normal file
124
gwserver/_old_bk_modules/agentservice_base.py
Normal file
|
|
@ -0,0 +1,124 @@
|
|||
"""
|
||||
Erweiterte Basisklasse für Agenten im Agentservice.
|
||||
Dieser Modul stellt eine Basis-Agent-Klasse mit Rückgabeformat-Attribut für spezialisierte Agenten bereit.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class BaseAgent:
    """Base class for all agents in the system.

    Provides identity metadata, a generic prompt builder, a placeholder
    message handler and status extraction from agent answers.  Specialized
    agents are expected to override the metadata and ``process_message``.
    """

    def __init__(self):
        """Initialize the base agent with its default identity and metadata."""
        self.id = "base_agent"
        self.name = "Base Agent"
        self.type = "base"
        self.description = "Basisagent als Vorlage für spezialisierte Agenten"
        self.capabilities = "Grundlegende Agentenoperationen"
        self.instructions = """
        Als Basis-Agent kannst du grundlegende Aufgaben erledigen.
        Diese Anweisungen sollten von spezialisierten Agenten überschrieben werden.
        """
        # Return format of this agent's answers; specialized agents may override.
        self.result_format = "Text"  # default: plain text

    def get_agent_info(self) -> Dict[str, Any]:
        """
        Return metadata describing this agent.

        Returns:
            Dict with the agent's id, name, type, description, capabilities,
            instructions and result format, plus runtime bookkeeping fields.
        """
        return {
            "id": self.id,
            "name": self.name,
            "type": self.type,
            "description": self.description,
            "capabilities": self.capabilities,
            "instructions": self.instructions,
            "result_format": self.result_format,
            "used": False,              # updated at runtime
            "last_result_status": None  # updated at runtime
        }

    def get_prompt(self, message_context: Dict[str, Any]) -> str:
        """
        Build a prompt tailored to this agent from the message context.

        Args:
            message_context: Context of the message (unused in the base
                implementation; available for specialized overrides).

        Returns:
            The formatted prompt for the agent.
        """
        # Base prompt; specialized agents may override this method entirely.
        base_prompt = f"""
        Du bist {self.name}, ein {self.type} Agent.

        {self.description}

        Deine Fähigkeiten: {self.capabilities}

        {self.instructions}

        Rückgabeformat: {self.result_format}

        Formatiere deine Antwort klar und strukturiert. Beantworte alle Aspekte der Anfrage.
        Deklariere am Ende deiner Antwort den Status deines Ergebnisses:
        [STATUS: ERGEBNIS] - Wenn du ein vollständiges, konkretes Ergebnis geliefert hast
        [STATUS: TEILWEISE] - Wenn du ein teilweises Ergebnis geliefert hast
        [STATUS: PLAN] - Wenn du nur einen Plan vorgeschlagen hast
        """

        return base_prompt.strip()

    async def process_message(self, message: Dict[str, Any], context: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """
        Process a message and generate a response.

        Should be overridden by specialized agents; the base implementation
        only returns a canned answer with status PLAN.

        Args:
            message: The message to process.
            context: Optional additional context.  (Annotation fixed: the
                original declared ``Dict[str, Any] = None`` without Optional.)

        Returns:
            The generated response dict.
        """
        return {
            "role": "assistant",
            "content": f"Ich bin {self.name} und habe deine Anfrage erhalten. Allerdings bin ich nur eine Basisimplementierung ohne spezifische Funktionalität. [STATUS: PLAN]",
            "agent_type": self.type,
            "result_format": self.result_format  # echo the return format
        }

    def extract_status(self, content: str) -> Tuple[str, str]:
        """
        Extract the status declaration from a response body.

        Args:
            content: Response text, possibly containing a ``[STATUS: ...]`` tag.

        Returns:
            Tuple of (cleaned text, status).  Status is one of ERGEBNIS,
            TEILWEISE, PLAN, or UNBEKANNT when no declaration is found.
        """
        import re

        # Default status when no declaration is present.
        status = "UNBEKANNT"

        # Look for a status declaration.
        status_pattern = r'\[STATUS:\s*(ERGEBNIS|TEILWEISE|PLAN)\]'
        match = re.search(status_pattern, content, re.IGNORECASE)

        if match:
            # Extract the status and strip the declaration from the text.
            status = match.group(1).upper()
            content = re.sub(status_pattern, '', content, flags=re.IGNORECASE).strip()

        return content, status
|
||||
556
gwserver/_old_bk_modules/agentservice_code_executor.py
Normal file
556
gwserver/_old_bk_modules/agentservice_code_executor.py
Normal file
|
|
@ -0,0 +1,556 @@
|
|||
# code_executor.py
|
||||
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
import subprocess
|
||||
import tempfile
|
||||
import re
|
||||
from typing import Dict, List, Optional, Tuple, Any
|
||||
import importlib.util
|
||||
import logging
|
||||
|
||||
# Logging einrichten
|
||||
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CodeExecutor:
    """
    Executes generated code in an isolated virtual environment, while
    granting access to specific app modules and automatically installing
    required packages.
    """

    def __init__(self,
                 app_modules: List[str] = None,
                 venv_path: Optional[str] = None,
                 timeout: int = 30,
                 max_memory_mb: int = 512,
                 allowed_packages: List[str] = None,
                 blocked_packages: List[str] = None):
        """
        Initialize the CodeExecutor.

        Args:
            app_modules: Module names made available to the generated code
            venv_path: Path to a virtual environment; a temporary one is created if None
            timeout: Maximum execution time in seconds
            max_memory_mb: Maximum memory in MB
            allowed_packages: Whitelist of packages (None = everything except blocked)
            blocked_packages: Blacklist of packages (e.g. dangerous or resource-heavy ones)
        """
        self.app_modules = app_modules or []
        self.venv_path = venv_path
        self.timeout = timeout
        self.max_memory_mb = max_memory_mb
        self.temp_dir = None
        self.allowed_packages = allowed_packages
        self.blocked_packages = blocked_packages or ["cryptography", "flask", "django", "tornado", "requests"]

    def _temp_root(self) -> str:
        """Return (and lazily create) the single temp directory used for all scratch files.

        The original code called ``self.temp_dir or tempfile.mkdtemp(...)`` in
        several places, leaking a fresh temp directory per call when
        ``temp_dir`` was still None.
        """
        if not self.temp_dir:
            self.temp_dir = tempfile.mkdtemp(prefix="ai_code_exec_")
        return self.temp_dir

    def _create_venv(self) -> str:
        """Create (or reuse) the virtual environment and return its path."""
        if self.venv_path and os.path.exists(self.venv_path):
            return self.venv_path

        venv_path = os.path.join(self._temp_root(), "venv")

        try:
            logger.info(f"Erstelle virtuelle Umgebung in {venv_path}")
            subprocess.run([sys.executable, "-m", "venv", venv_path],
                           check=True,
                           capture_output=True)
            return venv_path
        except subprocess.CalledProcessError as e:
            logger.error(f"Fehler beim Erstellen der virtuellen Umgebung: {e}")
            raise RuntimeError(f"Konnte venv nicht erstellen: {e}")

    def _get_pip_executable(self, venv_path: str) -> str:
        """Return the path to the pip executable inside the virtual environment."""
        if os.name == 'nt':  # Windows
            return os.path.join(venv_path, "Scripts", "pip.exe")
        else:  # Unix/Linux
            return os.path.join(venv_path, "bin", "pip")

    def _get_python_executable(self, venv_path: str) -> str:
        """Return the path to the Python executable inside the virtual environment."""
        if os.name == 'nt':  # Windows
            return os.path.join(venv_path, "Scripts", "python.exe")
        else:  # Unix/Linux
            return os.path.join(venv_path, "bin", "python")

    def _install_packages(self, packages: List[str], venv_path: str) -> Tuple[bool, str]:
        """
        Install packages into the virtual environment.

        Args:
            packages: Packages to install
            venv_path: Path to the virtual environment

        Returns:
            Tuple of (success, log/error message)
        """
        if not packages:
            return True, ""

        # Check packages against the block/allow lists before touching pip.
        blocked = []
        for package in packages:
            # Strip any version specifier to get the bare package name.
            pkg_name = re.split('[=<>]', package)[0].strip()

            if self.blocked_packages and pkg_name.lower() in [p.lower() for p in self.blocked_packages]:
                blocked.append(pkg_name)

            if self.allowed_packages and pkg_name.lower() not in [p.lower() for p in self.allowed_packages]:
                blocked.append(pkg_name)

        if blocked:
            return False, f"Die folgenden Pakete sind nicht erlaubt: {', '.join(blocked)}"

        pip_executable = self._get_pip_executable(venv_path)
        logger.info(f"Installiere Pakete in virtueller Umgebung: {', '.join(packages)}")

        try:
            # Upgrade pip first.
            subprocess.run(
                [pip_executable, "install", "--upgrade", "pip"],
                check=True,
                capture_output=True,
                timeout=60
            )

            # Install the requested packages.
            process = subprocess.run(
                [pip_executable, "install"] + packages,
                check=True,
                capture_output=True,
                text=True,
                timeout=120  # 2 minute timeout for package installation
            )

            return True, process.stdout
        except subprocess.CalledProcessError as e:
            error_msg = f"Fehler bei der Paketinstallation: {e.stderr}"
            logger.error(error_msg)
            return False, error_msg
        except subprocess.TimeoutExpired:
            return False, "Zeitüberschreitung bei der Paketinstallation."
        except Exception as e:
            return False, f"Unerwarteter Fehler bei der Paketinstallation: {str(e)}"

    def _extract_required_packages(self, code: str) -> List[str]:
        """
        Extract required packages from the code by analysing import statements
        and ``# pip install`` comments.

        Args:
            code: The Python code

        Returns:
            List of detected package names
        """
        packages = set()

        # Recognise package comments (# pip install package).
        pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code)
        for comment in pip_comments:
            for pkg in comment.split():
                if pkg and not pkg.startswith('-'):
                    packages.add(pkg.strip())

        # Analyse import statements.
        import_lines = re.findall(r'^(?:import|from)\s+([^\s.]+)(?:\s+import|\s*$|\.)', code, re.MULTILINE)

        # Standard modules that never need installation.
        std_modules = {
            'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json',
            'collections', 'itertools', 'functools', 'pathlib', 'shutil',
            'tempfile', 'uuid', 'subprocess', 'threading', 'logging',
            'traceback', 'io', 'copy'
        }

        # App modules that never need installation.
        app_modules_prefixes = set(m.split('.')[0] for m in self.app_modules)

        for module in import_lines:
            if module not in std_modules and module not in app_modules_prefixes:
                packages.add(module)

        return list(packages)

    def _create_module_loader(self) -> str:
        """
        Create a helper script that imports the app modules inside the venv.

        Returns:
            Path to the helper script, or "" when no app modules are configured.
        """
        if not self.app_modules:
            return ""

        module_loader_path = os.path.join(self._temp_root(), "module_loader.py")

        # Determine the path to the app modules.
        app_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))

        # Use !r so Windows backslashes in the path survive as valid source.
        loader_code = f"""
import sys
import importlib.util
import os

# Add the app path to the module search path.
sys.path.insert(0, {app_path!r})

# Imported app modules, keyed by name.
modules = {{}}
"""

        for module_name in self.app_modules:
            loader_code += f"""
try:
    modules["{module_name}"] = __import__("{module_name}", fromlist=["*"])
    print(f"Modul '{module_name}' erfolgreich importiert")
except ImportError as e:
    print(f"Fehler beim Importieren von '{module_name}': {{e}}")
"""

        with open(module_loader_path, "w") as f:
            f.write(loader_code)

        return module_loader_path

    def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Execute the generated code in an isolated environment.

        Args:
            code: The Python code to run
            input_data: Input data for the code (serialized as JSON)

        Returns:
            Dict with execution results, captured output and errors
        """
        import json

        # Create or reuse the virtual environment.
        venv_path = self._create_venv()

        # Determine and install required packages.
        required_packages = self._extract_required_packages(code)

        install_success, install_log = True, ""
        if required_packages:
            install_success, install_log = self._install_packages(required_packages, venv_path)

        if not install_success:
            return {
                "success": False,
                "output": "",
                "error": f"Fehler bei der Installation der erforderlichen Pakete: {install_log}",
                "result": None,
                "installed_packages": required_packages
            }

        # All scratch files live in one shared temp directory.
        temp_root = self._temp_root()
        code_id = str(uuid.uuid4())[:8]
        code_file_path = os.path.join(temp_root, f"ai_code_{code_id}.py")

        # Module loader for app modules (empty string when not needed).
        module_loader_path = self._create_module_loader()

        # Save input data as JSON when provided.
        input_path = ""
        if input_data:
            input_path = os.path.join(temp_root, f"input_{code_id}.json")
            with open(input_path, "w") as f:
                json.dump(input_data, f)

        # Output path for the result file.
        output_path = os.path.join(temp_root, f"output_{code_id}.json")

        # Build the wrapper script.  All embedded paths and the user code
        # use !r (repr) so quotes, backslashes and triple quotes cannot break
        # out of the wrapper — the original ``exec('''{{code}}''')`` embedding
        # was both fragile and an injection vector.
        wrapped_code = f"""
import sys
import json
import traceback
import os

# Result structure
result = {{
    "success": False,
    "output": "",
    "error": "",
    "result": None,
    "installed_packages": {required_packages!r}
}}

# Make the scratch directory importable so module_loader can be found
# regardless of the subprocess working directory.
sys.path.insert(0, {temp_root!r})

try:
    # Load app modules when configured.
    if {module_loader_path!r}:
        module_loader = __import__("module_loader")
        globals().update({{k: v for k, v in module_loader.modules.items()}})

    # Load input data when provided.
    input_data = None
    if {input_path!r}:
        with open({input_path!r}, "r") as f:
            input_data = json.load(f)

    # Redirect stdout/stderr to capture the user code's output.
    from io import StringIO
    original_stdout = sys.stdout
    original_stderr = sys.stderr
    captured_stdout = StringIO()
    captured_stderr = StringIO()
    sys.stdout = captured_stdout
    sys.stderr = captured_stderr

    # Run the user code.
    try:
        local_vars = {{"input_data": input_data}}
        exec(compile({code!r}, "<ai_code>", "exec"), globals(), local_vars)

        # Capture a 'result' variable when the user code defined one.
        if "result" in local_vars:
            result["result"] = local_vars["result"]

        result["success"] = True
    except Exception as e:
        result["error"] = str(e)
        result["error"] += "\\n" + traceback.format_exc()
    finally:
        # Collect captured output and restore the streams.
        result["output"] = captured_stdout.getvalue()
        result["error"] += captured_stderr.getvalue()
        sys.stdout = original_stdout
        sys.stderr = original_stderr

except Exception as outer_e:
    result["error"] = f"Fehler beim Ausführen des Setups: {{outer_e}}\\n{{traceback.format_exc()}}"

# Persist the result for the parent process.
with open({output_path!r}, "w") as f:
    json.dump(result, f, default=str)
"""

        with open(code_file_path, "w") as f:
            f.write(wrapped_code)

        python_executable = self._get_python_executable(venv_path)

        logger.info(f"Führe Code in virtueller Umgebung aus: {python_executable}")
        try:
            cmd = [python_executable, code_file_path]

            # Enforce the memory limit where the platform allows it.
            env = os.environ.copy()
            if self.max_memory_mb:
                if os.name == 'posix':  # Unix/Linux: ulimit gives a hard cap
                    cmd = ["bash", "-c", f"ulimit -v {self.max_memory_mb * 1024} && {python_executable} {code_file_path}"]
                elif os.name == 'nt':  # Windows
                    # No hard memory limit without Job Objects; not implemented.
                    pass

            process = subprocess.run(
                cmd,
                timeout=self.timeout,
                env=env,
                capture_output=True,
                text=True
            )

            # Read the result from the output file.
            if os.path.exists(output_path):
                with open(output_path, "r") as f:
                    execution_result = json.load(f)
            else:
                execution_result = {
                    "success": False,
                    "output": process.stdout,
                    "error": f"Keine Ergebnisdatei gefunden. Stderr: {process.stderr}",
                    "result": None,
                    "installed_packages": required_packages
                }

        except subprocess.TimeoutExpired:
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Zeitüberschreitung bei der Ausführung (Timeout nach {self.timeout} Sekunden)",
                "result": None,
                "installed_packages": required_packages
            }
        except Exception as e:
            execution_result = {
                "success": False,
                "output": "",
                "error": f"Fehler bei der Ausführung: {str(e)}",
                "result": None,
                "installed_packages": required_packages
            }

        # Attach the package installation log when present.
        if install_log:
            execution_result["package_install_log"] = install_log

        # Remove per-run scratch files.
        self._cleanup_temp_files([code_file_path, input_path, output_path])

        return execution_result

    def _cleanup_temp_files(self, file_paths: List[str]):
        """Remove temporary files, ignoring (but logging) failures."""
        for path in file_paths:
            if path and os.path.exists(path):
                try:
                    os.remove(path)
                except Exception as e:
                    logger.warning(f"Konnte temporäre Datei nicht löschen {path}: {e}")

    def cleanup(self):
        """Remove all temporary resources (the scratch directory and venv)."""
        if self.temp_dir and os.path.exists(self.temp_dir):
            import shutil
            try:
                shutil.rmtree(self.temp_dir)
                logger.info(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
            except Exception as e:
                logger.warning(f"Konnte temporäres Verzeichnis nicht löschen {self.temp_dir}: {e}")

    def __del__(self):
        """Best-effort cleanup on garbage collection."""
        self.cleanup()
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# Beispiel zur Verwendung des erweiterten CodeExecutor in einem AI Chat
|
||||
|
||||
# from code_executor import CodeExecutor
|
||||
|
||||
def execute_ai_generated_code(prompt_result: str, input_data=None):
    """
    Execute AI-generated Python code, installing required packages automatically.

    Args:
        prompt_result: The Python code produced by the AI
        input_data: Optional input data for the code

    Returns:
        The value of the executed code's ``result`` variable, or None on failure
    """
    # Modules of the host application the generated code may use.
    available_modules = [
        "utils.sharepoint_crud",
        # add further modules here
    ]

    # None means: every package is allowed except the blocked ones below.
    allowed_packages = None

    # Packages excluded for security or resource reasons.
    blocked_packages = [
        "cryptography", "flask", "django", "tornado",      # security
        "tensorflow", "pytorch", "scikit-learn",           # resource-heavy
    ]

    executor = CodeExecutor(
        app_modules=available_modules,
        timeout=120,          # 2 minute timeout
        max_memory_mb=1024,   # 1 GB memory limit
        allowed_packages=allowed_packages,
        blocked_packages=blocked_packages,
    )

    try:
        result = executor.execute_code(prompt_result, input_data)

        # Guard clause: report and bail out on failure.
        if not result["success"]:
            print(f"Fehler bei der Ausführung: {result['error']}")
            return None

        print("Code erfolgreich ausgeführt!")
        print(f"Ausgabe: {result['output']}")

        # Report any packages that were installed on the fly.
        if "installed_packages" in result and result["installed_packages"]:
            print(f"Installierte Pakete: {', '.join(result['installed_packages'])}")

        return result["result"]
    finally:
        # Always release the temporary environment.
        executor.cleanup()
|
||||
|
||||
# Beispiel für die Verwendung
|
||||
if __name__ == "__main__":
    # Demo: a snippet as an AI would generate it, including its package needs.
    ai_generated_code = """
# pip install pandas matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import utils.sharepoint_crud as sp

# Daten aus input_data verwenden
file_path = input_data.get('file_path')
site_url = input_data.get('site_url')

# Beispieldaten erstellen
data = pd.DataFrame({
    'Monat': ['Jan', 'Feb', 'Mär', 'Apr', 'Mai'],
    'Umsatz': [1200, 1400, 1300, 1500, 1800]
})

# Plot erstellen
plt.figure(figsize=(10, 6))
plt.bar(data['Monat'], data['Umsatz'])
plt.title('Umsatz nach Monat')
plt.savefig('umsatz_plot.png')
print('Diagramm erstellt und gespeichert')

# SharePoint-Datei hochladen
result = sp.upload_file(file_path, site_url)
print(f"Datei wurde hochgeladen: {result}")

# Ergebnis zurückgeben
result = {
    'data': data.to_dict(),
    'plot_saved': True,
    'upload_result': result
}
"""

    # Input values the generated code expects.
    sample_input = {
        "file_path": "/path/to/document.docx",
        "site_url": "https://example.sharepoint.com/sites/mysite"
    }

    # Run the demo snippet.
    execute_ai_generated_code(ai_generated_code, sample_input)
|
||||
|
||||
475
gwserver/_old_bk_modules/agentservice_dataextraction.py
Normal file
475
gwserver/_old_bk_modules/agentservice_dataextraction.py
Normal file
|
|
@ -0,0 +1,475 @@
|
|||
"""
|
||||
Hilfsfunktion für die intelligente Extraktion von Dateninhalten (Fortsetzung).
|
||||
"""
|
||||
|
||||
from datetime import datetime
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
import asyncio
|
||||
import copy
|
||||
|
||||
# Import erweiterte Dateiverarbeitung
|
||||
from gateway.gwserver.modules.agentservice_filemanager import extract_text_from_file_content
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def data_extraction(
    prompt: str,
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    ai_service,
    lucydom_interface = None,
    workflow_id: str = None,
    add_log_func = None
) -> Dict[str, Any]:
    """
    Run an AI call to determine which contents should be extracted from which
    file objects, perform the required extractions, and return the result.

    Args:
        prompt: Specification of which data to extract
        files: All available files with their metadata
        messages: All messages of the workflow
        ai_service: Service for AI requests
        lucydom_interface: Interface for database access (optional)
        workflow_id: Optional workflow id used for logging
        add_log_func: Optional callback for appending log entries

    Returns:
        Structured text object with the extracted data and context info,
        or an error dict when extraction fails.
    """
    try:
        # Step 1: AI call that plans the necessary extractions.
        plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func)

        # Step 2: run the planned extractions.
        raw_data = await _execute_extractions(
            plan,
            files,
            lucydom_interface,
            ai_service,
            workflow_id,
            add_log_func
        )

        # Step 3: structure the extracted data for the caller.
        return _structure_extracted_data(raw_data, files, prompt)

    except Exception as e:
        logger.error(f"Fehler bei der Datenextraktion: {str(e)}", exc_info=True)

        # Forward the error to the workflow log when a logger callback exists.
        if add_log_func and workflow_id:
            add_log_func(workflow_id, f"Fehler bei der Datenextraktion: {str(e)}", "error")

        # Error result for the caller.
        return {
            "error": str(e),
            "status": "error",
            "files_processed": len(files),
            "message": f"Die Datenextraktion konnte nicht durchgeführt werden: {str(e)}"
        }
|
||||
|
||||
async def _create_extraction_plan(
    prompt: str,
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    ai_service,
    workflow_id: str = None,
    add_log_func = None
) -> List[Dict[str, Any]]:
    """
    Create an extraction plan with AI assistance.

    Asks the AI service which of the available files have to be extracted
    for the given task and with which file-specific extraction prompt.

    Args:
        prompt: Specification of which data should be extracted
        files: List of all available files with metadata
        messages: List of all messages in the workflow
        ai_service: Service for AI requests (must provide ``call_api``)
        workflow_id: Optional workflow ID for logging
        add_log_func: Optional function for adding logs

    Returns:
        Extraction plan (list of extraction instructions per file).
        Falls back to a default plan when the AI response cannot be parsed,
        and to an empty list when the AI call itself fails.
    """
    # Build the context information for the AI call.
    file_infos = []
    for file in files:
        # Basic metadata of the file
        file_info = {
            "id": file.get("id", ""),
            "name": file.get("name", ""),
            "type": file.get("type", ""),
            "content_type": file.get("content_type", ""),
            "size": file.get("size", "")
        }

        # Check extraction status via document contents already present
        # in the workflow messages (if any).
        doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages)

        if doc_contents:
            # At least one content with is_extracted=True means the file
            # does not need to be extracted again.
            already_extracted = any(
                content.get("is_extracted", False) for content in doc_contents
            )
            file_info["already_extracted"] = already_extracted

            # Add a short preview of the first text content (if available)
            for content in doc_contents:
                if content.get("type") == "text" and content.get("text"):
                    text = content.get("text", "")
                    file_info["content_preview"] = text[:200] + "..." if len(text) > 200 else text
                    break
        else:
            file_info["already_extracted"] = False

        file_infos.append(file_info)

    # Build the AI prompt (German, matching the rest of the pipeline).
    extraction_prompt = f"""
    Du bist ein Datenextraktionsexperte, der mithilfe von KI-Analyse entscheidet, welche Dateien
    und Inhalte für eine bestimmte Aufgabe extrahiert werden müssen.

    AUFGABE:
    {prompt}

    VERFÜGBARE DATEIEN:
    {json.dumps(file_infos, indent=2)}

    Für jede Datei, die für die Aufgabe relevant ist, erstelle eine Extraktionsanweisung mit den folgenden Informationen:
    1. file_id: Die ID der zu extrahierenden Datei
    2. extract_needed: Boolean, ob eine Extraktion erforderlich ist (True, wenn die Datei noch nicht extrahiert wurde und für die Aufgabe benötigt wird)
    3. extraction_prompt: Ein spezifischer Prompt für die Extraktion der Datei (besonders wichtig für Bilder und nicht-textbasierte Dateien)
    4. importance: Priorität/Wichtigkeit für die Aufgabe (1-5, wobei 5 am wichtigsten ist)

    Format:
    [
      {{
        "file_id": "1234",
        "extract_needed": true,
        "extraction_prompt": "Extrahiere die Tabellendaten mit Fokus auf die Umsatzzahlen",
        "importance": 5
      }},
      ...
    ]

    Gib nur das JSON-Array zurück, ohne weitere Erklärungen.
    """

    if add_log_func and workflow_id:
        add_log_func(workflow_id, "Extraktionsplan wird erstellt...", "info")

    try:
        # Perform the AI call
        extraction_plan_response = await ai_service.call_api([{"role": "user", "content": extraction_prompt}])

        # Extract the JSON array from the (possibly chatty) response
        import re
        json_match = re.search(r'\[.*\]', extraction_plan_response, re.DOTALL)

        extraction_plan = None
        if json_match:
            try:
                extraction_plan = json.loads(json_match.group(0))
            except json.JSONDecodeError:
                # BUGFIX: previously a malformed JSON payload bubbled up to
                # the outer handler and returned an empty plan (no extraction
                # at all); treat it like the no-match case and fall back to
                # the default plan below.
                extraction_plan = None

        if extraction_plan is not None:
            if add_log_func and workflow_id:
                add_log_func(
                    workflow_id,
                    f"Extraktionsplan erstellt für {len(extraction_plan)} Dateien",
                    "info"
                )
            return extraction_plan

        # Fallback on parsing problems: extract every not-yet-extracted file.
        if add_log_func and workflow_id:
            add_log_func(
                workflow_id,
                "Parsing-Fehler beim Extraktionsplan, erstelle Standard-Plan",
                "warning"
            )

        default_plan = []
        for file in files:
            doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages)
            already_extracted = any(
                content.get("is_extracted", False) for content in doc_contents
            ) if doc_contents else False

            default_plan.append({
                "file_id": file.get("id", ""),
                "extract_needed": not already_extracted,
                "extraction_prompt": f"Extrahiere alle relevanten Informationen aus {file.get('name', '')}",
                "importance": 3
            })

        return default_plan

    except Exception as e:
        logger.error(f"Fehler bei der Erstellung des Extraktionsplans: {str(e)}", exc_info=True)

        if add_log_func and workflow_id:
            add_log_func(
                workflow_id,
                f"Fehler bei der Erstellung des Extraktionsplans: {str(e)}",
                "error"
            )

        # Empty plan on hard failures (e.g. the AI call itself failed)
        return []
|
||||
|
||||
async def _execute_extractions(
    extraction_plan: List[Dict[str, Any]],
    files: List[Dict[str, Any]],
    lucydom_interface,
    ai_service,
    workflow_id: str = None,
    add_log_func = None,
    messages: List[Dict[str, Any]] = None
) -> List[Dict[str, Any]]:
    """
    Execute the planned extractions.

    Args:
        extraction_plan: List of extraction instructions
        files: List of all available files
        lucydom_interface: Interface for database/file access
        ai_service: Service for AI requests
        workflow_id: Optional workflow ID for logging
        add_log_func: Optional function for adding logs
        messages: Optional list of workflow messages, used to reuse
            already-extracted document contents. (BUGFIX: the original body
            referenced an undefined ``messages`` name, which raised a
            NameError whenever a file did not need a fresh extraction; the
            parameter is added with a default so existing callers keep
            working.)

    Returns:
        List with extracted data per file
    """
    extracted_data = []
    messages = messages or []

    # Process the most important files first
    sorted_plan = sorted(extraction_plan, key=lambda x: x.get("importance", 0), reverse=True)

    for extraction_item in sorted_plan:
        file_id = extraction_item.get("file_id")
        extract_needed = extraction_item.get("extract_needed", False)
        extraction_prompt = extraction_item.get("extraction_prompt", "")

        # Find the metadata of the file
        file_metadata = next((f for f in files if f.get("id") == file_id), None)

        if not file_metadata:
            logger.warning(f"Datei mit ID {file_id} nicht gefunden")
            continue

        file_name = file_metadata.get("name", "")
        file_type = file_metadata.get("type", "")
        content_type = file_metadata.get("content_type", "")

        if add_log_func and workflow_id:
            add_log_func(
                workflow_id,
                f"Verarbeite Datei: {file_name} (Extraktion notwendig: {extract_needed})",
                "info"
            )

        # Only extract when the plan says it is required
        if extract_needed:
            # Fetch the file content via the LucyDOM interface
            if lucydom_interface:
                try:
                    file_content = await lucydom_interface.read_file_content(file_id)

                    if not file_content:
                        if add_log_func and workflow_id:
                            add_log_func(workflow_id, f"Datei {file_name} nicht gefunden", "warning")
                        continue

                    # Choose the extraction strategy based on the file type
                    if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
                        # Image analysis with the AI service
                        if ai_service and hasattr(ai_service, "analyze_image"):
                            try:
                                image_analysis = await ai_service.analyze_image(
                                    image_data=file_content,
                                    prompt=extraction_prompt,
                                    mime_type=content_type
                                )

                                extracted_data.append({
                                    "file_id": file_id,
                                    "name": file_name,
                                    "type": file_type,
                                    "content": image_analysis,
                                    "is_extracted": True,
                                    "extraction_method": "image_analysis"
                                })

                                if add_log_func and workflow_id:
                                    add_log_func(workflow_id, f"Bild {file_name} erfolgreich analysiert", "info")
                            except Exception as e:
                                logger.error(f"Fehler bei der Bildanalyse {file_name}: {str(e)}")
                                if add_log_func and workflow_id:
                                    add_log_func(workflow_id, f"Fehler bei der Bildanalyse {file_name}: {str(e)}", "error")
                        else:
                            # Fallback when no image analysis is available
                            extracted_data.append({
                                "file_id": file_id,
                                "name": file_name,
                                "type": file_type,
                                "content": f"Bild: {file_name} (Analyse nicht verfügbar)",
                                "is_extracted": False,
                                "extraction_method": "none"
                            })
                    else:
                        # Text-based extraction for all other file types
                        try:
                            content, is_extracted = extract_text_from_file_content(
                                file_content, file_name, content_type
                            )

                            extracted_data.append({
                                "file_id": file_id,
                                "name": file_name,
                                "type": file_type,
                                "content": content,
                                "is_extracted": is_extracted,
                                "extraction_method": "text_extraction"
                            })

                            if add_log_func and workflow_id:
                                add_log_func(
                                    workflow_id,
                                    f"Datei {file_name} extrahiert (Status: {is_extracted})",
                                    "info"
                                )
                        except Exception as e:
                            logger.error(f"Fehler bei der Textextraktion {file_name}: {str(e)}")
                            if add_log_func and workflow_id:
                                add_log_func(workflow_id, f"Fehler bei der Textextraktion {file_name}: {str(e)}", "error")
                except Exception as e:
                    logger.error(f"Fehler beim Lesen der Datei {file_name}: {str(e)}")
                    if add_log_func and workflow_id:
                        add_log_func(workflow_id, f"Fehler beim Lesen der Datei {file_name}: {str(e)}", "error")
            else:
                logger.warning(f"Kein LucyDOM-Interface verfügbar für Datei {file_name}")
                if add_log_func and workflow_id:
                    add_log_func(workflow_id, f"Kein LucyDOM-Interface verfügbar für Datei {file_name}", "warning")
        else:
            # No extraction needed: reuse existing contents from the messages
            doc_contents = _extract_document_contents_from_messages(file_id, messages)

            if doc_contents:
                # Use the first text content found
                for content in doc_contents:
                    if content.get("type") == "text":
                        extracted_data.append({
                            "file_id": file_id,
                            "name": file_name,
                            "type": file_type,
                            "content": content.get("text", ""),
                            "is_extracted": content.get("is_extracted", False),
                            "extraction_method": "existing_content"
                        })
                        break
            else:
                # No existing contents found for this file
                extracted_data.append({
                    "file_id": file_id,
                    "name": file_name,
                    "type": file_type,
                    "content": f"Keine Inhalte verfügbar für {file_name}",
                    "is_extracted": False,
                    "extraction_method": "none"
                })

    return extracted_data
|
||||
|
||||
def _structure_extracted_data(
    extracted_data: List[Dict[str, Any]],
    files: List[Dict[str, Any]],
    prompt: str
) -> Dict[str, Any]:
    """
    Assemble the extracted per-file data into a single structured result.

    Args:
        extracted_data: Extracted data, one entry per processed file
        files: All available files (used to enrich the metadata)
        prompt: The original extraction prompt

    Returns:
        Structured result object with status, timestamp and the per-file
        extracted contents.
    """
    def _metadata_for(file_id: str) -> Dict[str, Any]:
        # First file whose id matches; empty dict when the id is unknown.
        return next((entry for entry in files if entry.get("id") == file_id), {})

    def _content_item(item: Dict[str, Any]) -> Dict[str, Any]:
        # Merge the extraction result with the file's metadata; the
        # extraction result wins for name/type when present.
        file_id = item.get("file_id", "")
        meta = _metadata_for(file_id)
        return {
            "file_id": file_id,
            "name": item.get("name", meta.get("name", "")),
            "type": item.get("type", meta.get("type", "")),
            "content_type": meta.get("content_type", ""),
            "size": meta.get("size", ""),
            "is_extracted": item.get("is_extracted", False),
            "extraction_method": item.get("extraction_method", ""),
            "content": item.get("content", "")
        }

    return {
        "prompt": prompt,
        "files_processed": len(extracted_data),
        "total_files": len(files),
        "extraction_timestamp": datetime.now().isoformat(),
        "status": "success",
        "extracted_content": [_content_item(item) for item in extracted_data],
    }
|
||||
|
||||
def _extract_document_contents_from_messages(file_id: str, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """
    Collect the document contents for a given file from the workflow messages.

    Args:
        file_id: ID of the file
        messages: List of all messages in the workflow

    Returns:
        List of the document contents belonging to the given file
    """
    contents = []

    for message in messages:
        # Search the documents attached to this message
        for document in message.get("documents", []):
            source = document.get("source", {})

            # NOTE: the original condition
            #   source.get("id") == file_id or source.get("type") == "file" and source.get("id") == file_id
            # reduces to a plain id comparison by operator precedence
            # (A or (B and A) == A), so the type check was dead code;
            # simplified accordingly — behavior is unchanged.
            if source.get("id") == file_id:
                doc_contents = document.get("contents", [])
                if doc_contents:
                    contents.extend(doc_contents)

    return contents
|
||||
|
||||
def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):
    """Log through the module logger and an optional workflow log function."""
    # Dispatch to the matching logger method; unknown levels are info-level.
    emit = {"error": logger.error, "warning": logger.warning}.get(log_type, logger.info)
    emit(message)

    # Forward to the externally supplied log function when one is configured.
    if add_log_func and workflow_id:
        add_log_func(workflow_id, message, log_type, agent_id, agent_name)
|
||||
|
|
@ -89,9 +89,66 @@ def prepare_file_contexts(files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|||
|
||||
return file_contexts
|
||||
|
||||
def is_text_extractable(file_name: str, content_type: str = None) -> bool:
    """
    Check whether text can be extracted from the file.

    Args:
        file_name: Name of the file, used to detect the format
        content_type: Optional MIME type of the file

    Returns:
        True if text extraction is possible, otherwise False
    """
    # Compare extensions case-insensitively (BUGFIX: "REPORT.TXT" or
    # "scan.PDF" previously fell through to the generic fallback; sibling
    # code in this file already lowercases before matching extensions).
    name = file_name.lower()

    # Plain text formats
    if name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):
        return True

    # Excel files - only extractable when pandas is installed
    if name.endswith(('.xlsx', '.xls')):
        return pd is not None

    # PDF files - extractable when a PDF library is available
    if name.endswith('.pdf'):
        # Probe for PyPDF2 first, then PyMuPDF; narrow except clauses
        # replace the original bare ``except:``.
        try:
            import PyPDF2  # noqa: F401
            return True
        except ImportError:
            pass
        except Exception:
            return False
        try:
            import fitz  # PyMuPDF  # noqa: F401
            return True
        except ImportError:
            return False
        except Exception:
            return False

    # Image formats - not extractable as text
    if name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg')):
        return False

    # Video formats - not extractable as text
    if name.endswith(('.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv')):
        return False

    # Audio formats - not extractable as text
    if name.endswith(('.mp3', '.wav', '.ogg', '.flac', '.aac')):
        return False

    # Fall back to the content type when the extension is not conclusive
    if content_type:
        if content_type.startswith(('text/', 'application/json', 'application/xml')):
            return True
        if content_type == 'application/pdf':
            return True
        if content_type.startswith(('image/', 'video/', 'audio/')):
            return False

    # When in doubt, attempt extraction
    return True
|
||||
|
||||
|
||||
def extract_text_from_file_content(file_content: bytes, file_name: str, content_type: str = None) -> str:
|
||||
def extract_text_from_file_content(file_content: bytes, file_name: str, content_type: str = None) -> Tuple[str, bool]:
|
||||
"""
|
||||
Extrahiert Text aus verschiedenen Dateiformaten basierend auf dem Binärinhalt.
|
||||
|
||||
|
|
@ -101,18 +158,22 @@ def extract_text_from_file_content(file_content: bytes, file_name: str, content_
|
|||
content_type: Optional MIME-Typ der Datei
|
||||
|
||||
Returns:
|
||||
Extrahierter Text oder Fehlermeldung
|
||||
Tuple mit (extrahierter Text, is_extracted Flag)
|
||||
"""
|
||||
# Prüfen, ob Text extrahierbar ist
|
||||
if not is_text_extractable(file_name, content_type):
|
||||
return f"[Datei: {file_name} - Textextraktion nicht unterstützt]", False
|
||||
|
||||
try:
|
||||
# Einfache Textdateien
|
||||
if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py')):
|
||||
try:
|
||||
return file_content.decode('utf-8')
|
||||
return file_content.decode('utf-8'), True
|
||||
except UnicodeDecodeError:
|
||||
try:
|
||||
return file_content.decode('latin1')
|
||||
return file_content.decode('latin1'), True
|
||||
except:
|
||||
return file_content.decode('cp1252', errors='replace')
|
||||
return file_content.decode('cp1252', errors='replace'), True
|
||||
|
||||
# Excel-Dateien
|
||||
elif file_name.endswith(('.xlsx', '.xls')):
|
||||
|
|
@ -123,9 +184,9 @@ def extract_text_from_file_content(file_content: bytes, file_name: str, content_
|
|||
result = f"Excel file with {len(df)} rows and {len(df.columns)} columns.\n"
|
||||
result += f"Columns: {', '.join(df.columns.tolist())}\n\n"
|
||||
result += df.to_string(index=False)
|
||||
return result
|
||||
return result, True
|
||||
else:
|
||||
return f"[Excel-Datei: {file_name} - pandas nicht installiert]"
|
||||
return f"[Excel-Datei: {file_name} - pandas nicht installiert]", False
|
||||
|
||||
# CSV-Dateien
|
||||
elif file_name.endswith('.csv'):
|
||||
|
|
@ -145,43 +206,41 @@ def extract_text_from_file_content(file_content: bytes, file_name: str, content_
|
|||
result = f"CSV file with {len(df)} rows and {len(df.columns)} columns.\n"
|
||||
result += f"Columns: {', '.join(df.columns.tolist())}\n\n"
|
||||
result += df.to_string(index=False)
|
||||
return result
|
||||
return result, True
|
||||
else:
|
||||
return f"[CSV-Datei: {file_name} - pandas nicht installiert]"
|
||||
return f"[CSV-Datei: {file_name} - pandas nicht installiert]", False
|
||||
|
||||
# PDF-Dateien
|
||||
elif file_name.endswith('.pdf'):
|
||||
try:
|
||||
try:
|
||||
from PyPDF2 import PdfReader
|
||||
# BytesIO is already imported at the top level
|
||||
reader = PdfReader(BytesIO(file_content))
|
||||
text = ""
|
||||
for page in reader.pages:
|
||||
text += page.extract_text() + "\n\n"
|
||||
return text
|
||||
return text, True
|
||||
except ImportError:
|
||||
try:
|
||||
import fitz # PyMuPDF
|
||||
# BytesIO is already imported at the top level
|
||||
doc = fitz.open(stream=file_content, filetype="pdf")
|
||||
text = ""
|
||||
for page in doc:
|
||||
text += page.get_text() + "\n\n"
|
||||
return text
|
||||
return text, True
|
||||
except ImportError:
|
||||
return f"[PDF: {file_name} - Keine PDF-Bibliothek installiert]"
|
||||
return f"[PDF: {file_name} - Keine PDF-Bibliothek installiert]", False
|
||||
except Exception as e:
|
||||
raise FileExtractionError(f"Fehler beim Lesen der PDF-Datei {file_name}: {str(e)}")
|
||||
|
||||
# Sonstige Dateien
|
||||
else:
|
||||
return f"[Datei: {file_name} - Textextraktion nicht unterstützt]"
|
||||
return f"[Datei: {file_name} - Textextraktion nicht unterstützt]", False
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Extrahieren von Text aus {file_name}: {str(e)}")
|
||||
raise FileExtractionError(f"Fehler beim Extrahieren von Text aus {file_name}: {str(e)}")
|
||||
|
||||
return f"[Fehler bei der Textextraktion: {str(e)}]", False
|
||||
|
||||
async def extract_and_analyze_pdf_images(
|
||||
pdf_content: bytes,
|
||||
prompt: str,
|
||||
|
|
@ -296,11 +355,9 @@ async def extract_and_analyze_pdf_images(
|
|||
|
||||
return image_responses
|
||||
|
||||
|
||||
def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Fügt eine Datei zu einer Nachricht hinzu.
|
||||
Funktion für Workflow-Manager und interne Verwendung.
|
||||
Fügt eine Datei zu einer Nachricht hinzu mit Kennzeichnung, ob Text extrahiert wurde.
|
||||
|
||||
Args:
|
||||
message: Die zu erweiternde Nachricht
|
||||
|
|
@ -309,7 +366,7 @@ def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> D
|
|||
Returns:
|
||||
Die aktualisierte Nachricht mit der Datei
|
||||
"""
|
||||
# Detailed logging for debugging
|
||||
# Detailliertes Logging für Debugging
|
||||
logger.info(f"Adding file to message: {file_data.get('name', 'unnamed_file')} (ID: {file_data.get('id', 'unknown')})")
|
||||
|
||||
# Initialize documents array if needed
|
||||
|
|
@ -318,6 +375,7 @@ def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> D
|
|||
logger.debug("Initialized empty documents array in message")
|
||||
|
||||
# Create a unique ID for the document if not provided
|
||||
import uuid
|
||||
doc_id = file_data.get("id", f"file_{uuid.uuid4()}")
|
||||
|
||||
# Extract file size if available
|
||||
|
|
@ -328,27 +386,39 @@ def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> D
|
|||
# Estimate size from content if not provided
|
||||
file_size = len(file_data.get("content", ""))
|
||||
|
||||
# Bestimmen, ob der Inhalt bereits extrahiert wurde
|
||||
content = file_data.get("content", "No content available")
|
||||
file_name = file_data.get("name", "unnamed_file")
|
||||
content_type = file_data.get("content_type")
|
||||
|
||||
# Prüfen, ob der Inhalt als extrahiert markiert werden sollte
|
||||
is_extracted = file_data.get("is_extracted", False)
|
||||
if not is_extracted and isinstance(content, str) and content.strip() and file_name:
|
||||
# Wenn nicht explizit markiert, aber Inhalt vorhanden ist, prüfen wir den Dateityp
|
||||
is_extracted = is_text_extractable(file_name, content_type)
|
||||
|
||||
# Create standard document structure that matches the data model
|
||||
document = {
|
||||
"id": doc_id, # Add an ID to the document itself
|
||||
"id": doc_id,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": file_data.get("id", doc_id),
|
||||
"name": file_data.get("name", "unnamed_file"),
|
||||
"content_type": file_data.get("content_type"),
|
||||
"name": file_name,
|
||||
"content_type": content_type,
|
||||
"size": file_size,
|
||||
"upload_date": file_data.get("upload_date", datetime.now().isoformat())
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": file_data.get("content", "No content available")
|
||||
"text": content,
|
||||
"is_extracted": is_extracted # Flag für den Extraktionsstatus hinzufügen
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# Log document structure for debugging
|
||||
logger.debug(f"Created document structure: {json.dumps({k: v for k, v in document.items() if k != 'contents'})}")
|
||||
logger.debug(f"Created document structure: id={doc_id}, name={file_name}, is_extracted={is_extracted}")
|
||||
|
||||
# Check if file is already in the message to avoid duplicates
|
||||
file_already_added = any(
|
||||
|
|
@ -358,14 +428,12 @@ def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> D
|
|||
|
||||
if not file_already_added:
|
||||
message["documents"].append(document)
|
||||
logger.info(f"File {file_data.get('name')} successfully added to message (total: {len(message.get('documents', []))} files)")
|
||||
logger.info(f"File {file_name} successfully added to message (total: {len(message.get('documents', []))} files)")
|
||||
else:
|
||||
logger.info(f"File {file_data.get('name')} already exists in message, skipping")
|
||||
logger.info(f"File {file_name} already exists in message, skipping")
|
||||
|
||||
return message
|
||||
|
||||
|
||||
|
||||
def extract_files_from_message(message: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Extrahiert Dateiinformationen aus einer Nachricht.
|
||||
|
|
@ -413,18 +481,17 @@ def extract_files_from_message(message: Dict[str, Any]) -> List[Dict[str, Any]]:
|
|||
logger.info(f"Extracted {len(files)} files from message")
|
||||
return files
|
||||
|
||||
|
||||
|
||||
async def read_file_contents(
|
||||
file_contexts: List[Dict[str, Any]],
|
||||
lucydom_interface,
|
||||
workflow_id: str = None,
|
||||
add_log_func = None,
|
||||
ai_service = None # AI service parameter for image analysis
|
||||
) -> Dict[str, str]:
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
"""
|
||||
Liest den Inhalt aller Dateien und führt bei Bildern und Dokumenten Analysen durch.
|
||||
Verwendet LucyDOM-Interface statt direkter Dateizugriffe.
|
||||
Gibt jetzt ein Dictionary mit Dateiinhalten und Extraktionsstatus zurück.
|
||||
|
||||
Args:
|
||||
file_contexts: Liste der Dateikontexte mit Metadaten
|
||||
|
|
@ -434,7 +501,7 @@ async def read_file_contents(
|
|||
ai_service: Optionaler AI-Service für die Bildanalyse
|
||||
|
||||
Returns:
|
||||
Dictionary mit Dateiinhalten (file_id -> content)
|
||||
Dictionary mit Dateiinhalten und Metadaten (file_id -> {content, is_extracted, ...})
|
||||
"""
|
||||
file_contents = {}
|
||||
|
||||
|
|
@ -452,56 +519,96 @@ async def read_file_contents(
|
|||
|
||||
if not file_data:
|
||||
_log(add_log_func, workflow_id, f"Datei {file_name} nicht gefunden", "warning")
|
||||
file_contents[file_id] = f"File content not available (File not found)"
|
||||
file_contents[file_id] = {
|
||||
"content": f"File content not available (File not found)",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": file.get("content_type")
|
||||
}
|
||||
continue
|
||||
|
||||
logger.info(f"Successfully read file: {file_name} (ID: {file_id}, Type: {file_type})")
|
||||
# Image files - always perform image analysis if AI service is available
|
||||
|
||||
# Bildverarbeitung - immer KI-Analyse verwenden, wenn verfügbar
|
||||
if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
|
||||
if ai_service:
|
||||
if ai_service and hasattr(ai_service, "analyze_image"):
|
||||
try:
|
||||
#_log(add_log_func, workflow_id, f"Analyzing image {file_name} {len(file_data)}B...", "info")
|
||||
logger.info(f"ai_service type: {type(ai_service)}")
|
||||
logger.info(f"ai_service methods: {dir(ai_service)}")
|
||||
logger.info(f"ai_service has analyze_image method: {'analyze_image' in dir(ai_service)}")
|
||||
|
||||
image_analysis = await ai_service.analyze_image(
|
||||
image_data=file_data,
|
||||
prompt="Describe this image in detail",
|
||||
mime_type=file.get("content_type")
|
||||
)
|
||||
|
||||
|
||||
logger.debug(f"Image analysis successfully generated for {file_name}")
|
||||
|
||||
file_contents[file_id] = f"Image Analysis:\n{image_analysis}"
|
||||
file_contents[file_id] = {
|
||||
"content": f"Image Analysis:\n{image_analysis}",
|
||||
"is_extracted": False, # Bildanalyse gilt nicht als Text-Extraktion
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": file.get("content_type")
|
||||
}
|
||||
_log(add_log_func, workflow_id, f"Image {file_name} analyzed successfully", "info")
|
||||
except Exception as e:
|
||||
logger.error(f"Error analyzing image {file_name}: {str(e)}")
|
||||
_log(add_log_func, workflow_id, f"Error analyzing image {file_name}: {str(e)}", "error")
|
||||
file_contents[file_id] = f"Image file: {file_name} (Analysis failed: {str(e)})"
|
||||
file_contents[file_id] = {
|
||||
"content": f"Image file: {file_name} (Analysis failed: {str(e)})",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": file.get("content_type")
|
||||
}
|
||||
else:
|
||||
file_contents[file_id] = f"Image file: {file_name} (AI analysis not available)"
|
||||
file_contents[file_id] = {
|
||||
"content": f"Image file: {file_name} (AI analysis not available)",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": file.get("content_type")
|
||||
}
|
||||
|
||||
# Document files
|
||||
# Dokument- und Textdateien
|
||||
elif file_type == "document" or not file_type:
|
||||
# Verwende die zentrale Textextraktionsfunktion mit Dateiinhalt
|
||||
content = extract_text_from_file_content(file_data, file_name, file.get("content_type"))
|
||||
file_contents[file_id] = content
|
||||
_log(add_log_func, workflow_id, f"File {file_name} read successfully", "info")
|
||||
content, is_extracted = extract_text_from_file_content(
|
||||
file_data, file_name, file.get("content_type")
|
||||
)
|
||||
file_contents[file_id] = {
|
||||
"content": content,
|
||||
"is_extracted": is_extracted,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": file.get("content_type")
|
||||
}
|
||||
_log(add_log_func, workflow_id,
|
||||
f"File {file_name} read successfully (extracted: {is_extracted})", "info")
|
||||
|
||||
# Other file types - just store metadata
|
||||
# Andere Dateitypen - nur Metadaten speichern
|
||||
else:
|
||||
file_contents[file_id] = f"File: {file_name} (Type: {file_type}, content not available)"
|
||||
file_contents[file_id] = {
|
||||
"content": f"File: {file_name} (Type: {file_type}, content not available)",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": file.get("content_type")
|
||||
}
|
||||
_log(add_log_func, workflow_id, f"Unsupported file type: {file_type} for {file_name}", "warning")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading file {file_name}: {str(e)}")
|
||||
_log(add_log_func, workflow_id, f"Error reading file {file_name}: {str(e)}", "error")
|
||||
file_contents[file_id] = f"File content not available (Error: {str(e)})"
|
||||
file_contents[file_id] = {
|
||||
"content": f"File content not available (Error: {str(e)})",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": file.get("content_type")
|
||||
}
|
||||
|
||||
return file_contents
|
||||
|
||||
|
||||
def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):
|
||||
"""Hilfsfunktion zum Loggen mit unterschiedlichen Log-Funktionen"""
|
||||
# Log über die Logger-Instanz
|
||||
|
|
@ -516,3 +623,16 @@ def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name
|
|||
if add_log_func and workflow_id:
|
||||
add_log_func(workflow_id, message, log_type, agent_id, agent_name)
|
||||
|
||||
def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):
|
||||
"""Hilfsfunktion zum Loggen mit unterschiedlichen Log-Funktionen"""
|
||||
# Log über die Logger-Instanz
|
||||
if log_type == "error":
|
||||
logger.error(message)
|
||||
elif log_type == "warning":
|
||||
logger.warning(message)
|
||||
else:
|
||||
logger.info(message)
|
||||
|
||||
# Log über die bereitgestellte Log-Funktion (falls vorhanden)
|
||||
if add_log_func and workflow_id:
|
||||
add_log_func(workflow_id, message, log_type, agent_id, agent_name)
|
||||
146
gwserver/_old_bk_modules/agentservice_registry.py
Normal file
146
gwserver/_old_bk_modules/agentservice_registry.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""
|
||||
Aktualisierte Registry für alle verfügbaren Agenten im System.
|
||||
Enthält jetzt auch den FileCreator-Agenten.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import importlib
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
# Import direkt bekannter Agent-Module
|
||||
# Andere Module werden dynamisch importiert
|
||||
from modules.agentservice_base import BaseAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AgentRegistry:
    """Registry of all agents available in the system.

    Every agent instance is registered under two keys — its ``type`` and its
    ``id`` — so lookups work with either identifier.
    """

    # Lazily created singleton instance (see get_instance()).
    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the singleton instance of the agent registry."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def __init__(self):
        """Initialise the registry and load all available agents.

        Raises:
            RuntimeError: if a singleton instance already exists
                (callers must use get_instance() instead).
        """
        if AgentRegistry._instance is not None:
            raise RuntimeError("Singleton-Instanz existiert bereits - nutze get_instance()")
        # Maps BOTH agent type and agent id to the same agent instance.
        self.agents = {}
        self._load_agents()

    def _load_agents(self):
        """Import every known agent module and register the agent it provides."""
        # Agent modules to load; each lives under modules/ or directly on sys.path.
        agent_modules = [
            "agentservice_agent_coder",
            "agentservice_agent_analyst",
            "agentservice_agent_webcrawler",
            "agentservice_agent_sharepoint",
            "agentservice_agent_documentation",
            "agentservice_agent_filecreator"  # FileCreator agent added
        ]

        for module_name in agent_modules:
            try:
                # Import the module: prefer the packaged path, fall back to top level.
                try:
                    module = importlib.import_module(f"modules.{module_name}")
                except ImportError:
                    module = importlib.import_module(module_name)

                # Derive the expected class name ("analyst" -> "AnalystAgent")
                # and factory name ("get_analyst_agent") from the module name.
                agent_type = module_name.split('_')[-1]
                class_name = f"{agent_type.capitalize()}Agent"
                getter_name = f"get_{agent_type}_agent"

                agent = None

                # Preferred: obtain the agent via a module-level get_<type>_agent() factory.
                if hasattr(module, getter_name):
                    getter_func = getattr(module, getter_name)
                    agent = getter_func()
                    logger.info(f"Agent '{agent.name}' (Typ: {agent.type}) via {getter_name}() geladen")

                # Fallback: instantiate the <Type>Agent class directly.
                elif hasattr(module, class_name):
                    agent_class = getattr(module, class_name)
                    agent = agent_class()
                    logger.info(f"Agent '{agent.name}' (Typ: {agent.type}) direkt instanziiert")

                if agent:
                    # Register the agent under both its type and its id.
                    self.register_agent(agent)
                else:
                    logger.warning(f"Keine Agent-Klasse oder getter-Funktion in Modul {module_name} gefunden")

            except ImportError as e:
                logger.warning(f"Modul {module_name} konnte nicht importiert werden: {e}")
            except Exception as e:
                logger.error(f"Fehler beim Laden des Agenten aus Modul {module_name}: {e}")

    def register_agent(self, agent: BaseAgent):
        """Register an agent in the registry under its type AND its id."""
        agent_type = agent.type
        self.agents[agent_type] = agent
        # Additionally register by id, so get_agent() accepts either key.
        self.agents[agent.id] = agent
        logger.debug(f"Agent '{agent.name}' (Typ: {agent_type}) wurde registriert")

    def get_agent(self, agent_identifier: str) -> Optional[BaseAgent]:
        """
        Return an agent instance looked up by id or type.

        Args:
            agent_identifier: id or type of the desired agent

        Returns:
            The agent instance, or None if not found.
        """
        # Direct hit on either type or id.
        if agent_identifier in self.agents:
            return self.agents[agent_identifier]

        # Otherwise try common spelling variants of the identifier
        # (with and without the "_agent" suffix).
        variants = [
            agent_identifier,
            agent_identifier.replace('_agent', ''),
            f"{agent_identifier}_agent"
        ]

        for variant in variants:
            if variant in self.agents:
                return self.agents[variant]

        logger.warning(f"Agent mit Identifier '{agent_identifier}' nicht gefunden")
        return None

    def get_all_agents(self) -> Dict[str, BaseAgent]:
        """Return the raw registry mapping (each agent appears under two keys)."""
        return self.agents

    def get_agent_infos(self) -> List[Dict[str, Any]]:
        """Return info dicts for all registered agents, one entry per instance."""
        agent_infos = []
        # Deduplicate: each instance is registered twice (by type and by id).
        # Relies on default identity hashing of agent objects.
        seen_agents = set()
        for agent in self.agents.values():
            if agent not in seen_agents:
                agent_infos.append(agent.get_agent_info())
                seen_agents.add(agent)
        return agent_infos

    def initialize_agents_for_workflow(self) -> Dict[str, Dict[str, Any]]:
        """Return a mapping of agent id -> agent info for use in a workflow."""
        initialized_agents = {}
        # Deduplicate instances, same as in get_agent_infos().
        seen_agents = set()
        for agent in self.agents.values():
            if agent not in seen_agents:
                agent_info = agent.get_agent_info()
                agent_id = agent_info["id"]
                initialized_agents[agent_id] = agent_info
                seen_agents.add(agent)
        return initialized_agents
|
||||
1333
gwserver/_old_bk_modules/agentservice_workflow_manager.py
Normal file
1333
gwserver/_old_bk_modules/agentservice_workflow_manager.py
Normal file
File diff suppressed because it is too large
Load diff
469
gwserver/_old_bk_modules/gateway_interface.py
Normal file
469
gwserver/_old_bk_modules/gateway_interface.py
Normal file
|
|
@ -0,0 +1,469 @@
|
|||
import os
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
import importlib
|
||||
from passlib.context import CryptContext
|
||||
|
||||
from connectors.connector_db_json import DatabaseConnector
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# Password-Hashing
|
||||
pwd_context = CryptContext(schemes=["argon2"], deprecated="auto")
|
||||
|
||||
|
||||
class GatewayInterface:
    """
    Interface to the gateway system.
    Manages users and mandates (tenants), backed by a JSON database connector.
    """

    def __init__(self, mandate_id: int = None, user_id: int = None):
        """
        Initialise the gateway interface with an optional mandate and user context.

        Args:
            mandate_id: ID of the current mandate (optional)
            user_id: ID of the current user (optional)
        """
        # The context may be empty at construction time (bootstrap case).
        self.mandate_id = mandate_id
        self.user_id = user_id

        # Data directory for the gateway database files.
        self.data_folder = "_database_gateway"
        os.makedirs(self.data_folder, exist_ok=True)
        logger.info("db for data_gateway attached")

        # Import the data-model module; a failure here is fatal.
        try:
            self.model_module = importlib.import_module("modules.gateway_model")
            logger.info("gateway_model erfolgreich importiert")
        except ImportError as e:
            logger.error(f"Fehler beim Importieren von gateway_model: {e}")
            raise

        # Create the database connector; missing context ids default to 0.
        logger.info(f"API getting connector {mandate_id} {user_id}")
        self.db = DatabaseConnector(
            db_folder=self.data_folder,
            mandate_id=self.mandate_id if self.mandate_id is not None else 0,
            user_id=self.user_id if self.user_id is not None else 0
        )

        # Seed the database with a root mandate/admin user if it is empty.
        self._initialize_database()

    def _initialize_database(self):
        """
        Initialise the database with minimal objects
        (root mandate and admin user) if it does not exist yet.
        """

        # Check whether any mandates exist.
        # Create the root mandate if necessary.
        existing_mandate_id = self.get_initial_id("mandates")
        mandates = self.db.get_recordset("mandates")
        if existing_mandate_id is None or not mandates:
            logger.info("Erstelle Root-Mandant")
            root_mandate = {
                "name": "Root",
                "language": "de"
            }
            created_mandate = self.db.record_create("mandates", root_mandate)
            logger.info(f"Root-Mandant wurde erstellt mit ID {created_mandate['id']}")

            # Update the mandate context from the created record.
            # NOTE(review): assumes the connector stamps a 'user_id' field onto
            # created records — confirm against DatabaseConnector.record_create.
            self.mandate_id = created_mandate['id']
            self.user_id = created_mandate['user_id']

            # Recreate the connector with the correct context.
            self.db = DatabaseConnector(
                db_folder=self.data_folder,
                mandate_id=self.mandate_id,
                user_id=self.user_id
            )

        # Check whether any users exist.
        # Create the admin user if necessary.
        existing_user_id = self.get_initial_id("users")
        users = self.db.get_recordset("users")
        if existing_user_id is None or not users:
            logger.info("Erstelle Admin-Benutzer")
            admin_user = {
                "mandate_id": self.mandate_id,
                "username": "admin",
                "email": "admin@example.com",
                "full_name": "Administrator",
                "disabled": False,
                "language": "de",
                "privilege": "sysadmin",  # sysadmin privilege level
                "hashed_password": self._get_password_hash("admin")  # use a secure password in production!
            }
            created_user = self.db.record_create("users", admin_user)
            logger.info(f"Admin-Benutzer wurde erstellt mit ID {created_user['id']}")

            # Update the user context.
            self.user_id = created_user['id']

            # Recreate the connector with the correct context.
            self.db = DatabaseConnector(
                db_folder=self.data_folder,
                mandate_id=self.mandate_id,
                user_id=self.user_id
            )

    def get_initial_id(self, table: str) -> Optional[int]:
        """
        Return the initial (first-created) ID for a table.

        Args:
            table: name of the table

        Returns:
            The initial ID, or None if not present.
        """
        return self.db.get_initial_id(table)

    def _get_password_hash(self, password: str) -> str:
        """Create an argon2 hash for a password."""
        return pwd_context.hash(password)

    def _verify_password(self, plain_password: str, hashed_password: str) -> bool:
        """Check whether the plain password matches the stored hash."""
        return pwd_context.verify(plain_password, hashed_password)

    def _get_current_timestamp(self) -> str:
        """Return the current timestamp in ISO format."""
        from datetime import datetime
        return datetime.now().isoformat()

    # Mandate methods

    def get_all_mandates(self) -> List[Dict[str, Any]]:
        """Return all mandates."""
        return self.db.get_recordset("mandates")

    def get_mandate(self, mandate_id: int) -> Optional[Dict[str, Any]]:
        """Return a mandate by its ID, or None if not found."""
        mandates = self.db.get_recordset("mandates", record_filter={"id": mandate_id})
        if mandates:
            return mandates[0]
        return None

    def create_mandate(self, name: str, language: str = "de") -> Dict[str, Any]:
        """Create a new mandate and return the created record."""
        mandate_data = {
            "name": name,
            "language": language
        }

        return self.db.record_create("mandates", mandate_data)

    def update_mandate(self, mandate_id: int, mandate_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Update an existing mandate.

        Args:
            mandate_id: the ID of the mandate to update
            mandate_data: the mandate fields to update

        Returns:
            Dict[str, Any]: the updated mandate record

        Raises:
            ValueError: if the mandate was not found
        """
        # Check that the mandate exists.
        mandate = self.get_mandate(mandate_id)
        if not mandate:
            raise ValueError(f"Mandant mit ID {mandate_id} nicht gefunden")

        # Apply the update.
        updated_mandate = self.db.record_modify("mandates", mandate_id, mandate_data)

        return updated_mandate

    def delete_mandate(self, mandate_id: int) -> bool:
        """
        Delete a mandate together with all of its users and their data.

        Args:
            mandate_id: the ID of the mandate to delete

        Returns:
            bool: True if the mandate was deleted successfully, else False
        """
        # Check that the mandate exists.
        mandate = self.get_mandate(mandate_id)
        if not mandate:
            return False

        # Refuse to delete the initial (root) mandate.
        initial_mandate_id = self.get_initial_id("mandates")
        if initial_mandate_id is not None and mandate_id == initial_mandate_id:
            logger.warning(f"Versuch, den Root-Mandanten zu löschen, wurde verhindert")
            return False

        # Find all users of the mandate.
        users = self.get_users_by_mandate(mandate_id)

        # Delete all of the mandate's users and their associated data.
        for user in users:
            self.delete_user(user["id"])

        # Delete the mandate itself.
        success = self.db.record_delete("mandates", mandate_id)

        if success:
            logger.info(f"Mandant mit ID {mandate_id} wurde erfolgreich gelöscht")
        else:
            logger.error(f"Fehler beim Löschen des Mandanten mit ID {mandate_id}")

        return success

    # User methods

    def get_all_users(self) -> List[Dict[str, Any]]:
        """Return all users with password hashes stripped."""
        users = self.db.get_recordset("users")
        # Remove password hashes from the returned records.
        for user in users:
            if "hashed_password" in user:
                del user["hashed_password"]
        return users

    def get_users_by_mandate(self, mandate_id: int) -> List[Dict[str, Any]]:
        """
        Return all users belonging to a given mandate.

        Args:
            mandate_id: the ID of the mandate

        Returns:
            List[Dict[str, Any]]: the mandate's users (password hashes stripped)
        """
        users = self.db.get_recordset("users", record_filter={"mandate_id": mandate_id})
        # Remove password hashes from the returned records.
        for user in users:
            if "hashed_password" in user:
                del user["hashed_password"]
        return users

    def get_user_by_username(self, username: str) -> Optional[Dict[str, Any]]:
        """Return a user record by username, or None.

        NOTE: unlike get_user(), the record is returned INCLUDING
        'hashed_password' — authenticate_user() relies on this.
        """
        users = self.db.get_recordset("users")
        for user in users:
            if user.get("username") == username:
                return user
        return None

    def get_user(self, user_id: int) -> Optional[Dict[str, Any]]:
        """Return a user by ID with the password hash stripped, or None."""
        users = self.db.get_recordset("users", record_filter={"id": user_id})
        if users:
            user = users[0]
            # Strip the password hash from the API-facing copy.
            if "hashed_password" in user:
                user_copy = user.copy()
                del user_copy["hashed_password"]
                return user_copy
            return user
        return None

    def create_user(self, username: str, password: str, email: str = None,
                    full_name: str = None, language: str = "de", mandate_id: int = None,
                    disabled: bool = False, privilege: str = "user") -> Dict[str, Any]:
        """
        Create a new user.

        Args:
            username: the username
            password: the password (stored only as a hash)
            email: the e-mail address (optional)
            full_name: the full name (optional)
            language: the preferred language (default: "de")
            mandate_id: the mandate ID (optional; falls back to the current context)
            disabled: whether the user is disabled (default: False)
            privilege: the privilege level (default: "user")

        Returns:
            Dict[str, Any]: the created user record (password hash stripped)

        Raises:
            ValueError: if the username already exists
        """
        # Check that the username is not taken yet.
        existing_user = self.get_user_by_username(username)
        if existing_user:
            raise ValueError(f"Benutzer '{username}' existiert bereits")

        # Use the supplied mandate_id or fall back to the current context.
        user_mandate_id = mandate_id if mandate_id is not None else self.mandate_id

        user_data = {
            "mandate_id": user_mandate_id,
            "username": username,
            "email": email,
            "full_name": full_name,
            "disabled": disabled,
            "language": language,
            "privilege": privilege,
            "hashed_password": self._get_password_hash(password)
        }

        created_user = self.db.record_create("users", user_data)

        # Strip the password hash from the returned record.
        if "hashed_password" in created_user:
            del created_user["hashed_password"]

        return created_user

    def authenticate_user(self, username: str, password: str) -> Optional[Dict[str, Any]]:
        """
        Authenticate a user by username and password.

        Args:
            username: the username
            password: the password

        Returns:
            Optional[Dict[str, Any]]: the user record (hash stripped),
            or None if authentication fails or the user is disabled.
        """
        user = self.get_user_by_username(username)

        if not user:
            return None

        if not self._verify_password(password, user.get("hashed_password", "")):
            return None

        # Reject disabled users even with a correct password.
        if user.get("disabled", False):
            return None

        # Return a copy without the password hash.
        authenticated_user = {**user}
        if "hashed_password" in authenticated_user:
            del authenticated_user["hashed_password"]

        return authenticated_user

    def update_user(self, user_id: int, user_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Update a user.

        Args:
            user_id: the ID of the user to update
            user_data: the user fields to update; a plain-text "password"
                key is hashed and stored as "hashed_password"

        Returns:
            Dict[str, Any]: the updated user record (password hash stripped)

        Raises:
            ValueError: if the user was not found
        """
        # Fetch the current user including the password hash (directly from the DB).
        users = self.db.get_recordset("users", record_filter={"id": user_id})
        if not users:
            raise ValueError(f"Benutzer mit ID {user_id} nicht gefunden")

        user = users[0]

        # If the password is being changed, hash it (never store plain text).
        if "password" in user_data:
            user_data["hashed_password"] = self._get_password_hash(user_data["password"])
            del user_data["password"]

        # Apply the update.
        updated_user = self.db.record_modify("users", user_id, user_data)

        # Strip the password hash from the returned record.
        if "hashed_password" in updated_user:
            del updated_user["hashed_password"]

        return updated_user

    def disable_user(self, user_id: int) -> Dict[str, Any]:
        """Disable a user (sets the 'disabled' flag)."""
        return self.update_user(user_id, {"disabled": True})

    def enable_user(self, user_id: int) -> Dict[str, Any]:
        """Enable a user (clears the 'disabled' flag)."""
        return self.update_user(user_id, {"disabled": False})

    def _delete_user_referenced_data(self, user_id: int) -> None:
        """
        Delete all data referencing a user.

        Args:
            user_id: the ID of the user
        """
        # Walk the tables that reference this user and delete matching rows.
        # (Currently only 'attributes' is handled here.)

        # Delete the user's attributes; best-effort, errors are logged only.
        try:
            attributes = self.db.get_recordset("attributes", record_filter={"user_id": user_id})
            for attribute in attributes:
                self.db.record_delete("attributes", attribute["id"])
        except Exception as e:
            logger.error(f"Fehler beim Löschen der Attribute für Benutzer {user_id}: {e}")

        # Further tables that might reference the user
        # (depending on the application's database structure).

        logger.info(f"Alle referenzierten Daten für Benutzer {user_id} wurden gelöscht")

    def delete_user(self, user_id: int) -> bool:
        """
        Delete a user together with all data referencing them.

        Args:
            user_id: the ID of the user to delete

        Returns:
            bool: True if the user was deleted successfully, else False
        """
        # Check that the user exists.
        users = self.db.get_recordset("users", record_filter={"id": user_id})
        if not users:
            return False

        # Refuse to delete the initial (root admin) user.
        initial_user_id = self.get_initial_id("users")
        if initial_user_id is not None and user_id == initial_user_id:
            logger.warning("Versuch, den Root-Admin zu löschen, wurde verhindert")
            return False

        # Delete all data referencing the user first.
        self._delete_user_referenced_data(user_id)

        # Delete the user record.
        success = self.db.record_delete("users", user_id)

        if success:
            logger.info(f"Benutzer mit ID {user_id} wurde erfolgreich gelöscht")
        else:
            logger.error(f"Fehler beim Löschen des Benutzers mit ID {user_id}")

        return success
||||
|
||||
|
||||
# Singleton factory for GatewayInterface instances, one per (mandate, user) context.
_gateway_interfaces = {}

def get_gateway_interface(mandate_id: int = None, user_id: int = None) -> GatewayInterface:
    """
    Return a GatewayInterface instance for the given context.
    Reuses existing instances (one per context key).
    """
    # Cache key combines both context ids ("None_None" for the default context).
    context_key = f"{mandate_id}_{user_id}"
    if context_key not in _gateway_interfaces:
        _gateway_interfaces[context_key] = GatewayInterface(mandate_id, user_id)
    return _gateway_interfaces[context_key]

# Init
# Eagerly create the default-context interface at import time
# (module side effect: may create the database and seed records).
get_gateway_interface()
|
||||
94
gwserver/_old_bk_modules/gateway_model.py
Normal file
94
gwserver/_old_bk_modules/gateway_model.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class Label(BaseModel):
    """Label for an attribute or class with multi-language support."""
    default: str
    translations: Dict[str, str] = {}

    def get_label(self, language: str = None):
        """Return the label in the given language, or the default when unavailable."""
        if not language:
            return self.default
        return self.translations.get(language, self.default)
|
||||
|
||||
|
||||
class Mandate(BaseModel):
    """Data model for a mandate (tenant)."""
    id: int = Field(description="Eindeutige ID des Mandanten")
    name: str = Field(description="Name des Mandanten")
    language: str = Field(description="Standardsprache des Mandanten")

    # Multilingual display label for the class itself.
    label: Label = Field(
        default=Label(default="Mandant", translations={"en": "Mandate", "fr": "Mandat"}),
        description="Label für die Klasse"
    )

    # Display labels for the individual attributes.
    # NOTE(review): annotated class attribute — pydantic treats this as a model
    # field with a default value; confirm that is intended.
    field_labels: Dict[str, Label] = {
        "id": Label(default="ID", translations={}),
        "name": Label(default="Name des Mandanten", translations={"en": "Mandate name", "fr": "Nom du mandat"}),
        "language": Label(default="Sprache", translations={"en": "Language", "fr": "Langue"})
    }
|
||||
|
||||
class User(BaseModel):
    """Data model for a user."""
    id: int = Field(description="Eindeutige ID des Benutzers")
    mandate_id: int = Field(description="ID des zugehörigen Mandanten")
    username: str = Field(description="Benutzername für die Anmeldung")
    email: Optional[str] = Field(None, description="E-Mail-Adresse des Benutzers")
    full_name: Optional[str] = Field(None, description="Vollständiger Name des Benutzers")
    language: str = Field(description="Bevorzugte Sprache des Benutzers")
    disabled: Optional[bool] = Field(False, description="Gibt an, ob der Benutzer deaktiviert ist")
    privilege: str = Field(description="Berechtigungsstufe")  # sysadmin, admin, user

    # Multilingual display label for the class itself.
    label: Label = Field(
        default=Label(default="Benutzer", translations={"en": "User", "fr": "Utilisateur"}),
        description="Label für die Klasse"
    )

    # Display labels for the individual attributes.
    field_labels: Dict[str, Label] = {
        "id": Label(default="ID", translations={}),
        "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
        "username": Label(default="Benutzername", translations={"en": "Username", "fr": "Nom d'utilisateur"}),
        "email": Label(default="E-Mail", translations={"en": "Email", "fr": "E-mail"}),
        "full_name": Label(default="Vollständiger Name", translations={"en": "Full name", "fr": "Nom complet"}),
        "language": Label(default="Sprache", translations={"en": "Language", "fr": "Langue"}),
        "disabled": Label(default="Deaktiviert", translations={"en": "Disabled", "fr": "Désactivé"}),
        "privilege": Label(default="Berechtigungsstufe", translations={"en": "Access level", "fr": "Niveau d'accès"}),
    }
|
||||
|
||||
|
||||
class UserInDB(User):
    """Extended user class that includes the password hash (DB-internal)."""
    hashed_password: str = Field(description="Hash des Benutzerpassworts")

    # Multilingual display label for this class (overrides User.label default).
    label: Label = Field(
        default=Label(default="Benutzer Zugriff", translations={"en": "User Access", "fr": "Accès de l'utilisateur"}),
        description="Label für die Klasse"
    )

    # Extra display label for the password field.
    # NOTE(review): this redefinition REPLACES the inherited User.field_labels
    # default rather than extending it — confirm that is intended.
    field_labels: Dict[str, Label] = {
        "hashed_password": Label(default="Passwort-Hash", translations={"en": "Password hash", "fr": "Hachage de mot de passe"})
    }
|
||||
|
||||
|
||||
class Token(BaseModel):
    """Data model for an authentication token."""
    access_token: str = Field(description="Das ausgestellte Zugriffstoken")
    token_type: str = Field(description="Typ des Tokens (meist 'bearer')")

    # Multilingual display label for the class itself.
    label: Label = Field(
        default=Label(default="Token", translations={"en": "Token", "fr": "Jeton"}),
        description="Label für die Klasse"
    )

    # Display labels for the individual attributes.
    field_labels: Dict[str, Label] = {
        "access_token": Label(default="Zugriffstoken", translations={"en": "Access token", "fr": "Jeton d'accès"}),
        "token_type": Label(default="Token-Typ", translations={"en": "Token type", "fr": "Type de jeton"})
    }
|
||||
1265
gwserver/_old_bk_modules/lucydom_interface.py
Normal file
1265
gwserver/_old_bk_modules/lucydom_interface.py
Normal file
File diff suppressed because it is too large
Load diff
149
gwserver/_old_bk_modules/lucydom_model.py
Normal file
149
gwserver/_old_bk_modules/lucydom_model.py
Normal file
|
|
@ -0,0 +1,149 @@
|
|||
from pydantic import BaseModel, Field
|
||||
from typing import List, Dict, Any, Optional
|
||||
from datetime import datetime
|
||||
|
||||
|
||||
class Label(BaseModel):
    """Label for an attribute or class with multi-language support."""
    default: str
    translations: Dict[str, str] = {}

    def get_label(self, language: str = None):
        """Return the label in the given language, or the default when unavailable."""
        if not language:
            return self.default
        return self.translations.get(language, self.default)
|
||||
|
||||
|
||||
class FileItem(BaseModel):
    """Data model for an uploaded data object (file)."""
    id: int = Field(description="Eindeutige ID des Datenobjekts")
    mandate_id: int = Field(description="ID des zugehörigen Mandanten")
    user_id: int = Field(description="ID des Erstellers")
    name: str = Field(description="Name des Datenobjekts")
    type: str = Field(description="Typ des Datenobjekts ('document', 'image', etc.)")
    # NOTE(review): stored upload records appear to hold integer sizes (bytes),
    # while this field is typed as str — confirm whether coercion is intended.
    size: Optional[str] = Field(None, description="Größe des Datenobjekts")
    upload_date: Optional[str] = Field(None, description="Datum des Hochladens")
    content_type: Optional[str] = Field(None, description="Content-Type des Datenobjekts")
    path: Optional[str] = Field(None, description="Pfad zum Datenobjekt")

    # Multilingual display label for the class itself.
    label: Label = Field(
        default=Label(default="Datenobjekt", translations={"en": "Data Object", "fr": "Objet de données"}),
        description="Label für die Klasse"
    )

    # Display labels for the individual attributes.
    field_labels: Dict[str, Label] = {
        "id": Label(default="ID", translations={}),
        "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
        "user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
        "name": Label(default="Name", translations={"en": "Name", "fr": "Nom"}),
        "type": Label(default="Typ", translations={"en": "Type", "fr": "Type"}),
        "size": Label(default="Größe", translations={"en": "Size", "fr": "Taille"}),
        "upload_date": Label(default="Upload-Datum", translations={"en": "Upload date", "fr": "Date de téléchargement"}),
        "content_type": Label(default="Content-Type", translations={"en": "Content type", "fr": "Type de contenu"}),
        "path": Label(default="Pfad", translations={"en": "Path", "fr": "Chemin"})
    }
|
||||
|
||||
|
||||
class Prompt(BaseModel):
    """Data model for a stored prompt."""
    id: int = Field(description="Eindeutige ID des Prompts")
    mandate_id: int = Field(description="ID des zugehörigen Mandanten")
    user_id: int = Field(description="ID des Erstellers")
    content: str = Field(description="Inhalt des Prompts")
    name: str = Field(description="Anzeigename des Prompts")

    # Multilingual display label for the class itself.
    label: Label = Field(
        default=Label(default="Prompt", translations={"en": "Prompt", "fr": "Invite"}),
        description="Label für die Klasse"
    )

    # Display labels for the individual attributes.
    field_labels: Dict[str, Label] = {
        "id": Label(default="ID", translations={}),
        "mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
        "user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
        "content": Label(default="Inhalt", translations={"en": "Content", "fr": "Contenu"}),
        "name": Label(default="Name", translations={"en": "Label", "fr": "Nom"}),
    }
|
||||
|
||||
|
||||
# Neue Workflow-Modellklassen
|
||||
|
||||
class DocumentSource(BaseModel):
    """Source metadata of a document within a workflow."""
    type: str = Field(description="Typ der Quelle ('prompt', 'file', 'clipboard')")
    path: Optional[str] = Field(None, description="Speicherpfad (für Dateien)")
    name: str = Field(description="Anzeigename der Datei")
    size: Optional[int] = Field(None, description="Größe in Bytes")
    lines: Optional[int] = Field(None, description="Zeilenanzahl (für Textdateien)")
    content_type: Optional[str] = Field(None, description="MIME-Typ")
    upload_date: Optional[str] = Field(None, description="Uploaddatum")
|
||||
|
||||
class DocumentContent(BaseModel):
    """One content element of a document within a workflow."""
    label: Optional[str] = Field(None, description="Optionale Bezeichnung")
    type: str = Field(description="Typ des Inhalts ('text', 'image', 'chart', etc.)")
    text: Optional[str] = Field(None, description="Textinhalt")
    is_extracted: Optional[bool] = Field(False, description="Flag, ob aus Originaldatei extrahiert")
|
||||
|
||||
class Document(BaseModel):
    """A document within a workflow (prompt text or a referenced file).

    Bundles the source metadata with the list of extracted/attached
    content entries.
    """

    id: str = Field(description="Eindeutige ID des Dokuments")
    # Where this document originated (prompt, file upload, clipboard).
    source: DocumentSource = Field(description="Quellmetadaten")
    contents: List[DocumentContent] = Field(description="Dokumentinhalte")
|
||||
|
||||
class DataStats(BaseModel):
    """Performance and data-usage statistics for a message or workflow.

    All fields are optional so partially-measured runs can still be
    recorded.
    """

    processing_time: Optional[float] = Field(default=None, description="Verarbeitungszeit in Sekunden")
    # Token usage as reported by the AI model, if any was invoked.
    token_count: Optional[int] = Field(default=None, description="Token-Anzahl (für KI-Modelle)")
    bytes_sent: Optional[int] = Field(default=None, description="Gesendete Bytes")
    bytes_received: Optional[int] = Field(default=None, description="Empfangene Bytes")
|
||||
|
||||
class Message(BaseModel):
    """A single message in a workflow conversation.

    Carries identity/ordering metadata, lifecycle timestamps, status,
    the sender role, attached documents, optional usage statistics and
    the text content itself.
    """

    # Identity and threading ----------------------------------------------
    id: str = Field(description="Eindeutige ID der Nachricht")
    workflow_id: str = Field(description="Referenz zum übergeordneten Workflow")
    # Set when this message answers a previous one.
    parent_message_id: Optional[str] = Field(default=None, description="Referenz zur beantworteten Nachricht")

    # Lifecycle -----------------------------------------------------------
    started_at: str = Field(description="Zeitstempel für Nachrichtenerstellung")
    finished_at: Optional[str] = Field(default=None, description="Zeitstempel für Nachrichtenabschluss")
    # Used to order messages within a workflow.
    sequence_no: int = Field(description="Sequenznummer für Sortierung")

    status: str = Field(description="Status der Nachricht ('pending', 'processing', 'completed', 'failed')")
    role: str = Field(description="Rolle des Absenders ('system', 'user', 'assistant')")

    # Payload -------------------------------------------------------------
    data_stats: Optional[DataStats] = Field(default=None, description="Statistiken")
    documents: Optional[List[Document]] = Field(default=None, description="Dokumente in dieser Nachricht")
    content: Optional[str] = Field(default=None, description="Textinhalt der Nachricht")
    # Which agent produced this message, when applicable.
    agent_type: Optional[str] = Field(default=None, description="Typ des verwendeten Agenten")
|
||||
|
||||
class Workflow(BaseModel):
    """Workflow object for the multi-agent system.

    Tracks identity, ownership, lifecycle status/timestamps and the full
    message and log history of one workflow run.
    """

    id: str = Field(description="Eindeutige ID des Workflows")
    name: Optional[str] = Field(None, description="Name des Workflows")
    mandate_id: int = Field(description="ID des Mandanten")
    user_id: int = Field(description="ID des Benutzers")
    status: str = Field(description="Status des Workflows ('running', 'failed', 'stopped')")
    started_at: str = Field(description="Startzeitpunkt")
    last_activity: str = Field(description="Zeitpunkt der letzten Aktivität")
    current_round: int = Field(description="Aktuelle Runde")
    # Set when the workflow is paused pending user input.
    waiting_for_user: bool = Field(False, description="Flag, ob auf Benutzereingabe gewartet wird")

    data_stats: Optional[Dict[str, Any]] = Field(None, description="Gesamt-Statistiken")
    # default_factory=list instead of default=[]: a literal [] creates one
    # shared mutable object at class-definition time; a factory produces a
    # fresh list per instance (recommended Pydantic practice).
    messages: List[Message] = Field(default_factory=list, description="Nachrichtenverlauf")
    logs: List[Dict[str, Any]] = Field(default_factory=list, description="Protokolleinträge")
|
||||
|
||||
# Anfragemodelle für die API
|
||||
|
||||
class WorkflowCreateRequest(BaseModel):
    """API request body for creating a new workflow.

    Carries the initial prompt, an optional display name and the IDs of
    any files to attach.
    """

    name: Optional[str] = Field(None, description="Name des Workflows")
    prompt: str = Field(description="Zu verwendender Prompt")
    # default_factory=list instead of default=[]: avoids a single shared
    # mutable list object as the field default.
    files: List[int] = Field(default_factory=list, description="Liste von Datei-IDs")
|
||||
|
||||
class UserInputRequest(BaseModel):
    """API request body for sending user input to a running workflow.

    Contains the user's message plus IDs of any additional files to
    attach to the workflow.
    """

    message: str = Field(description="Nachricht des Benutzers")
    # default_factory=list instead of default=[]: avoids a single shared
    # mutable list object as the field default.
    additional_files: List[int] = Field(default_factory=list, description="Liste zusätzlicher Datei-IDs")
|
||||
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
{
|
||||
"id": "02c43847-f96d-4638-aaa5-c8f926216405",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 00:21:27",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T00:21:27.780332",
|
||||
"last_activity": "2025-04-07T00:21:28.829553",
|
||||
"prompt": "die liste ist: auszug_liste_positionen.pdf",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_ee59bd4b-1014-40ae-8430-c427b71b637d",
|
||||
"workflow_id": "02c43847-f96d-4638-aaa5-c8f926216405",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T00:21:27.809178",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "die liste ist: auszug_liste_positionen.pdf",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Die Agenten haben keine spezifischen Aufgaben durchgeführt. Gibt es etwas, wobei ich Ihnen helfen kann?",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_9bbbef1d-5d0b-48fc-94d2-40b710395f8e",
|
||||
"message": "Neuer Benutzereingabe erhalten - Vorherigen Workflow beenden",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:27.784294",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_0263aa06-39c6-474e-874b-0661de064898",
|
||||
"message": "Starte Workflow-Ausführung, Nachrichtenlänge: 42, 0 Dateien",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:27.793111",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_b2e8023d-96de-499c-8b56-2814703a5d67",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:27.796306",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_f1a1b228-9496-497c-82ce-3b7e995148ce",
|
||||
"message": "Agenten-Entscheidung abgeschlossen: 0 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:28.815135",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_ae67dac3-686a-4edc-ad32-c69244878e78",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Die Agenten haben keine spezifischen Aufgaben durc...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:28.829553",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
{
|
||||
"id": "0636a071-9c84-4288-a059-a095f03b9ac9",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 23:21:08",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T23:21:08.441429",
|
||||
"last_activity": "2025-04-07T23:21:19.748688",
|
||||
"prompt": "bitte extrahiere aus dem pdf dokument die tabelle, nur die gelb markierten zeilen, und liefere das resultat als html tabelle",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_c332599d-ebaa-4663-a6cf-92836bc28ab5",
|
||||
"workflow_id": "0636a071-9c84-4288-a059-a095f03b9ac9",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T23:21:08.457429",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 5,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 5,
|
||||
"name": "auszug_liste_positionen.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size": 299729,
|
||||
"upload_date": "2025-04-07T23:21:08.596674"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "\n\n"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "bitte extrahiere aus dem pdf dokument die tabelle, nur die gelb markierten zeilen, und liefere das resultat als html tabelle",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"id": "msg_e651db4b-e7aa-4602-bd90-bc3035046aa8",
|
||||
"workflow_id": "0636a071-9c84-4288-a059-a095f03b9ac9",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T23:21:10.828027",
|
||||
"finished_at": "2025-04-07T23:21:10.839250",
|
||||
"sequence_no": 2,
|
||||
"status": "completed",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "Ich habe als Entwickler die Anforderungen analysiert und folgenden Code entwickelt:\n\n```python\ndef process_data(input_file, output_file):\n \"\"\"\n Verarbeitet Daten aus einer Eingabedatei und speichert die Ergebnisse.\n \n Args:\n input_file: Pfad zur Eingabedatei\n output_file: Pfad zur Ausgabedatei\n \"\"\"\n import pandas as pd\n \n # Daten einlesen\n df = pd.read_csv(input_file)\n \n # Daten verarbeiten\n # Hier würde die eigentliche Datenverarbeitung stattfinden\n processed_df = df.copy()\n \n # Beispiel: Spalten filtern und transformieren\n if 'data' in processed_df.columns:\n processed_df['transformed'] = processed_df['data'] * 2\n \n # Ergebnis speichern\n processed_df.to_csv(output_file, index=False)\n \n return len(processed_df)\n\n# Beispielaufruf\nif __name__ == \"__main__\":\n result = process_data(\"input.csv\", \"output.csv\")\n print(f\"Verarbeitet: result Datensätze\")\n```\n\nDer Code liest eine CSV-Datei ein, führt eine einfache Transformation durch und speichert das Ergebnis. \nDie Hauptfunktion `process_data` ist modular gestaltet und kann leicht erweitert werden.\n\nZur Verwendung:\n1. Stelle sicher, dass pandas installiert ist (`pip install pandas`)\n2. Passe die Dateipfade im Beispielaufruf an\n3. Erweitere die Verarbeitung je nach deinen spezifischen Anforderungen",
|
||||
"agent_type": "coder",
|
||||
"agent_id": "coder_agent",
|
||||
"agent_name": "Entwickler"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Die Agenten haben ihre Aufgaben abgeschlossen. Hier ist eine Zusammenfassung der Ergebnisse:\n\n**Zusammenfassung der Agentenergebnisse:**\n\n**Entwickler:**\n\n- **Ziel:** Entwicklung eines Codes zur Verarbeitung von Daten aus einer Eingabedatei und Speicherung der Ergebnisse in einer Ausgabedatei.\n \n- **Hauptfunktion:** `process_data(input_file, output_file)`\n - **Beschreibung:** Diese Funktion liest eine CSV-Datei ein, verarbeitet die Daten und speichert das Ergebnis in einer neuen CSV-Datei.\n - **Parameter:**\n - `input_file`: Pfad zur Eingabedatei.\n - `output_file`: Pfad zur Ausgabedatei.\n - **Verarbeitungsschritte:**\n - Einlesen der Daten mit `pandas`.\n - Kopieren der Daten für die Verarbeitung.\n - Beispielhafte Transformation: Wenn die Spalte 'data' vorhanden ist, wird eine neue Spalte 'transformed' erstellt, die den doppelten Wert der 'data'-Spalte enthält.\n - Speichern des verarbeiteten DataFrames in der Ausgabedatei.\n - **Rückgabewert:** Anzahl der verarbeiteten Datensätze.\n\n- **Beispielaufruf:**\n - Der Code enthält einen Beispielaufruf, der die Funktion mit den Dateipfaden \"input.csv\" und \"output.csv\" ausführt und die Anzahl der verarbeiteten Datensätze ausgibt.\n\n- **Empfehlungen zur Verwendung:**\n 1. Sicherstellen, dass die Bibliothek `pandas` installiert ist (`pip install pandas`).\n 2. Anpassen der Dateipfade im Beispielaufruf entsprechend den eigenen Anforderungen.\n 3. 
Erweiterung der Datenverarbeitung je nach spezifischen Anforderungen.\n\n**Wichtige Erkenntnisse:**\n- Der Code ist modular und bietet eine einfache Grundlage für die Datenverarbeitung, die leicht erweitert werden kann.\n- Die Verwendung von `pandas` ermöglicht eine effiziente Datenmanipulation und -speicherung.\n\n**Empfehlungen:**\n- Bei Bedarf spezifische Datenverarbeitungslogik hinzufügen, um den Anforderungen des Projekts gerecht zu werden.\n- Sicherstellen, dass die Eingabedatei das erwartete Format hat, insbesondere die Existenz der 'data'-Spalte, um Fehler zu vermeiden.\n\nBenötigen Sie weitere Informationen oder haben Sie Fragen dazu?",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_6521d737-3b10-412c-a6ba-e6bbeb2cc8b1",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:08.445434",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_7579ddac-0b3b-4b46-ac55-69deb5f8fb82",
|
||||
"message": "1 Dateien werden verarbeitet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:08.463960",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_1e112fd0-cfab-41e5-a354-4fedc0d24504",
|
||||
"message": "File auszug_liste_positionen.pdf read successfully",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:08.586252",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_e1bf4d62-f6d8-4599-87ca-79c77e5531b4",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:08.638540",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_a1654c7e-c13c-49c5-a428-9a1da8f76af3",
|
||||
"message": "Moderator analysiert die Anfrage und entscheidet über System-Agenten...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:08.649160",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_b30733be-ccb7-44a2-952c-3530f3ad2b31",
|
||||
"message": "Moderator hat 1 System-Agenten ausgewählt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:10.685302",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_6b8ca045-b4ce-413d-964e-a379040f8a80",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 1 System-Agenten ausgewählt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:10.779736",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_3152397f-f06b-4924-a036-09d8ed7d7231",
|
||||
"message": "Agent coder_agent wurde ausgewählt mit Aufgabe: Extrahiere die Tabelle aus dem PDF-Dokument 'auszu...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:10.790734",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_7ab4c761-b90c-421c-bd6c-6234f8b5da17",
|
||||
"message": "1 System-Agenten werden ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:10.800734",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_cffbf439-bf79-4fe9-a967-11944968f123",
|
||||
"message": "Agent coder_agent wird ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:10.810733",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_445efab6-236c-448f-a02b-c0bde7515594",
|
||||
"message": "Agent 'coder_agent' wird ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:10.817365",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_09b1b4a8-2e41-43ed-871e-2eaccb02d3a7",
|
||||
"message": "Agent 'coder_agent' hat geantwortet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:10.976046",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_f8f37e48-03ac-4f71-a7c2-b2acb31ba309",
|
||||
"message": "Agent coder_agent hat seine Aufgabe abgeschlossen",
|
||||
"type": "success",
|
||||
"timestamp": "2025-04-07T23:21:10.987008",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_6b4829be-cfbb-4c47-88a5-5f595aa61f05",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Die Agenten haben ihre Aufgaben abgeschlossen. Hie...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:21:19.748688",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
{
|
||||
"id": "0986cb9b-0bb3-4717-a67f-b8af6edab82a",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 07:57:33",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T07:57:33.607281",
|
||||
"last_activity": "2025-04-07T07:57:44.323158",
|
||||
"prompt": "kannst du mir den prozessablauf gemäss bild beschreiben?",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_6a842a04-1364-4911-b76c-759c204e4c80",
|
||||
"workflow_id": "0986cb9b-0bb3-4717-a67f-b8af6edab82a",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T07:57:33.624871",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 4,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 4,
|
||||
"name": "LF-Details.png",
|
||||
"content_type": "image/png",
|
||||
"size": 253009,
|
||||
"upload_date": "2025-04-07T07:57:42.989309"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Image Analysis:\nThe image is a detailed flowchart illustrating a product development and release process involving multiple teams: Product Team, Engineering Team, Q&A Team, and Operations. Here's a breakdown of each section:\n\n### Input\n- **Sources**: Customers, Sales, Our Ideas\n- These inputs feed into the Product Team's processes.\n\n### Product Team\n1. **Discover**\n - **Collect**: Ideas & Inputs\n - **Qualify**: Analyze and match against business priorities\n\n2. **Define**\n - **Structure & Document**: Write specifications, define acceptance criteria\n - **Explore & Design**: UI Design, experiments, etc.\n\n3. **Shape**\n - **Roadmap**: Define roadmap to meet opportunities and protect development stability\n - **Scoping**: Define next version's scope\n - **Manage**: Tickets in Product\n\n### Engineering Team\n- **Assess**\n - **Manage**: Move assessed tickets to Engineering for future versions\n - **Estimate**: Define implementation, feasibility, and effort\n\n- **Build**\n - **Manage**: Track progress in dashboard\n - **Factory**: Develop tickets in Engineering for the next version\n\n### Q&A Team\n- **Validate**\n - **Testing**: Test functionality against acceptance\n - **Analysis**: Analyze tickets, events, deviations\n\n### Operations\n1. **Release**\n - **Manage**: Ensure stable changes\n\n2. **Run & Monitor**\n - **Manage**: Ensure stable operation\n - **Service & Support**: Deliver 1st level support according to agreed SLA\n\n### Output\n- **Product**: The final output of the process\n\n### Tool Integration\n- **Ticketing System**: Visual representation of ticket management\n- **Roadmap**: Timeline of project phases\n- **Management Dashboards**: Visual data representation for tracking progress\n\nThe flowchart uses arrows to indicate the flow of processes and responsibilities between different teams and stages. The overall process is designed to ensure a structured approach to product development, validation, and release."
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "kannst du mir den prozessablauf gemäss bild beschreiben?",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Bitte beschreibe den Prozessablauf gemäß dem Bild 'LF-Details.png', das einen detaillierten Flussdiagramm des Produktentwicklungs- und Freigabeprozesses mit mehreren Teams zeigt.",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_8cffc836-4259-4dfc-8ec5-c615dbcf2181",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:33.613278",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_a482c6b7-7231-476b-a80f-15a9b3bdc456",
|
||||
"message": "1 Dateien werden verarbeitet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:33.634284",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_dff2a9d0-e19a-4b97-959a-9f371f16c0b2",
|
||||
"message": "Image LF-Details.png analyzed successfully",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:42.961160",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_f784ac58-a3d1-492a-a8c6-f4a9fe6c8af7",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:43.029921",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_bf31bb97-6eb0-4db0-aec8-9d647fc7aec5",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 1 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:44.303160",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_2b7b963d-fa4a-44de-a654-8b39eb60c95b",
|
||||
"message": "Agent user_agent wurde ausgewählt mit Aufgabe: Bitte beschreibe den Prozessablauf gemäß dem Bild ...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:44.313158",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_459c01bb-a474-43d6-be59-54f7fd9afa8d",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Bitte beschreibe den Prozessablauf gemäß dem Bild ...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:44.323158",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,81 @@
|
|||
{
|
||||
"id": "3d0e9cb6-035c-44b1-943d-a98d53567685",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 00:21:54",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T00:21:54.677139",
|
||||
"last_activity": "2025-04-07T00:21:57.890423",
|
||||
"prompt": "die liste ist: auszug_liste_positionen.pdf",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_c6f67930-e262-4dea-b72e-23c5a59298ab",
|
||||
"workflow_id": "3d0e9cb6-035c-44b1-943d-a98d53567685",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T00:21:54.721119",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "die liste ist: auszug_liste_positionen.pdf",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Die Agenten haben keine spezifischen Aufgaben durchgeführt. Gibt es etwas, wobei ich Ihnen helfen kann?",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_acacdf93-626a-4309-a76e-189f4d780281",
|
||||
"message": "Neuer Benutzereingabe erhalten - Vorherigen Workflow beenden",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:54.692639",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_aad6ddbc-308a-4e85-be31-a67768eadaa8",
|
||||
"message": "Starte Workflow-Ausführung, Nachrichtenlänge: 42, 0 Dateien",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:54.702189",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_62e4b39e-c6a3-45ff-9d2c-ba0118d08ca4",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:54.712112",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_459e9021-7543-48d6-a5f5-9887e718fd6e",
|
||||
"message": "Agenten-Entscheidung abgeschlossen: 0 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:57.881389",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_785b71f7-d884-4a4b-91bd-8e09dbca4f45",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Die Agenten haben keine spezifischen Aufgaben durc...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:21:57.890423",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
{
|
||||
"id": "5e1f3e55-9f1b-44ea-ad23-f8d2ec044c93",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 00:23:14",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T00:23:15.535415",
|
||||
"last_activity": "2025-04-07T00:23:16.916521",
|
||||
"prompt": "kannst du mir eine tabelle der positionen auf beiliegender liste machen?",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_72a8c8f0-b4b8-4e71-b9e0-ca0b82b7e101",
|
||||
"workflow_id": "5e1f3e55-9f1b-44ea-ad23-f8d2ec044c93",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T00:23:15.567895",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 5,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 5,
|
||||
"name": "auszug_liste_positionen.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size": 299729,
|
||||
"upload_date": "2025-04-07T00:23:15.596093"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "\n\n"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "kannst du mir eine tabelle der positionen auf beiliegender liste machen?",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Bitte analysiere die Benutzeranfrage und die angehängte PDF-Datei 'auszug_liste_positionen.pdf', um eine Tabelle der Positionen zu erstellen. Nutze deine Fähigkeiten zur Beantwortung von Fragen und zur Bereitstellung zusätzlicher Informationen, um sicherzustellen, dass die Tabelle korrekt und vollständig ist.",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_325b65f9-64af-45c9-877e-bbdb17f9ab7b",
|
||||
"message": "Neuer Benutzereingabe erhalten - Vorherigen Workflow beenden",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:23:15.539680",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_e7829c3d-8980-4513-9c2f-fc7ed29debf7",
|
||||
"message": "Starte Workflow-Ausführung, Nachrichtenlänge: 73, 1 Dateien",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:23:15.549067",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_3ad9aa6a-fbaf-4fcd-9da9-e567b94cad3b",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:23:15.558896",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_c317026f-671b-4b2d-b67b-661d91a476ef",
|
||||
"message": "1 Dateien werden verarbeitet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:23:15.573899",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_ba71fd87-a165-49c2-9318-8e6eb8b328bc",
|
||||
"message": "File auszug_liste_positionen.pdf read successfully",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:23:15.586126",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_861adfb4-9c69-47ea-84be-3a584868f97c",
|
||||
"message": "Agenten-Entscheidung abgeschlossen: 1 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:23:16.898589",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_fcf2859c-d733-407a-9ff2-4f1586faeb20",
|
||||
"message": "Agent user_agent wurde ausgewählt mit Aufgabe: Bitte analysiere die Benutzeranfrage und die angeh...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:23:16.908821",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_3f5c41e3-df06-42ab-936a-c9e89da2536b",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Bitte analysiere die Benutzeranfrage und die angeh...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:23:16.916521",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,123 @@
|
|||
{
|
||||
"id": "5ebd5fec-901d-4c2e-a2af-9d802a60fa14",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 09:29:04",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T09:29:04.812207",
|
||||
"last_activity": "2025-04-07T09:29:16.150391",
|
||||
"prompt": "kannst du mir diesen prozess gemäss bild beschreiben?",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_48b369e0-828d-45ae-8e46-62040827e88c",
|
||||
"workflow_id": "5ebd5fec-901d-4c2e-a2af-9d802a60fa14",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T09:29:04.822034",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 3,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 3,
|
||||
"name": "LF-Current.png",
|
||||
"content_type": "image/png",
|
||||
"size": 126277,
|
||||
"upload_date": "2025-04-07T09:29:15.136789"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Image Analysis:\nThe image is a flowchart illustrating a product development process involving both a Product Team and an Engineering Team. Here's a detailed description:\n\n### Input Section\n- **Input Cloud**: Contains sources of input such as \"Customers,\" \"Sales,\" and \"Our ideas.\"\n\n### Product Team\n1. **Discover**\n - **Collect**: Ideas & Inputs.\n - **Qualify**: Analyze and match in the team.\n\n2. **Define**\n - **Structure & Document**: Write specifications, define acceptance criteria (Product > Inputs & Ideas).\n - **Explore & Design**: UI Design, Experiments, etc. (Miro: others).\n\n3. **Shape**\n - **Roadmap**: Define a roadmap.\n - **Scoping**: Define next version's scope.\n - **Manage**: Tickets in Product.\n\n### Engineering Team\n1. **Build**\n - **Manage**: Track progress in dashboard.\n - **Factory**: Develop tickets in Engineering > Next Version.\n\n2. **Release**\n - **Manage**: Ensure stable changes.\n\n3. **Run & Monitor**\n - **Manage**: Ensure stable operation.\n - **Service & Support**: Deliver 1st Level support according to agreed SLA.\n\n4. **Assess**\n - **Manage**: Move assessed tickets to Engineering > Future Versions.\n - **Estimate**: Assess tickets in \"Product > Analysis,\" define implementation, feasibility, and effort.\n\n### Output Section\n- **Output Box**: Contains \"Product.\"\n\n### Tool Integration\n- **Ticketing System**: Shows a list of categories such as \"Products,\" \"Inputs & Ideas,\" \"Analysis,\" etc.\n- **Roadmap**: Mentioned as part of tool integration.\n- **Management Dashboards**: Mentioned as part of tool integration.\n\n### Flow\n- Arrows indicate the flow of processes between different stages, with blue arrows for standard flow and a red arrow for the release stage.\n\nThe chart visually represents the workflow from input collection to product release and monitoring, integrating both product management and engineering tasks."
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "kannst du mir diesen prozess gemäss bild beschreiben?",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Ich habe Ihre Anfrage geprüft. Wie kann ich Ihnen konkret weiterhelfen?",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_77f86a02-6888-41b2-83ca-78721b35d01d",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T09:29:04.816792",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_48bfe47f-3b92-448e-95da-11f6fee5a1a1",
|
||||
"message": "1 Dateien werden verarbeitet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T09:29:04.832952",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_960d1fbe-0bb1-4fde-abc7-33df614c6683",
|
||||
"message": "Image LF-Current.png analyzed successfully",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T09:29:15.119794",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_e18c28be-2801-43db-9776-21a14b292b78",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T09:29:15.197388",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_d82582ef-ced0-4a26-97e0-946b183005e0",
|
||||
"message": "Moderator analysiert die Anfrage und entscheidet über System-Agenten...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T09:29:15.209386",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_27ae68fd-c850-40a4-9a0e-f80eb01ae698",
|
||||
"message": "Moderator hat entschieden, keine System-Agenten zu verwenden",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T09:29:16.018091",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_811d01d9-282a-48f9-9872-eb19860e4d29",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 0 System-Agenten ausgewählt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T09:29:16.149380",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_18ea6b08-50f0-488b-b214-b410daf089dc",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Ich habe Ihre Anfrage geprüft. Wie kann ich Ihnen ...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T09:29:16.150391",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
@ -0,0 +1,115 @@
|
|||
{
|
||||
"id": "9c22a025-85d4-46e6-ab3a-7e13a6be3060",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 07:45:30",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T07:45:31.343240",
|
||||
"last_activity": "2025-04-07T07:45:44.462133",
|
||||
"prompt": "Kannst Du mir den Prozessablauf gemäss Bild beschreiben?",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_ffe3db2e-15c0-4f86-bbe9-e1bff753f15e",
|
||||
"workflow_id": "9c22a025-85d4-46e6-ab3a-7e13a6be3060",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T07:45:31.358245",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 4,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 4,
|
||||
"name": "LF-Details.png",
|
||||
"content_type": "image/png",
|
||||
"size": 253009,
|
||||
"upload_date": "2025-04-07T07:45:43.036391"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Image Analysis:\nThe image is a flowchart illustrating a product development process involving multiple teams: Product Team, Engineering Team, Q&A Team, and Operations. Here's a detailed breakdown:\n\n### Input\n- **Sources**: Customers, Sales, Our Ideas\n- These inputs feed into the Product Team's processes.\n\n### Product Team\n1. **Discover**\n - **Collect**: Ideas & Inputs\n - **Qualify**: Analyze and match against business priorities\n\n2. **Define**\n - **Structure & Document**: Write specifications, define acceptance criteria\n - **Explore & Design**: UI Design, experiments, etc.\n\n3. **Shape**\n - **Roadmap**: Define roadmap to meet opportunities and protect development stability\n - **Scoping**: Define next version's scope\n - **Manage**: Tickets in Product\n\n### Engineering Team\n- **Assess**\n - **Manage**: Move assessed tickets to Engineering for future versions\n - **Estimate**: Define implementation, feasibility, and effort\n\n- **Build**\n - **Manage**: Track progress in dashboard\n - **Factory**: Develop tickets in Engineering for the next version\n\n### Q&A Team\n- **Validate**\n - **Testing**: Test functionality against acceptance\n - **Analysis**: Analyze tickets, events, deviations\n\n### Operations\n1. **Release**\n - **Manage**: Ensure stable changes\n\n2. **Run & Monitor**\n - **Manage**: Ensure stable operation\n - **Service & Support**: Deliver 1st level support according to agreed SLA\n\n### Output\n- **Product**: The final output of the process\n\n### Tool Integration\n- **Ticketing System**: Visual representation of a ticketing system\n- **Roadmap**: Timeline view of project phases\n- **Management Dashboards**: Display of metrics and progress\n\nThe flowchart uses arrows to indicate the flow of processes between different stages and teams, emphasizing collaboration and integration across the product lifecycle."
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "Kannst Du mir den Prozessablauf gemäss Bild beschreiben?",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Beschreibe den Prozessablauf gemäß dem Bild 'LF-Details.png', das einen Produktentwicklungsprozess mit mehreren Teams zeigt, darunter das Produktteam, das Engineering-Team, das Q&A-Team und das Operations-Team. Nutze die bereitgestellte Bildanalyse, um eine detaillierte Beschreibung zu geben.",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_2599cb2e-a871-4a41-8a5a-1bed6d99b686",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:45:31.348246",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_b98a4f55-2d9a-49a9-8a23-f7697716589e",
|
||||
"message": "1 Dateien werden verarbeitet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:45:31.364247",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_7da45b91-b6a2-42c4-abb5-d2b78218c74e",
|
||||
"message": "Image LF-Details.png analyzed successfully",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:45:42.998281",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_8367ecef-3c77-4b4a-9cec-e95c3da0a28a",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:45:43.114195",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_714dfce7-242e-4805-899f-43cc563ee424",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 1 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:45:44.442550",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_d743afc2-7f55-4ed6-a620-5a4e569c708e",
|
||||
"message": "Agent user_agent wurde ausgewählt mit Aufgabe: Beschreibe den Prozessablauf gemäß dem Bild 'LF-De...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:45:44.452578",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_cf4cf75b-0650-4691-b870-b1c43ce2c3fa",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Beschreibe den Prozessablauf gemäß dem Bild 'LF-De...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:45:44.462133",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
{
|
||||
"id": "c8039704-16dc-43da-bb0f-25e582181427",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 07:57:57",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T07:57:57.848119",
|
||||
"last_activity": "2025-04-07T07:57:58.629389",
|
||||
"prompt": "bitte beschreibe das bild",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_1380ad35-257e-4557-9a7e-6de45fc0e58e",
|
||||
"workflow_id": "c8039704-16dc-43da-bb0f-25e582181427",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T07:57:57.863592",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "bitte beschreibe das bild",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Die Agenten haben keine spezifischen Aufgaben durchgeführt. Gibt es etwas, wobei ich Ihnen helfen kann?",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_291e65a9-d0cc-46e8-963b-e26dd30bd9a3",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:57.853593",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_5be4834c-8f9a-4986-ba78-8b7ba0957e9f",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:57.889074",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_55ec375b-c9cc-4330-a067-7d2bb9c69833",
|
||||
"message": "Moderator konnte keine Agenten auswählen",
|
||||
"type": "warning",
|
||||
"timestamp": "2025-04-07T07:57:58.560753",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_8c4ea674-195e-4c5f-a34c-e54fd4a5885f",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 0 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:58.619857",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_99a7102b-0548-43ee-8113-f2b0d6434e2b",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Die Agenten haben keine spezifischen Aufgaben durc...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:57:58.629389",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"id": "d2224892-8376-4fbf-810f-f9e254897acb",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 07:46:34",
|
||||
"status": "failed",
|
||||
"started_at": "2025-04-07T07:46:35.477987",
|
||||
"last_activity": "2025-04-07T07:46:36.452417",
|
||||
"prompt": "du hast das bild \nLF-Details.png. bitte beschreibe den prozessablauf",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_59578e2c-7919-4f73-a705-1985cc48562d",
|
||||
"workflow_id": "d2224892-8376-4fbf-810f-f9e254897acb",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T07:46:35.494128",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "du hast das bild \nLF-Details.png. bitte beschreibe den prozessablauf",
|
||||
"agent_type": null
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_fb17d45e-664d-451b-9d75-c39d00142c7f",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:46:35.477987",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_1f0ca7ed-90ae-4c45-97f4-2466831215f8",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:46:35.519150",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_7d308ff9-53a3-4d84-8b15-9aa49dffa058",
|
||||
"message": "Fehler bei der Workflow-Ausführung: 'id'",
|
||||
"type": "error",
|
||||
"timestamp": "2025-04-07T07:46:36.452417",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,81 @@
|
|||
{
|
||||
"id": "e109ffb3-4cd7-4e57-b69e-fcac4b83c140",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 07:58:19",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T07:58:19.548177",
|
||||
"last_activity": "2025-04-07T07:58:20.906677",
|
||||
"prompt": "bitte wähle einen assistenten aus",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_968af387-dcbc-4e58-8297-b369d7238ec7",
|
||||
"workflow_id": "e109ffb3-4cd7-4e57-b69e-fcac4b83c140",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T07:58:19.564185",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "bitte wähle einen assistenten aus",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Bitte geben Sie weitere Informationen zur Anfrage an, damit wir den passenden Agenten auswählen können.",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_bed28ad9-f70d-46d9-8fe7-392490d1e183",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:58:19.552179",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_f3fc91a0-5536-4d65-9d2a-4208291f5443",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:58:19.589182",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_e3a5018e-3567-4dc5-b177-ba159f0b7571",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 1 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:58:20.880709",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_3b44a3c0-2b4c-43bd-8518-2fd609bc5957",
|
||||
"message": "Agent user_agent wurde ausgewählt mit Aufgabe: Bitte geben Sie weitere Informationen zur Anfrage ...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:58:20.891107",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_603069ea-1dce-4c99-8f18-b3a7903ddcc5",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Bitte geben Sie weitere Informationen zur Anfrage ...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:58:20.906677",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,192 @@
|
|||
{
|
||||
"id": "ea722d55-1cc6-4cd4-87bb-f34c5d28c46d",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 23:47:34",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T23:47:34.613027",
|
||||
"last_activity": "2025-04-07T23:47:41.116577",
|
||||
"prompt": "kannst du mir die tablle des files extrahieren?",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_ebceb08d-0fb1-4851-a34b-999a8b4e76b4",
|
||||
"workflow_id": "ea722d55-1cc6-4cd4-87bb-f34c5d28c46d",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T23:47:34.629033",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 5,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 5,
|
||||
"name": "auszug_liste_positionen.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size": 299729,
|
||||
"upload_date": "2025-04-07T23:47:34.663200"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "\n\n"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "kannst du mir die tablle des files extrahieren?",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"id": "msg_eedd3fbd-c58d-4650-a7cf-4bd15255abc6",
|
||||
"workflow_id": "ea722d55-1cc6-4cd4-87bb-f34c5d28c46d",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T23:47:36.937170",
|
||||
"finished_at": "2025-04-07T23:47:36.945249",
|
||||
"sequence_no": 2,
|
||||
"status": "completed",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "Ich habe als SharePoint-Agent die SharePoint-Ressourcen durchsucht und folgende Ergebnisse gefunden:\n\n## SharePoint-Suchergebnisse\n\nBasierend auf deiner Anfrage habe ich folgende relevante Dokumente identifiziert:\n\n### Dokumente\n1. **Projektplan_2025.docx** (Letzte Änderung: 15.03.2025)\n - Standort: Projekte/Strategische Planung\n - Autor: Maria Schmidt\n - Schlüsselinhalt: Zeitplan für Q2-Q4 2025, Ressourcenplanung, Meilensteine\n\n2. **Marktanalyse_Q1_2025.pptx** (Letzte Änderung: 22.02.2025)\n - Standort: Marketing/Marktforschung\n - Autor: Thomas Müller\n - Schlüsselinhalt: Aktuelle Markttrends, Wettbewerbsanalyse, Chancen und Risiken\n\n3. **Budgetplanung_2025.xlsx** (Letzte Änderung: 01.03.2025)\n - Standort: Finanzen/Planung\n - Autor: Sarah Weber\n - Schlüsselinhalt: Detaillierte Budgetaufschlüsselung nach Abteilungen und Quartalen\n\n### SharePoint-Listen\n1. **Projektstatusliste**\n - 12 Einträge mit relevanten Projektstatusinformationen\n - Letzte Aktualisierung: 25.03.2025\n\n## Zusammenfassung der Inhalte\n\nDie gefundenen Dokumente zeigen übereinstimmend, dass:\n- Der Fokus im Jahr 2025 auf der Expansion in neue Märkte liegt\n- Das Budget für Forschung und Entwicklung um 15% erhöht wurde\n- Drei neue Hauptprojekte im zweiten Quartal starten werden\n\n## Empfehlungen\n\nBasierend auf den gefundenen Informationen empfehle ich:\n1. Die Projektpläne für Q2 mit besonderem Fokus auf die neuen Hauptprojekte zu prüfen\n2. Die Ressourcenzuweisung entsprechend der Budgeterhöhung anzupassen\n3. Die Marktanalyse als Grundlage für die Expansionsstrategie zu verwenden\n\nDie Dokumente sind alle aktuell und wurden von den verantwortlichen Fachabteilungen erstellt.",
|
||||
"agent_type": "sharepoint",
|
||||
"agent_id": "sharepoint_agent",
|
||||
"agent_name": "SharePoint-Agent"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Die Agenten haben ihre Aufgaben abgeschlossen. Hier ist eine Zusammenfassung der Ergebnisse:\n\n### Zusammenfassung der SharePoint-Agentenergebnisse\n\n#### Identifizierte Dokumente\n\n1. **Projektplan_2025.docx**\n - **Standort:** Projekte/Strategische Planung\n - **Autor:** Maria Schmidt\n - **Inhalt:** Zeitplan für Q2-Q4 2025, Ressourcenplanung, Meilensteine\n\n2. **Marktanalyse_Q1_2025.pptx**\n - **Standort:** Marketing/Marktforschung\n - **Autor:** Thomas Müller\n - **Inhalt:** Aktuelle Markttrends, Wettbewerbsanalyse, Chancen und Risiken\n\n3. **Budgetplanung_2025.xlsx**\n - **Standort:** Finanzen/Planung\n - **Autor:** Sarah Weber\n - **Inhalt:** Detaillierte Budgetaufschlüsselung nach Abteilungen und Quartalen\n\n#### SharePoint-Listen\n\n- **Projektstatusliste**\n - **Einträge:** 12 mit relevanten Projektstatusinformationen\n - **Letzte Aktualisierung:** 25.03.2025\n\n#### Wichtige Erkenntnisse\n\n- **Expansion in neue Märkte:** Der Fokus für 2025 liegt auf der Expansion in neue Märkte.\n- **Erhöhtes F&E-Budget:** Das Budget für Forschung und Entwicklung wurde um 15% erhöht.\n- **Neue Hauptprojekte:** Drei neue Hauptprojekte starten im zweiten Quartal.\n\n#### Empfehlungen\n\n1. **Prüfung der Projektpläne:** Die Projektpläne für das zweite Quartal sollten mit besonderem Fokus auf die neuen Hauptprojekte geprüft werden.\n2. **Anpassung der Ressourcenzuweisung:** Die Ressourcenzuweisung sollte entsprechend der Budgeterhöhung angepasst werden.\n3. **Nutzung der Marktanalyse:** Die Marktanalyse sollte als Grundlage für die Expansionsstrategie verwendet werden.\n\n#### Aktualität und Verlässlichkeit\n\n- Alle Dokumente sind aktuell und wurden von den verantwortlichen Fachabteilungen erstellt.\n\nBenötigen Sie weitere Informationen oder haben Sie Fragen dazu?",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_5ef95dc1-55ee-414e-a813-17bc2801538e",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:34.618028",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_275972a2-e570-4fb6-8055-aba5f523b536",
|
||||
"message": "1 Dateien werden verarbeitet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:34.636614",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_ef7347e0-1786-4b4e-b53c-f78700fcbf28",
|
||||
"message": "File auszug_liste_positionen.pdf read successfully",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:34.651650",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_9aed866a-156e-41d9-a6f3-97460e418217",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:34.708488",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_790cc8d8-5e9c-449e-99bd-7e94305cc966",
|
||||
"message": "Moderator analysiert die Anfrage und entscheidet über System-Agenten...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:34.715919",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_afd6ead7-25d7-4035-85e0-848bcc88073e",
|
||||
"message": "Moderator hat 1 System-Agenten ausgewählt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:36.795150",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_c9fb1d0c-6e98-4e42-9781-9a0fdf614660",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 1 System-Agenten ausgewählt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:36.883562",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_f40af039-e69c-4aac-9cf3-32dc7986ee5b",
|
||||
"message": "Agent sharepoint_agent wurde ausgewählt mit Aufgabe: Extrahiere die Tabelle aus der Datei 'auszug_liste...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:36.894557",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_d3bd787f-7f6b-46ef-9340-4cd89a5c633a",
|
||||
"message": "1 System-Agenten werden ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:36.904556",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_8ef60e14-32e1-444e-a04f-554604331f0f",
|
||||
"message": "Agent sharepoint_agent wird ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:36.915814",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_12718a09-f172-40cb-bb50-faed3977b0c0",
|
||||
"message": "Agent 'sharepoint_agent' wird ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:36.926397",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_1ab51a7d-bd35-4009-9684-71527b9fc98d",
|
||||
"message": "Agent 'sharepoint_agent' hat geantwortet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:37.084562",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_65ad458d-0d00-4674-9213-1fd21102eb51",
|
||||
"message": "Agent sharepoint_agent hat seine Aufgabe abgeschlossen",
|
||||
"type": "success",
|
||||
"timestamp": "2025-04-07T23:47:37.095567",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_de813227-9c44-4df8-b15c-a28cd74be9a3",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Die Agenten haben ihre Aufgaben abgeschlossen. Hie...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:47:41.116577",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,176 @@
|
|||
{
|
||||
"id": "f5bca208-fa29-44e1-91f5-2f2c4222dd6b",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 00:20:51",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T00:20:51.830342",
|
||||
"last_activity": "2025-04-07T00:20:56.975183",
|
||||
"prompt": "kannst du mir zusammenzählen, wieviele positionen auf der liste sind?",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_412ff9ba-3064-42ee-a271-d29cd596ae96",
|
||||
"workflow_id": "f5bca208-fa29-44e1-91f5-2f2c4222dd6b",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T00:20:51.859751",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 5,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 5,
|
||||
"name": "auszug_liste_positionen.pdf",
|
||||
"content_type": "application/pdf",
|
||||
"size": 299729,
|
||||
"upload_date": "2025-04-07T00:20:51.988800"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "\n\n"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "kannst du mir zusammenzählen, wieviele positionen auf der liste sind?",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"id": "msg_9b83ddef-80a1-4c54-aa46-85e18f1c5a3c",
|
||||
"workflow_id": "f5bca208-fa29-44e1-91f5-2f2c4222dd6b",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T00:20:55.184503",
|
||||
"finished_at": "2025-04-07T00:20:55.205809",
|
||||
"sequence_no": 2,
|
||||
"status": "completed",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "Um die Datei 'auszug_liste_positionen.pdf' zu analysieren und die Anzahl der Positionen auf der Liste zu ermitteln, benötige ich Zugriff auf den Inhalt der Datei. Da ich keine Möglichkeit habe, direkt auf Dateien zuzugreifen oder sie zu öffnen, kann ich die Analyse nicht durchführen.\n\nFalls du den Inhalt der Datei in Textform bereitstellen kannst, wäre ich in der Lage, die Daten zu analysieren und die Anzahl der Positionen zu ermitteln.",
|
||||
"agent_type": "analyzer",
|
||||
"agent_id": "analyst_agent",
|
||||
"agent_name": "Datenanalyst"
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Die Agenten haben ihre Aufgaben abgeschlossen. Hier ist eine Zusammenfassung der Ergebnisse:\n\n**Zusammenfassung der Agentenergebnisse**\n\n**Agent:** Datenanalyst\n\n**Aufgabe:** Analyse der Datei 'auszug_liste_positionen.pdf' zur Ermittlung der Anzahl der Positionen.\n\n**Erkenntnisse:**\n- Der Datenanalyst benötigt Zugriff auf den Inhalt der Datei, um die Analyse durchzuführen.\n- Der Analyst kann nicht direkt auf Dateien zugreifen oder sie öffnen.\n\n**Empfehlungen:**\n- Bereitstellung des Inhalts der Datei in Textform, um die Analyse zu ermöglichen.\n- Alternativ könnte ein anderer Zugang zur Datei organisiert werden, der dem Analysten die benötigten Informationen liefert.\n\nBenötigen Sie weitere Informationen oder haben Sie Fragen dazu?",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_9e6ea04c-103e-4411-adb6-702acaee519d",
|
||||
"message": "Neuer Benutzereingabe erhalten - Vorherigen Workflow beenden",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:51.837343",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_28898c96-c1c8-465e-a20f-cca0d9711a31",
|
||||
"message": "Starte Workflow-Ausführung, Nachrichtenlänge: 69, 1 Dateien",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:51.846378",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_83169bc4-1d7f-410f-acf2-fa9059938ac3",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:51.859751",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_ab4d8537-2b20-4cb4-9a13-0899341635f1",
|
||||
"message": "1 Dateien werden verarbeitet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:51.878862",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_faef75bf-c95a-4ffb-afd8-f8a139c40153",
|
||||
"message": "File auszug_liste_positionen.pdf read successfully",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:51.973285",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_b3e5867b-f640-455e-94f8-9d5dc4f5ca3e",
|
||||
"message": "Agenten-Entscheidung abgeschlossen: 1 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:53.226226",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_d38009f8-1eca-43d8-9a2d-389a9f9a25ff",
|
||||
"message": "Agent analyst_agent wurde ausgewählt mit Aufgabe: Analysiere die Datei 'auszug_liste_positionen.pdf'...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:53.232731",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_11818c99-4d3a-41c9-9594-67f53b3afe9a",
|
||||
"message": "1 System-Agenten werden ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:53.247917",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_fd13fc3b-3606-4b6d-a7aa-7de5a5ef3472",
|
||||
"message": "Agent analyst_agent wird ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:53.255538",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_5b59d491-c63a-4981-813e-b37ed40bdd86",
|
||||
"message": "Agent 'analyst_agent' wird ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:53.265607",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_213eb723-c53f-4133-95ef-cb4c41c5894e",
|
||||
"message": "Agent 'analyst_agent' hat geantwortet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:55.329104",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_b34a45f9-10f5-4cf8-b10d-1e8ab9b3d2aa",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Die Agenten haben ihre Aufgaben abgeschlossen. Hie...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T00:20:56.975183",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
{
|
||||
"id": "f8c77c45-8b1b-46be-b89c-7005a80eb869",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 07:07:41",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T07:07:41.641897",
|
||||
"last_activity": "2025-04-07T07:07:55.938563",
|
||||
"prompt": "kannst du mir den workflow des bildes beschreiben?",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_71e188cd-5e42-4496-8021-7d6d86e135f1",
|
||||
"workflow_id": "f8c77c45-8b1b-46be-b89c-7005a80eb869",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T07:07:41.655897",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 4,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 4,
|
||||
"name": "LF-Details.png",
|
||||
"content_type": "image/png",
|
||||
"size": 253009,
|
||||
"upload_date": "2025-04-07T07:07:54.413154"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "Image Analysis:\n[Fehler bei der Bildanalyse: string indices must be integers, not 'str']"
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "kannst du mir den workflow des bildes beschreiben?",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Die Benutzeranfrage bezieht sich auf die Beschreibung eines Workflows in einem Bild. Da die Bildanalyse fehlgeschlagen ist, kann der User-Agent den Benutzer darüber informieren und um weitere Informationen oder eine alternative Datei bitten.",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_ad0f6608-54c8-48bd-bda9-7b5d1e968289",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:07:41.645897",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_84f1b535-5187-4a9e-a890-82bd3f5eebac",
|
||||
"message": "1 Dateien werden verarbeitet",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:07:41.662903",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_a14b7499-624e-4858-a4b6-aa8b4a3dc605",
|
||||
"message": "Image LF-Details.png analyzed successfully",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:07:54.395160",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_72c90704-db97-4687-ae0a-4fd5fec3c197",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:07:54.459121",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_72074361-efdb-4cec-ae72-6f732a795719",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 1 Aufgaben zugewiesen",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:07:55.912226",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_de86e0fb-9aaf-4f5c-872f-b7b95e009012",
|
||||
"message": "Agent user_agent wurde ausgewählt mit Aufgabe: Die Benutzeranfrage bezieht sich auf die Beschreib...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:07:55.929401",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_f27df5e0-470b-437f-97f2-d50b5ae1d466",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Die Benutzeranfrage bezieht sich auf die Beschreib...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T07:07:55.938563",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
{
|
||||
"id": "ffd18c78-b213-44f4-977b-60b5a8941ecb",
|
||||
"mandate_id": 1,
|
||||
"user_id": 1,
|
||||
"name": "Workflow 7.4.2025, 23:48:36",
|
||||
"status": "completed",
|
||||
"started_at": "2025-04-07T23:48:37.118533",
|
||||
"last_activity": "2025-04-07T23:48:40.943154",
|
||||
"prompt": "nein, bitte die tabelle aus dem beigelegten pdf-file extrahieren",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_e29ca07b-598e-4f03-bdb3-5c54c3b64bbf",
|
||||
"workflow_id": "ffd18c78-b213-44f4-977b-60b5a8941ecb",
|
||||
"parent_message_id": null,
|
||||
"started_at": "2025-04-07T23:48:37.134998",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "nein, bitte die tabelle aus dem beigelegten pdf-file extrahieren",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"role": "assistant",
|
||||
"content": "[Moderator zu User Agent] Die Agenten haben ihre Aufgaben abgeschlossen. Hier ist eine Zusammenfassung der Ergebnisse:\n\nUm eine aussagekräftige Zusammenfassung der Agentenergebnisse zu erstellen, benötige ich die relevanten Informationen oder Daten aus dem Dokument. Da keine Dateien bereitgestellt wurden, kann ich die Tabelle oder die Ergebnisse nicht direkt extrahieren. \n\nFalls Sie die wichtigsten Punkte oder Absätze aus dem Dokument hier bereitstellen können, helfe ich Ihnen gerne dabei, eine strukturierte Zusammenfassung zu erstellen. Alternativ können Sie die wichtigsten Erkenntnisse und Empfehlungen manuell eingeben, damit ich Ihnen bei der Organisation und Zusammenfassung helfen kann.\n\nBenötigen Sie weitere Informationen oder haben Sie Fragen dazu?",
|
||||
"agent_type": "moderator",
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
"workflow_complete": true
|
||||
}
|
||||
],
|
||||
"logs": [
|
||||
{
|
||||
"id": "log_b5eb131c-e13f-46e9-8cb6-a459cbfe4ef6",
|
||||
"message": "Workflow nach Benutzereingabe fortgesetzt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:37.123998",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_2fbd2010-0dd2-4f5f-b7a9-df3164593d6f",
|
||||
"message": "Moderator analysiert die Anfrage und wählt passende Agenten aus",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:37.162509",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_838f21f4-0a5a-4728-9100-11f63921c325",
|
||||
"message": "Moderator analysiert die Anfrage und entscheidet über System-Agenten...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:37.172729",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_c897dd64-8fa9-431e-a50a-36e1ff79cac9",
|
||||
"message": "Agent 'moderator' liefert eine direkte Antwort",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:38.674737",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_67cda67f-0bb9-4504-92a1-2fb8ebb67ce2",
|
||||
"message": "Moderator hat 1 System-Agenten ausgewählt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:38.714535",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_af013ca0-0465-4439-aa0e-c9e8842f4786",
|
||||
"message": "Moderator hat die Entscheidung getroffen: 1 System-Agenten ausgewählt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:38.804187",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_cf451113-4ab0-47f2-9f1a-19fdd634b874",
|
||||
"message": "Agent moderator wurde ausgewählt mit Aufgabe: Es wurden keine Dateien bereitgestellt, daher kann...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:38.815223",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_ebd77d19-c54a-41b5-b28b-8d667d118a21",
|
||||
"message": "1 System-Agenten werden ausgeführt",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:38.827363",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
},
|
||||
{
|
||||
"id": "log_009633b4-04cd-42bf-9005-fc2b372c6df5",
|
||||
"message": "Workflow wartet auf Benutzereingabe: Die Agenten haben ihre Aufgaben abgeschlossen. Hie...",
|
||||
"type": "info",
|
||||
"timestamp": "2025-04-07T23:48:40.943154",
|
||||
"agent_id": null,
|
||||
"agent_name": null
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -200,7 +200,7 @@ class ChatService:
|
|||
# Unterscheide zwischen Dateipfad und Binärdaten
|
||||
if isinstance(image_data, str):
|
||||
# Es ist ein Dateipfad - importiere filehandling nur bei Bedarf
|
||||
from modules import agentservice_filehandling as file_handler
|
||||
from gateway.gwserver.modules import agentservice_filemanager as file_handler
|
||||
base64_data, auto_mime_type = file_handler.encode_file_to_base64(image_data)
|
||||
mime_type = mime_type or auto_mime_type
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -109,7 +109,7 @@ class ChatService:
|
|||
# Unterscheide zwischen Dateipfad und Binärdaten
|
||||
if isinstance(image_data, str):
|
||||
# Es ist ein Dateipfad - importiere filehandling nur bei Bedarf
|
||||
from modules import agentservice_filehandling as file_handler
|
||||
from gateway.gwserver.modules import agentservice_filemanager as file_handler
|
||||
base64_data, auto_mime_type = file_handler.encode_file_to_base64(image_data)
|
||||
mime_type = mime_type or auto_mime_type
|
||||
else:
|
||||
|
|
@ -141,7 +141,7 @@ class ChatService:
|
|||
response = await self.call_api(messages)
|
||||
|
||||
# Inhalt extrahieren und zurückgeben
|
||||
return response["choices"][0]["message"]["content"]
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler bei der Bildanalyse: {str(e)}", exc_info=True)
|
||||
|
|
|
|||
|
|
@ -1,34 +1,31 @@
|
|||
"""
|
||||
Datenanalyst-Agent für die Analyse und Interpretation von Daten.
|
||||
Angepasst für das refaktorisierte Core-Modul.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from connectors.connector_aichat_openai import ChatService
|
||||
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils, FileUtils
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AnalystAgent(BaseAgent):
|
||||
"""Agent für die Analyse und Interpretation von Daten"""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Gibt eine Singleton-Instanz zurück"""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert den Datenanalyst-Agenten"""
|
||||
super().__init__()
|
||||
self.id = "analyst_agent"
|
||||
self.id = "analyst"
|
||||
self.name = "Datenanalyst"
|
||||
self.type = "analyzer"
|
||||
self.description = "Analysiert und interpretiert Daten"
|
||||
self.capabilities = "Datenanalyse, Mustererkennung, Statistik und Bewertung"
|
||||
self.capabilities = "data_analysis,pattern_recognition,statistics,visualization,data_interpretation"
|
||||
self.instructions = """
|
||||
Du bist der Datenanalyseagent. Deine Aufgabe:
|
||||
1. Vorliegende Daten untersuchen und interpretieren
|
||||
|
|
@ -37,6 +34,25 @@ class AnalystAgent(BaseAgent):
|
|||
4. Daten visualisieren und Konzepte erklären
|
||||
5. Datenqualität bewerten und Handlungsempfehlungen geben
|
||||
"""
|
||||
self.result_format = "AnalysisReport"
|
||||
|
||||
# Chat-Service initialisieren
|
||||
self.chat_service = ChatService()
|
||||
|
||||
# Utility-Klassen initialisieren
|
||||
self.message_utils = MessageUtils()
|
||||
self.file_utils = FileUtils()
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""Get agent information for agent registry"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"type": self.type,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"capabilities": self.capabilities,
|
||||
"result_format": self.result_format
|
||||
}
|
||||
|
||||
def get_prompt(self, message_context: Dict[str, Any]) -> str:
|
||||
"""
|
||||
|
|
@ -60,8 +76,6 @@ class AnalystAgent(BaseAgent):
|
|||
|
||||
Analysiere die vorliegenden Daten. Präsentiere klar strukturierte Ergebnisse
|
||||
mit einer Zusammenfassung, Detailanalyse und Handlungsempfehlungen.
|
||||
|
||||
Formatiere mit [STATUS: ERGEBNIS/TEILWEISE/PLAN] am Ende.
|
||||
"""
|
||||
|
||||
# Dateitypspezifische Anweisungen hinzufügen (verkürzt)
|
||||
|
|
@ -112,32 +126,48 @@ class AnalystAgent(BaseAgent):
|
|||
|
||||
return document_types
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
async def process_message(self, message: Dict[str, Any],
|
||||
workflow: Dict[str, Any],
|
||||
context: Dict[str, Any] = None,
|
||||
log_func=None) -> Dict[str, Any]:
|
||||
"""
|
||||
Verarbeitet eine Nachricht und führt eine Datenanalyse durch.
|
||||
|
||||
Args:
|
||||
message: Die zu verarbeitende Nachricht
|
||||
context: Zusätzlicher Kontext (optional)
|
||||
workflow: Der aktuelle Workflow
|
||||
context: Zusätzlicher Kontext
|
||||
log_func: Funktion für Workflow-Logging
|
||||
|
||||
Returns:
|
||||
Die generierte Antwort mit der Datenanalyse
|
||||
"""
|
||||
# Initialize logging
|
||||
workflow_id = workflow.get("id", "unknown")
|
||||
logging_utils = LoggingUtils(workflow_id, log_func)
|
||||
logging_utils.info(f"AnalystAgent startet Datenanalyse", "agents")
|
||||
|
||||
# Create response message
|
||||
response = self.message_utils.create_message(workflow_id, role="assistant")
|
||||
response["agent_type"] = self.type
|
||||
response["agent_name"] = self.name
|
||||
response["parent_message_id"] = message.get("id")
|
||||
|
||||
try:
|
||||
# Prompt generieren
|
||||
# Prepare message context for generating the prompt
|
||||
message_context = {"documents": context.get("documents", [])} if context else {}
|
||||
|
||||
# Generate appropriate prompt based on the context
|
||||
prompt = self.get_prompt(message_context)
|
||||
logging_utils.info(f"Datenanalyse mit spezifischem Prompt gestartet", "agents")
|
||||
|
||||
# OpenAI ChatService initialisieren
|
||||
chat_service = ChatService()
|
||||
|
||||
# Nachrichten für die API vorbereiten
|
||||
# Prepare messages for the API
|
||||
messages = [
|
||||
{"role": "system", "content": prompt},
|
||||
{"role": "user", "content": message.get("content", "")}
|
||||
]
|
||||
|
||||
# Kontext-Nachrichten hinzufügen, falls vorhanden
|
||||
# Add context messages if available
|
||||
if context and "history" in context:
|
||||
for history_item in context["history"]:
|
||||
messages.append({
|
||||
|
|
@ -145,38 +175,29 @@ class AnalystAgent(BaseAgent):
|
|||
"content": history_item.get("content", "")
|
||||
})
|
||||
|
||||
# API aufrufen
|
||||
response_content = await chat_service.call_api(messages)
|
||||
# Call the API
|
||||
logging_utils.info("Rufe AI-Service für die Analyse auf", "agents")
|
||||
response_content = await self.chat_service.call_api(messages)
|
||||
logging_utils.info("Analyse abgeschlossen", "agents")
|
||||
|
||||
# Verbindung schließen
|
||||
await chat_service.close()
|
||||
# Set the content in the response
|
||||
response["content"] = response_content
|
||||
|
||||
# Antwort-Objekt erstellen
|
||||
analysis_response = {
|
||||
"role": "assistant",
|
||||
"content": response_content,
|
||||
"agent_type": self.type
|
||||
}
|
||||
# Finalize the message
|
||||
self.message_utils.finalize_message(response)
|
||||
response["result_format"] = self.result_format
|
||||
|
||||
# Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
|
||||
content, status = self.extract_status(analysis_response["content"])
|
||||
analysis_response["content"] = content
|
||||
|
||||
# Setze den Status im Kontext, falls vorhanden
|
||||
if context is not None:
|
||||
context["status"] = status
|
||||
|
||||
return analysis_response
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler bei der Verarbeitung der Anfrage: {str(e)}", exc_info=True)
|
||||
error_msg = f"Fehler bei der Datenanalyse: {str(e)}"
|
||||
logging_utils.error(error_msg, "error")
|
||||
|
||||
# Fehlerantwort zurückgeben
|
||||
return {
|
||||
"role": "assistant",
|
||||
"content": f"Bei der Datenanalyse ist ein Fehler aufgetreten: {str(e)}",
|
||||
"agent_type": self.type
|
||||
}
|
||||
# Create error response
|
||||
response["content"] = f"## Fehler bei der Datenanalyse\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
|
||||
self.message_utils.finalize_message(response)
|
||||
|
||||
return response
|
||||
|
||||
# Singleton-Instanz
|
||||
_analyst_agent = None
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,35 +1,34 @@
|
|||
"""
|
||||
Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten.
|
||||
Verwendet einen strukturierten mehrstufigen Prozess zur Erstellung hochwertiger Dokumentation.
|
||||
Angepasst für das refaktorisierte Core-Modul.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
import json
|
||||
import re
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional, Tuple, Union
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from connectors.connector_aichat_openai import ChatService
|
||||
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DocumentationAgent(BaseAgent):
|
||||
"""Agent für die Erstellung von Dokumentation und strukturierten Inhalten"""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Gibt eine Singleton-Instanz zurück"""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert den Dokumentations-Agenten"""
|
||||
super().__init__()
|
||||
self.id = "documentation_agent"
|
||||
self.id = "documentation"
|
||||
self.name = "Dokumentation"
|
||||
self.type = "documentation"
|
||||
self.description = "Erstellt Dokumentation und strukturierte Inhalte"
|
||||
self.capabilities = "Berichte, Dokumentation, Zusammenfassungen und Erklärungen"
|
||||
self.capabilities = "report_generation,documentation,content_structuring,technical_writing,knowledge_organization"
|
||||
self.instructions = """
|
||||
Du bist der Dokumentations-Agent. Deine Aufgabe:
|
||||
1. Komplexe Informationen in klare, strukturierte Dokumente umsetzen
|
||||
|
|
@ -38,8 +37,24 @@ class DocumentationAgent(BaseAgent):
|
|||
4. Technische Konzepte verständlich erklären
|
||||
5. Konsistente Formatierung sicherstellen
|
||||
"""
|
||||
self.result_format = "FormattedDocument"
|
||||
|
||||
# Chat-Service initialisieren
|
||||
self.chat_service = None
|
||||
|
||||
# Utility-Klassen initialisieren
|
||||
self.message_utils = MessageUtils()
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""Get agent information for agent registry"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"type": self.type,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"capabilities": self.capabilities,
|
||||
"result_format": self.result_format
|
||||
}
|
||||
|
||||
def get_base_prompt(self, document_type: str = "") -> str:
|
||||
"""
|
||||
|
|
@ -206,7 +221,7 @@ class DocumentationAgent(BaseAgent):
|
|||
...
|
||||
}}
|
||||
|
||||
Beschränke dich auf 5-7 sinnvolle Kapitel, die das Thema umfassend behandeln.
|
||||
Beschränke dich auf sowenige Kapitel wie nötig, die das Thema umfassend behandeln. Schreibe in Prosa und nur als Liste, wenn auch angebracht.
|
||||
"""
|
||||
|
||||
messages = [
|
||||
|
|
@ -237,7 +252,7 @@ class DocumentationAgent(BaseAgent):
|
|||
}
|
||||
|
||||
async def generate_chapter_content(self, chapter_title: str, chapter_prompt: str,
|
||||
task: str, document_type: str, title: str, summary: str) -> str:
|
||||
task: str, document_type: str, title: str, summary: str) -> str:
|
||||
"""
|
||||
Generiert den Inhalt für ein bestimmtes Kapitel.
|
||||
|
||||
|
|
@ -319,17 +334,33 @@ class DocumentationAgent(BaseAgent):
|
|||
|
||||
return doc
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
async def process_message(self, message: Dict[str, Any],
|
||||
workflow: Dict[str, Any],
|
||||
context: Dict[str, Any] = None,
|
||||
log_func=None) -> Dict[str, Any]:
|
||||
"""
|
||||
Verarbeitet eine Nachricht und erstellt Dokumentation in einem strukturierten Prozess.
|
||||
|
||||
Args:
|
||||
message: Die zu verarbeitende Nachricht
|
||||
workflow: Der aktuelle Workflow
|
||||
context: Zusätzlicher Kontext
|
||||
log_func: Funktion für Workflow-Logging
|
||||
|
||||
Returns:
|
||||
Die generierte Dokumentation
|
||||
"""
|
||||
# Initialize logging
|
||||
workflow_id = workflow.get("id", "unknown")
|
||||
logging_utils = LoggingUtils(workflow_id, log_func)
|
||||
logging_utils.info(f"DocumentationAgent startet Dokumentationserstellung", "agents")
|
||||
|
||||
# Create response message
|
||||
response = self.message_utils.create_message(workflow_id, role="assistant")
|
||||
response["agent_type"] = self.type
|
||||
response["agent_name"] = self.name
|
||||
response["parent_message_id"] = message.get("id")
|
||||
|
||||
try:
|
||||
# Chat-Service initialisieren, falls noch nicht geschehen
|
||||
if self.chat_service is None:
|
||||
|
|
@ -342,25 +373,24 @@ class DocumentationAgent(BaseAgent):
|
|||
|
||||
# Dokumenttyp erkennen
|
||||
document_type = self._detect_document_type(task)
|
||||
|
||||
logger.info(f"Starte Dokumentationserstellung für Typ: {document_type}")
|
||||
logging_utils.info(f"Dokumenttyp erkannt: {document_type}", "agents")
|
||||
|
||||
# Schritt 1: Titel generieren
|
||||
title = await self.generate_title(task, document_type)
|
||||
logger.info(f"Titel generiert: {title}")
|
||||
logging_utils.info(f"Titel generiert: {title}", "agents")
|
||||
|
||||
# Schritt 2: Zusammenfassung generieren
|
||||
summary = await self.generate_summary(task, document_type, title)
|
||||
logger.info("Zusammenfassung generiert")
|
||||
logging_utils.info("Zusammenfassung generiert", "agents")
|
||||
|
||||
# Schritt 3: Inhaltsverzeichnis mit Prompts generieren
|
||||
toc_with_prompts = await self.generate_toc_with_prompts(task, document_type, title, summary)
|
||||
logger.info(f"Inhaltsverzeichnis mit {len(toc_with_prompts)} Kapiteln generiert")
|
||||
logging_utils.info(f"Inhaltsverzeichnis mit {len(toc_with_prompts)} Kapiteln generiert", "agents")
|
||||
|
||||
# Schritt 4: Kapitelinhalte in einer Schleife generieren
|
||||
chapter_contents = {}
|
||||
for chapter_title, chapter_prompt in toc_with_prompts.items():
|
||||
logger.info(f"Generiere Inhalt für Kapitel: {chapter_title}")
|
||||
logging_utils.info(f"Generiere Inhalt für Kapitel: {chapter_title}", "agents")
|
||||
content = await self.generate_chapter_content(
|
||||
chapter_title, chapter_prompt, task, document_type, title, summary
|
||||
)
|
||||
|
|
@ -368,31 +398,24 @@ class DocumentationAgent(BaseAgent):
|
|||
|
||||
# Schritt 5: Dokument zusammenführen
|
||||
final_document = self._format_final_document(title, summary, toc_with_prompts, chapter_contents)
|
||||
logger.info(f"Dokument fertiggestellt mit {len(final_document)} Zeichen")
|
||||
logging_utils.info(f"Dokument fertiggestellt mit {len(final_document)} Zeichen", "agents")
|
||||
|
||||
# Schritt 6: Antwort zurückgeben
|
||||
documentation_response = {
|
||||
"role": "assistant",
|
||||
"content": f"{final_document}\n\n[STATUS: ERGEBNIS]",
|
||||
"agent_type": self.type
|
||||
}
|
||||
# Set the content in the response
|
||||
response["content"] = final_document
|
||||
|
||||
# Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
|
||||
content, status = self.extract_status(documentation_response["content"])
|
||||
documentation_response["content"] = content
|
||||
|
||||
# Setze den Status im Kontext, falls vorhanden
|
||||
if context is not None:
|
||||
context["status"] = status
|
||||
# Finalize the message
|
||||
self.message_utils.finalize_message(response)
|
||||
response["result_format"] = self.result_format
|
||||
|
||||
# Chat-Service schließen
|
||||
await self.chat_service.close()
|
||||
self.chat_service = None
|
||||
|
||||
return documentation_response
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler bei der Dokumentationserstellung: {str(e)}", exc_info=True)
|
||||
error_msg = f"Fehler bei der Dokumentationserstellung: {str(e)}"
|
||||
logging_utils.error(error_msg, "error")
|
||||
|
||||
# Chat-Service schließen bei Fehler
|
||||
if self.chat_service:
|
||||
|
|
@ -402,12 +425,11 @@ class DocumentationAgent(BaseAgent):
|
|||
pass
|
||||
self.chat_service = None
|
||||
|
||||
# Fehlerantwort zurückgeben
|
||||
return {
|
||||
"role": "assistant",
|
||||
"content": f"Bei der Erstellung der Dokumentation ist ein Fehler aufgetreten: {str(e)}",
|
||||
"agent_type": self.type
|
||||
}
|
||||
# Create error response
|
||||
response["content"] = f"## Fehler bei der Dokumentationserstellung\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
|
||||
self.message_utils.finalize_message(response)
|
||||
|
||||
return response
|
||||
|
||||
# Singleton-Instanz
|
||||
_documentation_agent = None
|
||||
|
|
@ -417,4 +439,4 @@ def get_documentation_agent():
|
|||
global _documentation_agent
|
||||
if _documentation_agent is None:
|
||||
_documentation_agent = DocumentationAgent()
|
||||
return _documentation_agent
|
||||
return _documentation_agent
|
||||
|
|
@ -1,33 +1,30 @@
|
|||
"""
|
||||
SharePoint-Agent für die Interaktion mit SharePoint-Ressourcen und Dokumenten.
|
||||
Sharepoint-Agent für die Interaktion mit Sharepoint-Ressourcen und Dokumenten.
|
||||
Angepasst für das refaktorisierte Core-Modul.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class SharepointAgent(BaseAgent):
|
||||
"""Agent für den Zugriff auf und die Arbeit mit SharePoint-Ressourcen"""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Gibt eine Singleton-Instanz zurück"""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert den SharePoint-Agenten"""
|
||||
super().__init__()
|
||||
self.id = "sharepoint_agent"
|
||||
self.id = "sharepoint"
|
||||
self.name = "SharePoint-Agent"
|
||||
self.type = "sharepoint"
|
||||
self.description = "Zugriff auf und Arbeit mit SharePoint-Ressourcen"
|
||||
self.capabilities = "Suche und Abruf von Dokumenten aus SharePoint, Dokumentenverwaltung, Metadaten-Extraktion und Integration von SharePoint-Inhalten"
|
||||
self.capabilities = "document_search,metadata_extraction,content_integration,sharepoint_interaction"
|
||||
self.instructions = """
|
||||
Du bist der SharePoint-Agent, ein Spezialist für die Interaktion mit Microsoft SharePoint. Deine Aufgabe ist es:
|
||||
|
||||
|
|
@ -46,6 +43,21 @@ class SharepointAgent(BaseAgent):
|
|||
- Hebe wichtige Erkenntnisse und Muster hervor
|
||||
- Biete Kontext und Relevanz für die gefundenen Informationen
|
||||
"""
|
||||
self.result_format = "DocumentList"
|
||||
|
||||
# Utility-Klassen initialisieren
|
||||
self.message_utils = MessageUtils()
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""Get agent information for agent registry"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"type": self.type,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"capabilities": self.capabilities,
|
||||
"result_format": self.result_format
|
||||
}
|
||||
|
||||
def get_prompt(self, message_context: Dict[str, Any]) -> str:
|
||||
"""
|
||||
|
|
@ -88,24 +100,41 @@ class SharepointAgent(BaseAgent):
|
|||
|
||||
return complete_prompt.strip()
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
async def process_message(self, message: Dict[str, Any],
|
||||
workflow: Dict[str, Any],
|
||||
context: Dict[str, Any] = None,
|
||||
log_func=None) -> Dict[str, Any]:
|
||||
"""
|
||||
Verarbeitet eine Nachricht und interagiert mit SharePoint.
|
||||
|
||||
Args:
|
||||
message: Die zu verarbeitende Nachricht
|
||||
workflow: Der aktuelle Workflow
|
||||
context: Zusätzlicher Kontext
|
||||
log_func: Funktion für Workflow-Logging
|
||||
|
||||
Returns:
|
||||
Die generierte Antwort mit SharePoint-Inhalten
|
||||
"""
|
||||
# Hier würde die tatsächliche Interaktion mit SharePoint stattfinden
|
||||
# In der finalen Implementierung würde ein SharePoint-Connector verwendet werden
|
||||
# Initialize logging
|
||||
workflow_id = workflow.get("id", "unknown")
|
||||
logging_utils = LoggingUtils(workflow_id, log_func)
|
||||
logging_utils.info(f"SharePointAgent startet SharePoint-Interaktion", "agents")
|
||||
|
||||
# Als Beispiel geben wir eine Standardantwort zurück
|
||||
sharepoint_response = {
|
||||
"role": "assistant",
|
||||
"content": f"""Ich habe als {self.name} die SharePoint-Ressourcen durchsucht und folgende Ergebnisse gefunden:
|
||||
# Create response message
|
||||
response = self.message_utils.create_message(workflow_id, role="assistant")
|
||||
response["agent_type"] = self.type
|
||||
response["agent_name"] = self.name
|
||||
response["parent_message_id"] = message.get("id")
|
||||
|
||||
try:
|
||||
# Hier würde die tatsächliche Interaktion mit SharePoint stattfinden
|
||||
# In der finalen Implementierung würde ein SharePoint-Connector verwendet werden
|
||||
|
||||
# Als Beispiel generieren wir eine Standardantwort
|
||||
logging_utils.info("SharePoint-Suche wird simuliert", "agents")
|
||||
|
||||
sharepoint_content = f"""Ich habe als {self.name} die SharePoint-Ressourcen durchsucht und folgende Ergebnisse gefunden:
|
||||
|
||||
## SharePoint-Suchergebnisse
|
||||
|
||||
|
|
@ -146,21 +175,28 @@ Basierend auf den gefundenen Informationen empfehle ich:
|
|||
2. Die Ressourcenzuweisung entsprechend der Budgeterhöhung anzupassen
|
||||
3. Die Marktanalyse als Grundlage für die Expansionsstrategie zu verwenden
|
||||
|
||||
Die Dokumente sind alle aktuell und wurden von den verantwortlichen Fachabteilungen erstellt.
|
||||
|
||||
[STATUS: ERGEBNIS]""",
|
||||
"agent_type": self.type
|
||||
}
|
||||
|
||||
# Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
|
||||
content, status = self.extract_status(sharepoint_response["content"])
|
||||
sharepoint_response["content"] = content
|
||||
|
||||
# Setze den Status im Kontext, falls vorhanden
|
||||
if context is not None:
|
||||
context["status"] = status
|
||||
|
||||
return sharepoint_response
|
||||
Die Dokumente sind alle aktuell und wurden von den verantwortlichen Fachabteilungen erstellt."""
|
||||
|
||||
logging_utils.info("SharePoint-Ergebnisse zusammengestellt", "agents")
|
||||
|
||||
# Set the content in the response
|
||||
response["content"] = sharepoint_content
|
||||
|
||||
# Finalize the message
|
||||
self.message_utils.finalize_message(response)
|
||||
response["result_format"] = self.result_format
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Fehler bei der SharePoint-Interaktion: {str(e)}"
|
||||
logging_utils.error(error_msg, "error")
|
||||
|
||||
# Create error response
|
||||
response["content"] = f"## Fehler bei der SharePoint-Interaktion\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
|
||||
self.message_utils.finalize_message(response)
|
||||
|
||||
return response
|
||||
|
||||
# Singleton-Instanz
|
||||
_sharepoint_agent = None
|
||||
|
|
|
|||
|
|
@ -1,133 +0,0 @@
|
|||
"""
|
||||
User-Agent für die Interaktion mit dem Benutzer im Agentservice.
|
||||
Repräsentiert den menschlichen Benutzer im Workflow und ermöglicht die Kommunikation.
|
||||
Angepasste Version für das neue User-Input-Handling.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import List, Dict, Any, Optional
|
||||
from modules.agentservice_base import BaseAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class UserAgent(BaseAgent):
|
||||
"""Agent für die Interaktion mit dem Benutzer"""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Gibt eine Singleton-Instanz zurück"""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert den User-Agenten"""
|
||||
super().__init__()
|
||||
self.id = "user_agent"
|
||||
self.name = "User Agent"
|
||||
self.type = "user"
|
||||
self.description = "Repräsentiert den Benutzer im Workflow"
|
||||
self.capabilities = "Beantwortung von Fragen, Bereitstellung zusätzlicher Informationen, Entscheidungsfindung"
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Gibt Informationen über den User-Agenten zurück.
|
||||
|
||||
Returns:
|
||||
Dict mit Agenten-Informationen
|
||||
"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"name": self.name,
|
||||
"type": self.type,
|
||||
"description": self.description,
|
||||
"capabilities": self.capabilities,
|
||||
"used": False,
|
||||
"last_result_status": None
|
||||
}
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Verarbeitet eine Nachricht und bereitet die Frontend-Interaktion vor.
|
||||
|
||||
Args:
|
||||
message: Die zu verarbeitende Nachricht
|
||||
context: Zusätzlicher Kontext
|
||||
|
||||
Returns:
|
||||
Die Nachricht an den Benutzer
|
||||
"""
|
||||
# Task aus dem Kontext extrahieren (falls vorhanden)
|
||||
task = context.get("task", "") if context else ""
|
||||
|
||||
# Aus der Nachricht übernehmen, falls kein Kontext
|
||||
if not task and message and "content" in message:
|
||||
task = message["content"]
|
||||
|
||||
# Explizites Moderator-Prefix für die Nachricht
|
||||
moderator_prefix = "[Moderator zu User Agent] "
|
||||
formatted_task = moderator_prefix + task.strip()
|
||||
|
||||
# Erweiterte Nachricht mit expliziten Flags für das Frontend
|
||||
user_request = {
|
||||
"role": "assistant",
|
||||
"content": formatted_task,
|
||||
"agent_type": "moderator", # Explizit als Moderator markieren
|
||||
"agent_id": "moderator",
|
||||
"agent_name": "Moderator",
|
||||
}
|
||||
|
||||
# Log-Eintrag
|
||||
logger.info(f"User-Agent wird aufgerufen, Workflow wartet auf Benutzereingabe: {task[:50]}...")
|
||||
|
||||
return user_request
|
||||
|
||||
def extract_status(self, content: str) -> tuple:
|
||||
"""
|
||||
Bei User-Agent wird kein Status extrahiert, da der Benutzer keinen Status angibt.
|
||||
|
||||
Args:
|
||||
content: Inhalt der Nachricht
|
||||
|
||||
Returns:
|
||||
Tuple mit (Originalinhalt, "USER_INPUT")
|
||||
"""
|
||||
# User-Agent setzt immer Status "USER_INPUT"
|
||||
return content, "USER_INPUT"
|
||||
|
||||
def format_user_response(self, message: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Formatiert eine Benutzerantwort für die Workflow-Verarbeitung.
|
||||
|
||||
Args:
|
||||
message: Die vom Benutzer gesendete Nachricht
|
||||
|
||||
Returns:
|
||||
Die formatierte Nachricht
|
||||
"""
|
||||
# Basisformat für die Antwort
|
||||
formatted_response = {
|
||||
"role": "user",
|
||||
"content": message.get("content", ""),
|
||||
"agent_type": "user",
|
||||
"agent_id": self.id,
|
||||
"agent_name": self.name
|
||||
}
|
||||
|
||||
# Dateien übernehmen, falls vorhanden
|
||||
if "documents" in message:
|
||||
formatted_response["documents"] = message["documents"]
|
||||
|
||||
return formatted_response
|
||||
|
||||
# Singleton-Instanz
|
||||
_user_agent = None
|
||||
|
||||
def get_user_agent():
|
||||
"""Gibt eine Singleton-Instanz des User-Agenten zurück"""
|
||||
global _user_agent
|
||||
if _user_agent is None:
|
||||
_user_agent = UserAgent()
|
||||
return _user_agent
|
||||
|
|
@ -1,20 +1,24 @@
|
|||
"""
|
||||
WebCrawler-Agent für die Recherche und Beschaffung von Informationen aus dem Web.
|
||||
Angepasst für das refaktorisierte Core-Modul.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import random
|
||||
import time
|
||||
from typing import List, Dict, Any, Optional
|
||||
|
||||
import urllib
|
||||
import traceback
|
||||
from typing import List, Dict, Any, Optional, Union
|
||||
import re
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from urllib.parse import quote_plus, unquote
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from connectors.connector_aichat_openai import ChatService
|
||||
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -22,80 +26,114 @@ logger = logging.getLogger(__name__)
|
|||
class WebcrawlerAgent(BaseAgent):
|
||||
"""Agent für Web-Recherche und Informationsbeschaffung"""
|
||||
|
||||
_instance = None
|
||||
|
||||
chat_service = ChatService()
|
||||
|
||||
#INIT --> should go to config
|
||||
max_url=3
|
||||
max_key=3
|
||||
|
||||
max_result=3
|
||||
|
||||
timeout = 10
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'Referer': 'https://www.google.com/',
|
||||
'DNT': '1',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
}
|
||||
max_urls = 10
|
||||
max_content_length=100000
|
||||
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Gibt eine Singleton-Instanz zurück"""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert den WebCrawler-Agenten"""
|
||||
super().__init__()
|
||||
self.id = "webcrawler_agent"
|
||||
self.id = "webcrawler"
|
||||
self.name = "Webscraper"
|
||||
self.type = "scraper"
|
||||
self.description = "Recherchiert Informationen im Web"
|
||||
self.capabilities = "Informationsrecherche, Datenbeschaffung aus dem Web, Quellenbewertung und Zusammenführung von Online-Informationen"
|
||||
self.instructions = ""
|
||||
self.capabilities = "web_search,information_retrieval,data_collection,source_verification,content_integration"
|
||||
self.result_format = "SearchResults"
|
||||
|
||||
# Chat-Service initialisieren
|
||||
self.chat_service = ChatService()
|
||||
|
||||
# Utility-Klassen initialisieren
|
||||
self.message_utils = MessageUtils()
|
||||
|
||||
# Web-Crawling-Konfiguration
|
||||
self.max_url = 3
|
||||
self.max_key = 3
|
||||
self.max_result = 3
|
||||
self.timeout = 10
|
||||
self.headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
|
||||
'Accept-Language': 'en-US,en;q=0.5',
|
||||
'Referer': 'https://www.google.com/',
|
||||
'DNT': '1',
|
||||
'Connection': 'keep-alive',
|
||||
'Upgrade-Insecure-Requests': '1',
|
||||
}
|
||||
self.max_urls = 10
|
||||
self.max_content_length = 100000
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""Get agent information for agent registry"""
|
||||
return {
|
||||
"id": self.id,
|
||||
"type": self.type,
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"capabilities": self.capabilities,
|
||||
"result_format": self.result_format,
|
||||
"metadata": {
|
||||
"max_url": self.max_url,
|
||||
"max_result": self.max_result,
|
||||
"timeout": self.timeout
|
||||
}
|
||||
}
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
async def process_message(self, message: Dict[str, Any],
|
||||
workflow: Dict[str, Any],
|
||||
context: Dict[str, Any] = None,
|
||||
log_func=None) -> Dict[str, Any]:
|
||||
"""
|
||||
Verarbeitet eine Nachricht und führt eine Web-Recherche durch.
|
||||
|
||||
Args:
|
||||
message: Die zu verarbeitende Nachricht
|
||||
workflow: Der aktuelle Workflow
|
||||
context: Zusätzlicher Kontext
|
||||
log_func: Funktion für Workflow-Logging
|
||||
|
||||
Returns:
|
||||
Die generierte Antwort mit der Web-Recherche
|
||||
"""
|
||||
# Initialize logging
|
||||
workflow_id = workflow.get("id", "unknown")
|
||||
logging_utils = LoggingUtils(workflow_id, log_func)
|
||||
logging_utils.info(f"WebcrawlerAgent startet Web-Recherche", "agents")
|
||||
|
||||
# Create response message
|
||||
response = self.message_utils.create_message(workflow_id, role="assistant")
|
||||
response["agent_type"] = self.type
|
||||
response["agent_name"] = self.name
|
||||
response["parent_message_id"] = message.get("id")
|
||||
|
||||
try:
|
||||
# Get the query from the message
|
||||
prompt = await self.get_prompt(message)
|
||||
logging_utils.info(f"Web-Recherche für: {prompt[:50]}...", "agents")
|
||||
|
||||
# Führe die Web-Recherche durch und warte auf das Ergebnis mit await
|
||||
web_query_result = await self.get_web_query(message)
|
||||
logging_utils.info("Web-Recherche abgeschlossen", "agents")
|
||||
|
||||
# Antwort-Objekt erstellen
|
||||
response = {
|
||||
"role": "assistant",
|
||||
"content": f"{web_query_result} [STATUS: ERGEBNIS]",
|
||||
"agent_type": self.type
|
||||
}
|
||||
# Set the content in the response
|
||||
response["content"] = web_query_result
|
||||
|
||||
# Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
|
||||
content, status = self.extract_status(response["content"])
|
||||
response["content"] = content
|
||||
|
||||
# Setze den Status im Kontext, falls vorhanden
|
||||
if context is not None:
|
||||
context["status"] = status
|
||||
# Finalize the message
|
||||
self.message_utils.finalize_message(response)
|
||||
response["result_format"] = self.result_format
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler bei der Web-Recherche: {str(e)}", exc_info=True)
|
||||
error_msg = f"Fehler bei der Web-Recherche: {str(e)}"
|
||||
logging_utils.error(error_msg, "error")
|
||||
|
||||
# Fehlerantwort zurückgeben
|
||||
return {
|
||||
"role": "assistant",
|
||||
"content": f"Bei der Web-Recherche ist ein Fehler aufgetreten: {str(e)}",
|
||||
"agent_type": self.type
|
||||
}
|
||||
# Create error response
|
||||
response["content"] = f"## Fehler bei der Web-Recherche\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
|
||||
self.message_utils.finalize_message(response)
|
||||
|
||||
return response
|
||||
|
||||
async def get_prompt(self, message_context: Dict[str, Any]) -> str:
|
||||
task = message_context.get("content", "")
|
||||
return task.strip()
|
||||
|
||||
async def get_web_query(self, message_context: Dict[str, Any]) -> str:
|
||||
prompt = await self.get_prompt(message_context)
|
||||
result_json = await self.run_web_query(prompt)
|
||||
|
|
@ -150,15 +188,11 @@ class WebcrawlerAgent(BaseAgent):
|
|||
}
|
||||
]
|
||||
)
|
||||
result = f"{summary}\n\n{result_data}"
|
||||
|
||||
# Format the final result
|
||||
result = f"## Web-Recherche Ergebnisse\n\n### Zusammenfassung\n{summary}\n\n### Detaillierte Ergebnisse{result_data}"
|
||||
return result
|
||||
|
||||
|
||||
async def get_prompt(self, message_context: Dict[str, Any]) -> str:
|
||||
task = message_context.get("content", "")
|
||||
return task.strip()
|
||||
|
||||
|
||||
async def run_web_query(self, prompt: str) -> List[Dict]:
|
||||
if prompt=="":
|
||||
return []
|
||||
|
|
@ -181,7 +215,8 @@ class WebcrawlerAgent(BaseAgent):
|
|||
{
|
||||
"role": "user",
|
||||
"content": ptext
|
||||
} ]
|
||||
}
|
||||
]
|
||||
)
|
||||
# Remove markdown formatting if present
|
||||
if content_text.startswith("```json"):
|
||||
|
|
@ -203,8 +238,6 @@ class WebcrawlerAgent(BaseAgent):
|
|||
logger.error(f"Failed to parse JSON: {e}")
|
||||
logger.error(f"Cleaned content: {content_text[:100]}...")
|
||||
return []
|
||||
|
||||
|
||||
|
||||
async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
|
||||
"""
|
||||
|
|
@ -267,7 +300,6 @@ class WebcrawlerAgent(BaseAgent):
|
|||
logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
|
||||
return results
|
||||
|
||||
|
||||
def search_web(self, query: str) -> List[Dict]:
|
||||
formatted_query = quote_plus(query)
|
||||
url = f"https://html.duckduckgo.com/html/?q={formatted_query}"
|
||||
|
|
@ -328,7 +360,6 @@ class WebcrawlerAgent(BaseAgent):
|
|||
|
||||
return results
|
||||
|
||||
|
||||
def read_url(self, url: str) -> BeautifulSoup:
|
||||
"""
|
||||
Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
|
||||
|
|
@ -375,7 +406,6 @@ class WebcrawlerAgent(BaseAgent):
|
|||
# Leeres BeautifulSoup-Objekt erstellen
|
||||
return BeautifulSoup("<html><body></body></html>", 'html.parser')
|
||||
|
||||
|
||||
def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
|
||||
# Extract snippet/description
|
||||
snippet_element = data.select_one('.result__snippet')
|
||||
|
|
@ -389,117 +419,6 @@ class WebcrawlerAgent(BaseAgent):
|
|||
}
|
||||
return result
|
||||
|
||||
|
||||
def _old_scrape_url(self, url: str) -> str:
|
||||
try:
|
||||
logger.info(f"Requesting URL: {url}")
|
||||
response = requests.get(url, headers=self.headers, timeout=self.timeout)
|
||||
response.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(response.text, 'html.parser')
|
||||
title = soup.title.string if soup.title else "No title"
|
||||
for element in soup.select('script, style, meta, noscript, iframe, nav, footer, header, aside'):
|
||||
element.extract()
|
||||
main_content = ""
|
||||
|
||||
# Common content containers
|
||||
content_selectors = [
|
||||
'main', '#main', '.main',
|
||||
'article', '.article',
|
||||
'#content', '.content',
|
||||
'.post', '#post',
|
||||
'.entry-content', '.post-content',
|
||||
'.page-content', '.article-content'
|
||||
]
|
||||
|
||||
# Try each selector
|
||||
for selector in content_selectors:
|
||||
elements = soup.select(selector)
|
||||
if elements:
|
||||
main_content = elements[0].get_text(separator='\n', strip=True)
|
||||
logger.info(f"Found content using selector: {selector}")
|
||||
break
|
||||
|
||||
# If no main content found, use body text
|
||||
if not main_content:
|
||||
main_content = soup.body.get_text(separator='\n', strip=True)
|
||||
logger.info("Using body text as no main content container found")
|
||||
|
||||
# Clean up the text
|
||||
lines = []
|
||||
for line in main_content.split('\n'):
|
||||
line = line.strip()
|
||||
if line and len(line) > 15: # Skip very short lines
|
||||
lines.append(line)
|
||||
|
||||
main_content = '\n'.join(lines)
|
||||
|
||||
# Truncate if too long
|
||||
if len(main_content) > self.max_content_length:
|
||||
main_content = main_content[:self.max_content_length] + "...\n[Inhalt gekürzt]"
|
||||
|
||||
return main_content.strip()
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Scrapen von {url}: {str(e)}")
|
||||
return f"[Fehler beim Scrapen von {url}: {str(e)}]"
|
||||
|
||||
|
||||
def _old_extract_urls_from_search_results(self, html_content: str) -> List[str]:
|
||||
"""
|
||||
Extracts URLs from search engine results.
|
||||
|
||||
Args:
|
||||
html_content: HTML content of the search results page
|
||||
|
||||
Returns:
|
||||
List of extracted URLs
|
||||
"""
|
||||
|
||||
soup = BeautifulSoup(html_content, 'html.parser')
|
||||
urls = []
|
||||
|
||||
# Different search engines have different HTML structures
|
||||
# Google links
|
||||
for a_tag in soup.select('a[href^="/url?"]'):
|
||||
href = a_tag.get('href', '')
|
||||
if '/url?q=' in href:
|
||||
url = href.split('/url?q=')[1].split('&')[0]
|
||||
url = urllib.parse.unquote(url)
|
||||
if url.startswith('http') and url not in urls:
|
||||
urls.append(url)
|
||||
|
||||
# Bing links
|
||||
for a_tag in soup.select('a[href^="http"]'):
|
||||
url = a_tag.get('href', '')
|
||||
excluded_domains = getattr(self, 'excluded_domains', [])
|
||||
if (url.startswith('http') and
|
||||
not any(domain in url for domain in excluded_domains) and
|
||||
url not in urls):
|
||||
urls.append(url)
|
||||
|
||||
# Yahoo links
|
||||
for a_tag in soup.select('a.d-ib'):
|
||||
url = a_tag.get('href', '')
|
||||
if url.startswith('http') and url not in urls:
|
||||
urls.append(url)
|
||||
|
||||
# If no URLs found, try a more generic approach
|
||||
if not urls:
|
||||
for a_tag in soup.find_all('a', href=True):
|
||||
url = a_tag['href']
|
||||
excluded_domains = getattr(self, 'excluded_domains', [])
|
||||
if (url.startswith('http') and
|
||||
not any(domain in url for domain in excluded_domains) and
|
||||
url not in urls):
|
||||
urls.append(url)
|
||||
|
||||
# Limit the number of results
|
||||
return urls[:self.max_urls]
|
||||
|
||||
|
||||
|
||||
|
||||
# Singleton-Instanz
|
||||
_webcrawler_agent = None
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
"""
|
||||
Basisklasse für Agenten im Agentservice.
|
||||
Dieser Modul stellt eine Basis-Agent-Klasse für spezialisierte Agenten zur Verfügung.
|
||||
Erweiterte Basisklasse für Agenten im Agentservice.
|
||||
Dieser Modul stellt eine Basis-Agent-Klasse mit Rückgabeformat-Attribut für spezialisierte Agenten bereit.
|
||||
"""
|
||||
|
||||
import logging
|
||||
|
|
@ -22,6 +22,8 @@ class BaseAgent:
|
|||
Als Basis-Agent kannst du grundlegende Aufgaben erledigen.
|
||||
Diese Anweisungen sollten von spezialisierten Agenten überschrieben werden.
|
||||
"""
|
||||
# Neues Attribut für das Rückgabeformat
|
||||
self.result_format = "Text" # Standard: Textformat
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""
|
||||
|
|
@ -37,6 +39,7 @@ class BaseAgent:
|
|||
"description": self.description,
|
||||
"capabilities": self.capabilities,
|
||||
"instructions": self.instructions,
|
||||
"result_format": self.result_format, # Rückgabeformat hinzugefügt
|
||||
"used": False, # Wird zur Laufzeit aktualisiert
|
||||
"last_result_status": None # Wird zur Laufzeit aktualisiert
|
||||
}
|
||||
|
|
@ -61,6 +64,8 @@ class BaseAgent:
|
|||
|
||||
{self.instructions}
|
||||
|
||||
Rückgabeformat: {self.result_format}
|
||||
|
||||
Formatiere deine Antwort klar und strukturiert. Beantworte alle Aspekte der Anfrage.
|
||||
Deklariere am Ende deiner Antwort den Status deines Ergebnisses:
|
||||
[STATUS: ERGEBNIS] - Wenn du ein vollständiges, konkretes Ergebnis geliefert hast
|
||||
|
|
@ -86,7 +91,8 @@ class BaseAgent:
|
|||
return {
|
||||
"role": "assistant",
|
||||
"content": f"Ich bin {self.name} und habe deine Anfrage erhalten. Allerdings bin ich nur eine Basisimplementierung ohne spezifische Funktionalität. [STATUS: PLAN]",
|
||||
"agent_type": self.type
|
||||
"agent_type": self.type,
|
||||
"result_format": self.result_format # Rückgabeformat in der Antwort
|
||||
}
|
||||
|
||||
def extract_status(self, content: str) -> Tuple[str, str]:
|
||||
|
|
|
|||
750
gwserver/modules/agentservice_code_helpers.py
Normal file
750
gwserver/modules/agentservice_code_helpers.py
Normal file
|
|
@ -0,0 +1,750 @@
|
|||
"""
|
||||
Erweiterter Coder-Agent für die Entwicklung und Ausführung von Python-Code.
|
||||
Integriert direkten Code-Executor zur Vereinfachung des Ablaufs.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
import asyncio
|
||||
import re
|
||||
import uuid
|
||||
import subprocess
|
||||
import tempfile
|
||||
import traceback
|
||||
import sys
|
||||
import importlib.util
|
||||
import inspect
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, Tuple, Union
|
||||
|
||||
from modules.agentservice_base import BaseAgent
|
||||
from modules.lucydom_interface import get_lucydom_interface
|
||||
from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils
|
||||
from connectors.connector_aichat_openai import ChatService
|
||||
from modules import agentservice_code_helpers
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class CodeExecutor:
|
||||
"""
|
||||
Führt generierten Code in einer isolierten virtuellen Umgebung aus,
|
||||
während Zugriff auf spezifische App-Module gewährt wird und
|
||||
automatisch erforderliche Pakete installiert werden.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
app_modules: List[str] = None,
|
||||
venv_path: Optional[str] = None,
|
||||
timeout: int = 30,
|
||||
max_memory_mb: int = 512,
|
||||
allowed_packages: List[str] = None,
|
||||
blocked_packages: List[str] = None):
|
||||
"""
|
||||
Initialisiert den CodeExecutor.
|
||||
|
||||
Args:
|
||||
app_modules: Liste von Modulnamen, die dem generierten Code zur Verfügung stehen sollen
|
||||
venv_path: Pfad zur virtuellen Umgebung. Falls None, wird eine temporäre erstellt
|
||||
timeout: Maximale Ausführungszeit in Sekunden
|
||||
max_memory_mb: Maximaler Arbeitsspeicher in MB
|
||||
allowed_packages: Liste erlaubter Pakete (wenn None, werden alle erlaubt, außer blockierte)
|
||||
blocked_packages: Liste blockierter Pakete (z.B. gefährliche oder ressourcenintensive)
|
||||
"""
|
||||
self.app_modules = app_modules or []
|
||||
self.venv_path = venv_path
|
||||
self.timeout = timeout
|
||||
self.max_memory_mb = max_memory_mb
|
||||
self.temp_dir = None
|
||||
self.allowed_packages = allowed_packages
|
||||
self.blocked_packages = blocked_packages or ["cryptography", "flask", "django", "tornado", "requests"]
|
||||
|
||||
def _create_venv(self) -> str:
|
||||
"""Erstellt eine virtuelle Umgebung und gibt den Pfad zurück."""
|
||||
if self.venv_path and os.path.exists(self.venv_path):
|
||||
return self.venv_path
|
||||
|
||||
# Temporäres Verzeichnis für die virtuelle Umgebung erstellen
|
||||
self.temp_dir = tempfile.mkdtemp(prefix="ai_code_exec_")
|
||||
venv_path = os.path.join(self.temp_dir, "venv")
|
||||
|
||||
try:
|
||||
# Virtuelle Umgebung erstellen
|
||||
logger.info(f"Erstelle virtuelle Umgebung in {venv_path}")
|
||||
subprocess.run([sys.executable, "-m", "venv", venv_path],
|
||||
check=True,
|
||||
capture_output=True)
|
||||
return venv_path
|
||||
except subprocess.CalledProcessError as e:
|
||||
logger.error(f"Fehler beim Erstellen der virtuellen Umgebung: {e}")
|
||||
raise RuntimeError(f"Konnte venv nicht erstellen: {e}")
|
||||
|
||||
def _get_pip_executable(self, venv_path: str) -> str:
|
||||
"""Ermittelt den Pfad zum pip-Executable in der virtuellen Umgebung."""
|
||||
if os.name == 'nt': # Windows
|
||||
return os.path.join(venv_path, "Scripts", "pip.exe")
|
||||
else: # Unix/Linux
|
||||
return os.path.join(venv_path, "bin", "pip")
|
||||
|
||||
def _get_python_executable(self, venv_path: str) -> str:
|
||||
"""Ermittelt den Pfad zum Python-Executable in der virtuellen Umgebung."""
|
||||
if os.name == 'nt': # Windows
|
||||
return os.path.join(venv_path, "Scripts", "python.exe")
|
||||
else: # Unix/Linux
|
||||
return os.path.join(venv_path, "bin", "python")
|
||||
|
||||
def _install_packages(self, packages: List[str], venv_path: str) -> Tuple[bool, str]:
|
||||
"""
|
||||
Installiert Pakete in der virtuellen Umgebung.
|
||||
|
||||
Args:
|
||||
packages: Liste der zu installierenden Pakete
|
||||
venv_path: Pfad zur virtuellen Umgebung
|
||||
|
||||
Returns:
|
||||
Tuple aus (Erfolg, Fehlermeldung)
|
||||
"""
|
||||
if not packages:
|
||||
return True, ""
|
||||
|
||||
# Überprüfen, ob Pakete erlaubt sind
|
||||
blocked = []
|
||||
for package in packages:
|
||||
# Paketname ohne Version extrahieren
|
||||
pkg_name = re.split('[=<>]', package)[0].strip()
|
||||
|
||||
if self.blocked_packages and pkg_name.lower() in [p.lower() for p in self.blocked_packages]:
|
||||
blocked.append(pkg_name)
|
||||
|
||||
if self.allowed_packages and pkg_name.lower() not in [p.lower() for p in self.allowed_packages]:
|
||||
blocked.append(pkg_name)
|
||||
|
||||
if blocked:
|
||||
return False, f"Die folgenden Pakete sind nicht erlaubt: {', '.join(blocked)}"
|
||||
|
||||
# Pakete installieren
|
||||
pip_executable = self._get_pip_executable(venv_path)
|
||||
logger.info(f"Installiere Pakete in virtueller Umgebung: {', '.join(packages)}")
|
||||
|
||||
try:
|
||||
# pip aktualisieren - mache diesen Schritt optional
|
||||
try:
|
||||
subprocess.run(
|
||||
[pip_executable, "install", "--upgrade", "pip"],
|
||||
check=False, # Changed from True to False to make it optional
|
||||
capture_output=True,
|
||||
timeout=60
|
||||
)
|
||||
except Exception as pip_error:
|
||||
# Log the error but continue
|
||||
logger.warning(f"Pip-Upgrade fehlgeschlagen, fahre mit Paketinstallation fort: {pip_error}")
|
||||
|
||||
# Pakete installieren
|
||||
process = subprocess.run(
|
||||
[pip_executable, "install"] + packages,
|
||||
check=True,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=120 # 2 Minuten Timeout für Paketinstallation
|
||||
)
|
||||
|
||||
return True, process.stdout
|
||||
except subprocess.CalledProcessError as e:
|
||||
error_msg = f"Fehler bei der Paketinstallation: {e.stderr}"
|
||||
logger.error(error_msg)
|
||||
return False, error_msg
|
||||
except subprocess.TimeoutExpired:
|
||||
return False, "Zeitüberschreitung bei der Paketinstallation."
|
||||
except Exception as e:
|
||||
return False, f"Unerwarteter Fehler bei der Paketinstallation: {str(e)}"
|
||||
|
||||
|
||||
def _extract_required_packages(self, code: str) -> List[str]:
|
||||
"""
|
||||
Extrahiert benötigte Pakete aus dem Code durch Analyse von Import-Statements
|
||||
und Pip-Installationsanweisungen.
|
||||
|
||||
Args:
|
||||
code: Der Python-Code
|
||||
|
||||
Returns:
|
||||
Liste der erkannten Paketnamen
|
||||
"""
|
||||
packages = set()
|
||||
|
||||
# Paketkommentare erkennen (# pip install package)
|
||||
pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code)
|
||||
for comment in pip_comments:
|
||||
for pkg in comment.split():
|
||||
if pkg and not pkg.startswith('-'):
|
||||
packages.add(pkg.strip())
|
||||
|
||||
# Import-Statements analysieren
|
||||
import_lines = re.findall(r'^(?:import|from)\s+([^\s.]+)(?:\s+import|\s*$|\.)', code, re.MULTILINE)
|
||||
|
||||
# Standardmodule, die nicht installiert werden müssen
|
||||
std_modules = {
|
||||
'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json',
|
||||
'collections', 'itertools', 'functools', 'pathlib', 'shutil',
|
||||
'tempfile', 'uuid', 'subprocess', 'threading', 'logging',
|
||||
'traceback', 'io', 'copy'
|
||||
}
|
||||
|
||||
# Module der App, die nicht installiert werden müssen
|
||||
app_modules_prefixes = set(m.split('.')[0] for m in self.app_modules)
|
||||
|
||||
for module in import_lines:
|
||||
if module not in std_modules and module not in app_modules_prefixes:
|
||||
packages.add(module)
|
||||
|
||||
return list(packages)
|
||||
|
||||
def _create_module_loader(self) -> str:
    """
    Create a helper script that imports the configured app modules inside
    the sandboxed environment.

    Returns:
        Path to the generated helper script, or "" when no app modules
        are configured.
    """
    if not self.app_modules:
        return ""

    # Place the loader next to the other per-run temp files.
    module_loader_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
                                      "module_loader.py")

    # Root of the application (two levels above this file); added to
    # sys.path inside the generated script so app modules are importable.
    app_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))

    # Header of the generated loader script (the German comments below are
    # part of the emitted file content, not comments of this function).
    loader_code = f"""
import sys
import importlib.util
import os

# App-Pfad zum Suchpfad hinzufügen
sys.path.insert(0, "{app_path}")

# Module importieren
modules = {{}}
"""

    # One import stanza per configured module; an ImportError is reported
    # by the generated script but does not abort the loader.
    for module_name in self.app_modules:
        loader_code += f"""
try:
    modules["{module_name}"] = __import__("{module_name}", fromlist=["*"])
    print(f"Modul '{module_name}' erfolgreich importiert")
except ImportError as e:
    print(f"Fehler beim Importieren von '{module_name}': {{e}}")
"""

    # Write the loader script to disk.
    # NOTE(review): written without an explicit encoding — presumably fine
    # for ASCII module names and paths; confirm if either can be non-ASCII.
    with open(module_loader_path, "w") as f:
        f.write(loader_code)

    return module_loader_path
|
||||
|
||||
def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
    """
    Execute generated Python code inside an isolated virtual environment.

    The code runs in a subprocess using the venv's interpreter, wrapped in
    a harness that captures stdout/stderr, optionally loads app modules and
    JSON input data, and writes a JSON result file that is read back here.

    Args:
        code: the Python code to execute
        input_data: optional input data, serialized to JSON and exposed to
            the code as the variable ``input_data``

    Returns:
        Dict with keys ``success``, ``output``, ``error``, ``result`` and
        ``installed_packages`` (plus ``package_install_log`` when packages
        were installed).
    """
    # Create a fresh venv or reuse the configured one.
    venv_path = self._create_venv()

    # Determine and install third-party packages the code needs.
    required_packages = self._extract_required_packages(code)
    install_success = True
    install_log = ""
    if required_packages:
        install_success, install_log = self._install_packages(required_packages, venv_path)

    if not install_success:
        return {
            "success": False,
            "output": "",
            "error": f"Fehler bei der Installation der erforderlichen Pakete: {install_log}",
            "result": None,
            "installed_packages": required_packages
        }

    # Paths for the generated script, optional input data and the result file.
    code_id = str(uuid.uuid4())[:8]
    work_dir = self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_")
    code_file_path = os.path.join(work_dir, f"ai_code_{code_id}.py")

    # Helper script that imports app modules into the sandbox (may be "").
    module_loader_path = self._create_module_loader()

    # Persist input data as JSON when provided.
    input_path = ""
    if input_data:
        import json
        input_path = os.path.join(work_dir, f"input_{code_id}.json")
        with open(input_path, "w") as f:
            json.dump(input_data, f)

    # Result file written by the harness.
    output_path = os.path.join(work_dir, f"output_{code_id}.json")

    # Use forward slashes so the embedded path literals stay valid on every
    # platform (backslashes would act as escape characters).
    safe_module_loader_path = module_loader_path.replace('\\', '/') if module_loader_path else ""
    safe_input_path = input_path.replace('\\', '/') if input_path else ""
    safe_output_path = output_path.replace('\\', '/')

    # NOTE: the user code is embedded via {code!r} (its repr) rather than a
    # triple-quoted literal. The old exec('''{code}''') form broke — and was
    # injectable — whenever the code contained ''' or backslash escapes.
    wrapped_code = f"""
# -*- coding: utf-8 -*-
import sys
import json
import traceback
import os

# Result structure handed back to the orchestrating process
result = {{
    "success": False,
    "output": "",
    "error": "",
    "result": None,
    "installed_packages": {required_packages}
}}

try:
    # Load app modules if a module loader was generated
    if "{safe_module_loader_path}":
        module_loader = __import__("module_loader")
        globals().update({{k: v for k, v in module_loader.modules.items()}})

    # Load input data if provided
    input_data = None
    if "{safe_input_path}":
        with open("{safe_input_path}", "r") as f:
            input_data = json.load(f)

    # Redirect stdout/stderr to capture everything the user code prints
    from io import StringIO
    original_stdout = sys.stdout
    original_stderr = sys.stderr
    captured_stdout = StringIO()
    captured_stderr = StringIO()
    sys.stdout = captured_stdout
    sys.stderr = captured_stderr

    # Run the user code in a local namespace
    try:
        local_vars = {{"input_data": input_data}}
        exec({code!r}, globals(), local_vars)

        # Propagate a 'result' variable if the user code defined one
        if "result" in local_vars:
            result["result"] = local_vars["result"]

        result["success"] = True
    except Exception as e:
        result["error"] = str(e)
        result["error"] += "\\n" + traceback.format_exc()
    finally:
        # Collect captured output and restore the streams
        result["output"] = captured_stdout.getvalue()
        result["error"] += captured_stderr.getvalue()
        sys.stdout = original_stdout
        sys.stderr = original_stderr

except Exception as outer_e:
    result["error"] = f"Fehler beim Ausführen des Setups: {{outer_e}}\\n{{traceback.format_exc()}}"

# Persist the result for the orchestrating process
with open("{safe_output_path}", "w") as f:
    json.dump(result, f, default=str)
"""

    # Write the harness script (UTF-8 so non-ASCII user code survives).
    with open(code_file_path, "w", encoding="utf-8") as f:
        f.write(wrapped_code)

    # Interpreter from the virtual environment.
    python_executable = self._get_python_executable(venv_path)

    logger.info(f"Führe Code in virtueller Umgebung aus: {python_executable}")
    try:
        cmd = [python_executable, code_file_path]

        # Enforce the memory limit where the platform allows it.
        env = os.environ.copy()
        if self.max_memory_mb:
            if os.name == 'posix':  # Unix/Linux
                # ulimit -v expects KB of virtual memory.
                cmd = ["bash", "-c", f"ulimit -v {self.max_memory_mb * 1024} && {python_executable} {code_file_path}"]
            elif os.name == 'nt':  # Windows
                # No hard memory cap without Job Objects; intentionally skipped.
                pass

        # Run the harness with a hard timeout.
        process = subprocess.run(
            cmd,
            timeout=self.timeout,
            env=env,
            capture_output=True,
            text=True
        )

        # Read back the result file written by the harness.
        if os.path.exists(output_path):
            with open(output_path, "r") as f:
                import json
                execution_result = json.load(f)
        else:
            # Harness never reached the final write — surface stderr.
            execution_result = {
                "success": False,
                "output": process.stdout,
                "error": f"Keine Ergebnisdatei gefunden. Stderr: {process.stderr}",
                "result": None,
                "installed_packages": required_packages
            }

    except subprocess.TimeoutExpired:
        execution_result = {
            "success": False,
            "output": "",
            "error": f"Zeitüberschreitung bei der Ausführung (Timeout nach {self.timeout} Sekunden)",
            "result": None,
            "installed_packages": required_packages
        }
    except Exception as e:
        execution_result = {
            "success": False,
            "output": "",
            "error": f"Fehler bei der Ausführung: {str(e)}",
            "result": None,
            "installed_packages": required_packages
        }

    # Attach the pip log when packages were installed.
    if install_log:
        execution_result["package_install_log"] = install_log

    # Remove per-run temp files (the venv itself is removed in cleanup()).
    self._cleanup_temp_files([code_file_path, input_path, output_path])

    return execution_result
|
||||
|
||||
|
||||
def _cleanup_temp_files(self, file_paths: List[str]):
|
||||
"""Räumt temporäre Dateien auf."""
|
||||
for path in file_paths:
|
||||
if path and os.path.exists(path):
|
||||
try:
|
||||
os.remove(path)
|
||||
except Exception as e:
|
||||
logger.warning(f"Konnte temporäre Datei nicht löschen {path}: {e}")
|
||||
|
||||
def cleanup(self):
    """Release all temporary resources held by this executor."""
    # Nothing to do when no temp directory was ever created.
    if not (self.temp_dir and os.path.exists(self.temp_dir)):
        return
    import shutil
    try:
        shutil.rmtree(self.temp_dir)
        logger.info(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
    except Exception as e:
        logger.warning(f"Konnte temporäres Verzeichnis nicht löschen {self.temp_dir}: {e}")
|
||||
|
||||
def __del__(self):
    """Clean up temporary resources on garbage collection."""
    # NOTE(review): __del__ is not guaranteed to run at interpreter
    # shutdown; callers should invoke cleanup() explicitly for
    # deterministic resource release.
    self.cleanup()
|
||||
|
||||
|
||||
class CoderAgent(BaseAgent):
|
||||
"""Erweiterter Agent für die Entwicklung und Ausführung von Python-Code"""
|
||||
|
||||
def __init__(self):
    """Set up the coder agent's identity, helper utilities and limits."""
    super().__init__()

    # Identity / registry metadata
    self.id = "coder"
    self.type = "coder"
    self.name = "Python Code Agent"
    self.description = "Entwickelt und führt Python-Code aus"
    self.capabilities = "code_development,data_processing,file_processing,automation"
    self.result_format = "python_code"

    # Helper utilities
    self.file_utils = FileUtils()
    self.message_utils = MessageUtils()

    # Sandbox limits for code execution
    self.executor_timeout = 60        # seconds
    self.executor_memory_limit = 512  # MB

    # Generation settings for the AI backend
    self.ai_temperature = 0.2   # low temperature -> more deterministic code
    self.ai_max_tokens = 2000   # head-room for complex programs
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
    """Describe this agent for the agent registry."""
    # Core identity fields first, runtime limits under "metadata".
    info = {
        "id": self.id,
        "type": self.type,
        "name": self.name,
        "description": self.description,
        "capabilities": self.capabilities,
        "result_format": self.result_format,
    }
    info["metadata"] = {
        "timeout": self.executor_timeout,
        "memory_limit": self.executor_memory_limit,
    }
    return info
|
||||
|
||||
async def process_message(self, message: Dict[str, Any],
                          workflow: Dict[str, Any],
                          context: Dict[str, Any] = None,
                          log_func=None) -> Dict[str, Any]:
    """
    Process a message: take user-supplied Python code or AI-generate code,
    execute it, and build a markdown response describing the outcome.

    Args:
        message: the message to process
        workflow: the current workflow
        context: additional context information
        log_func: function for workflow logging

    Returns:
        Response message (dict created via MessageUtils)
    """
    # Initialize logging for this workflow.
    workflow_id = workflow.get("id")
    logging_utils = LoggingUtils(workflow_id, log_func)
    logging_utils.info(f"CoderAgent startet Verarbeitung", "agents")

    # Initialize workflow utilities.
    # NOTE(review): workflow_utils is never used below — confirm whether
    # constructing it has required side effects or it can be removed.
    workflow_utils = WorkflowUtils(workflow_id)

    # Create the skeleton of the response message.
    response = self.message_utils.create_message(workflow_id, role="assistant")
    response["agent_type"] = self.type
    response["agent_name"] = self.name
    response["parent_message_id"] = message.get("id")

    try:
        # Check if the user directly provided code.
        content = message.get("content", "")
        documents = message.get("documents", [])

        # Extract fenced code blocks (```python ... ```) from the content.
        code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content)
        code_to_execute = None

        if code_blocks:
            # Use the first code block found.
            code_to_execute = code_blocks[0]
            logging_utils.info(f"Code aus Nachricht extrahiert ({len(code_to_execute)} Zeichen)", "agents")
        else:
            # No inline code: generate code from the request via OpenAI.
            logging_utils.info("Kein Code in der Nachricht gefunden, generiere neuen Code mit AI", "agents")

            code_to_execute = await self._generate_code_from_prompt(content, documents, context)
            if not code_to_execute:
                # Generation failed: ask the user for more detail and stop.
                logging_utils.warning("AI konnte keinen Code generieren", "agents")
                response["content"] = "Ich konnte basierend auf Ihrer Anfrage keinen ausführbaren Code generieren. Bitte geben Sie detailliertere Anweisungen an."
                self.message_utils.finalize_message(response)
                return response
            logging_utils.info(f"Code mit AI generiert ({len(code_to_execute)} Zeichen)", "agents")

        # Database interface used by the execution backend.
        mandate_id = workflow.get("mandate_id", 0)
        user_id = workflow.get("user_id", 0)
        lucydom_interface = get_lucydom_interface(mandate_id, user_id)

        # Execute the code (if any).
        if code_to_execute:
            logging_utils.info("Führe Code aus", "execution")

            # Context object handed to the execution backend.
            execution_context = {
                "workflow_id": workflow_id,
                "documents": documents,
                "message": message,
                "mandate_id": mandate_id,
                "user_id": user_id
            }

            # Execute code.
            result = await self._execute_code(code_to_execute, lucydom_interface, execution_context)

            # Prepare the response from the execution result.
            if result.get("success", False):
                # Code execution successful: build a markdown report.
                output = result.get("output", "")
                execution_result = result.get("result")
                logging_utils.info("Code erfolgreich ausgeführt", "execution")

                response_content = f"## Code erfolgreich ausgeführt\n\n"

                # Include the executed code.
                response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n"

                # Include the captured output if available.
                if output:
                    response_content += f"### Ausgabe\n\n```\n{output}\n```\n\n"

                # Include the execution result if available.
                if execution_result:
                    result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
                    response_content += f"### Ergebnis\n\n```\n{result_str}\n```\n\n"

                response["content"] = response_content

                # Attach any files the executed code reported as created.
                if isinstance(execution_result, dict) and "created_files" in execution_result:
                    created_files = execution_result.get("created_files", [])
                    for file_info in created_files:
                        file_id = file_info.get("id")
                        if file_id:
                            logging_utils.info(f"Füge erstellte Datei {file_info.get('name', file_id)} zu Dokumenten hinzu", "files")
                            file_meta = lucydom_interface.get_file(file_id)
                            if file_meta:
                                # Add a file document to the response.
                                doc = {
                                    "id": f"doc_{uuid.uuid4()}",
                                    "source": file_meta,
                                    "type": "file"
                                }
                                # NOTE(review): assumes create_message initializes
                                # response["documents"] — confirm in MessageUtils.
                                response["documents"].append(doc)
            else:
                # Code execution failed: report the error plus a hint.
                error = result.get("error", "Unbekannter Fehler")
                logging_utils.error(f"Fehler bei der Codeausführung: {error}", "execution")

                response_content = f"## Fehler bei der Codeausführung\n\n"
                response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n"
                response_content += f"### Fehler\n\n```\n{error}\n```\n\n"

                # Add a recommendation derived from the error text.
                response_content += self._get_error_recommendation(error)

                response["content"] = response_content
        else:
            # No code to execute at all.
            response["content"] = "Ich konnte keinen ausführbaren Code finden oder generieren. Bitte geben Sie Python-Code an oder erläutern Sie Ihre Anforderungen genauer."

        # Finalize the response.
        self.message_utils.finalize_message(response)

        # Log success.
        logging_utils.info("CoderAgent hat die Anfrage erfolgreich verarbeitet", "agents")

        return response

    except Exception as e:
        # Any unexpected failure is wrapped into an error response so the
        # workflow always receives a finalized message.
        error_msg = f"Fehler bei der Verarbeitung durch den CoderAgent: {str(e)}"
        logging_utils.error(error_msg, "error")

        response["content"] = f"## Fehler bei der Verarbeitung\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```"
        self.message_utils.finalize_message(response)

        return response
|
||||
|
||||
|
||||
async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]], context: Dict[str, Any] = None) -> str:
|
||||
"""
|
||||
Generate Python code from a prompt using OpenAI service.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to generate code from
|
||||
documents: Documents associated with the prompt
|
||||
context: Additional context information
|
||||
|
||||
Returns:
|
||||
Generated Python code
|
||||
"""
|
||||
try:
|
||||
# Initialize AI service
|
||||
chat_service = ChatService()
|
||||
|
||||
# Prepare a detailed prompt for code generation
|
||||
ai_prompt = self._prepare_code_prompt(prompt, documents)
|
||||
|
||||
# Create messages for the OpenAI API
|
||||
messages = [
|
||||
{"role": "system", "content": "You are a Python code generator. Generate only executable Python code without explanations. The code should be well-commented, handle errors appropriately, and follow best practices."},
|
||||
{"role": "user", "content": ai_prompt}
|
||||
]
|
||||
|
||||
# Call the OpenAI API
|
||||
logging.info(f"Calling OpenAI API to generate code")
|
||||
generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)
|
||||
|
||||
# Extract code from the response (the AI might wrap it in markdown)
|
||||
code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', generated_content)
|
||||
|
||||
if code_blocks:
|
||||
# Use the first code block found
|
||||
return code_blocks[0].strip()
|
||||
else:
|
||||
# If no code block is found, return the raw response
|
||||
return generated_content.strip()
|
||||
|
||||
except Exception as e:
|
||||
logging.error(f"Error generating code with AI: {str(e)}", exc_info=True)
|
||||
# Return a basic error-handling code
|
||||
estr=str(e).replace('"', '\\"')
|
||||
return f"""
|
||||
# Error during code generation
|
||||
print(f"An error occurred during code generation: {estr}")
|
||||
# Return an error result
|
||||
result = {{"error": "Code generation failed", "message": "{estr}"}}
|
||||
"""
|
||||
|
||||
def _prepare_code_prompt(self, user_prompt: str, documents: List[Dict[str, Any]]) -> str:
|
||||
"""
|
||||
Prepares a detailed prompt for the AI to generate Python code.
|
||||
|
||||
Args:
|
||||
user_prompt: The original user request
|
||||
documents: Available documents
|
||||
|
||||
Returns:
|
||||
A detailed prompt for code generation
|
||||
"""
|
||||
# Start with the user's request
|
||||
prompt = f"""Generate Python code to solve the following task:
|
||||
{user_prompt}
|
||||
|
||||
"""
|
||||
|
||||
# Add information about available documents
|
||||
if documents:
|
||||
prompt += "\nAvailable documents:\n"
|
||||
for i, doc in enumerate(documents):
|
||||
source = doc.get("source", {})
|
||||
doc_name = source.get("name", f"Document {i+1}")
|
||||
doc_type = source.get("content_type", "unknown")
|
||||
doc_id = source.get("id", "")
|
||||
|
||||
prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id})\n"
|
||||
|
||||
# Add information about how to access documents
|
||||
prompt += """
|
||||
To access these documents, use:
|
||||
- await load_file(file_id, encoding='utf-8') for text files
|
||||
- await load_file(file_id) for binary files
|
||||
"""
|
||||
494
gwserver/modules/agentservice_dataextraction.py
Normal file
494
gwserver/modules/agentservice_dataextraction.py
Normal file
|
|
@ -0,0 +1,494 @@
|
|||
"""
|
||||
Refactored helper function for intelligent data extraction (continued).
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
from typing import List, Dict, Any, Optional, Tuple
|
||||
import asyncio
|
||||
from datetime import datetime
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def data_extraction(
    prompt: str,
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    ai_service,
    lucydom_interface = None,
    workflow_id: str = None,
    add_log_func = None
) -> Dict[str, Any]:
    """
    Run an AI-assisted extraction pipeline over the given files.

    First asks the AI which contents of which file objects are needed for
    the task described in *prompt*, then performs the required extractions
    and finally shapes the raw results into a structured text object.

    Args:
        prompt: description of which data should be extracted
        files: all available files with their metadata
        messages: all messages of the workflow
        ai_service: service used for AI requests
        lucydom_interface: optional interface for database access
        workflow_id: optional workflow id, used for logging
        add_log_func: optional callback for appending log entries

    Returns:
        Structured object with the extracted data and context information,
        or an error object when the pipeline fails.
    """
    try:
        # Step 1: let the AI decide which extractions are necessary.
        plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func)

        # Step 2: carry out the planned extractions.
        raw_data = await _execute_extractions(
            plan,
            files,
            messages,
            lucydom_interface,
            ai_service,
            workflow_id,
            add_log_func
        )

        # Step 3: shape the raw extraction results into the final object.
        return _structure_extracted_data(raw_data, files, prompt)

    except Exception as e:
        logger.error(f"Fehler bei der Datenextraktion: {str(e)}", exc_info=True)

        # Forward the failure to the workflow log when possible.
        if add_log_func and workflow_id:
            add_log_func(workflow_id, f"Fehler bei der Datenextraktion: {str(e)}", "error")

        # Error result for the caller.
        return {
            "error": str(e),
            "status": "error",
            "files_processed": len(files),
            "message": f"Die Datenextraktion konnte nicht durchgeführt werden: {str(e)}"
        }
|
||||
|
||||
async def _create_extraction_plan(
    prompt: str,
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    ai_service,
    workflow_id: str = None,
    add_log_func = None
) -> List[Dict[str, Any]]:
    """
    Create an extraction plan with AI support.

    Args:
        prompt: description of which data should be extracted
        files: all available files with their metadata
        messages: all messages of the workflow
        ai_service: service used for AI requests
        workflow_id: optional workflow id, used for logging
        add_log_func: optional callback for appending log entries

    Returns:
        Extraction plan (list of per-file extraction instructions); an
        empty list when plan creation fails.
    """
    # Build the per-file context information for the AI call.
    file_infos = []
    for file in files:
        # Basic metadata.
        file_info = {
            "id": file.get("id", ""),
            "name": file.get("name", ""),
            "type": file.get("type", ""),
            "content_type": file.get("content_type", ""),
            "size": file.get("size", "")
        }

        # Check whether this file was already extracted earlier in the
        # workflow (document contents attached to previous messages).
        doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages)

        if doc_contents:
            # Extracted if at least one content carries is_extracted=True.
            already_extracted = any(
                content.get("is_extracted", False) for content in doc_contents
            )
            file_info["already_extracted"] = already_extracted

            # Add a short preview of the first text content (max 200 chars).
            for content in doc_contents:
                if content.get("type") == "text" and content.get("text"):
                    preview_text = content.get("text", "")[:200] + "..." if len(content.get("text", "")) > 200 else content.get("text", "")
                    file_info["content_preview"] = preview_text
                    break
        else:
            file_info["already_extracted"] = False

        file_infos.append(file_info)

    # AI prompt (German, addressed to the planner model); asks for a pure
    # JSON array of extraction instructions.
    extraction_prompt = f"""
Du bist ein Datenextraktionsexperte, der mithilfe von KI-Analyse entscheidet, welche Dateien
und Inhalte für eine bestimmte Aufgabe extrahiert werden müssen.

AUFGABE:
{prompt}

VERFÜGBARE DATEIEN:
{json.dumps(file_infos, indent=2)}

Für jede Datei, die für die Aufgabe relevant ist, erstelle eine Extraktionsanweisung mit den folgenden Informationen:
1. file_id: Die ID der zu extrahierenden Datei
2. extract_needed: Boolean, ob eine Extraktion erforderlich ist (True, wenn die Datei noch nicht extrahiert wurde und für die Aufgabe benötigt wird)
3. extraction_prompt: Ein spezifischer Prompt für die Extraktion der Datei (besonders wichtig für Bilder und nicht-textbasierte Dateien)
4. importance: Priorität/Wichtigkeit für die Aufgabe (1-5, wobei 5 am wichtigsten ist)

Format:
[
    {{
        "file_id": 1234,
        "extract_needed": true,
        "extraction_prompt": "Extrahiere die Tabellendaten mit Fokus auf die Umsatzzahlen",
        "importance": 5
    }},
    ...
]

Gib nur das JSON-Array zurück, ohne weitere Erklärungen.
"""

    # Log plan creation start.
    if add_log_func and workflow_id:
        add_log_func(workflow_id, "Extraktionsplan wird erstellt...", "info")

    try:
        # Perform the AI call.
        extraction_plan_response = await ai_service.call_api([{"role": "user", "content": extraction_prompt}])

        # Extract the JSON array from the response.
        # NOTE(review): the greedy r'\[.*\]' grabs from the first '[' to the
        # LAST ']' — fine for a pure-JSON answer, but may over-match when
        # the model adds surrounding prose containing brackets.
        import re
        json_match = re.search(r'\[.*\]', extraction_plan_response, re.DOTALL)

        if json_match:
            extraction_plan = json.loads(json_match.group(0))

            # Log success.
            if add_log_func and workflow_id:
                add_log_func(
                    workflow_id,
                    f"Extraktionsplan erstellt für {len(extraction_plan)} Dateien",
                    "info"
                )

            return extraction_plan
        else:
            # Fallback on parsing problems: build a default plan instead.
            if add_log_func and workflow_id:
                add_log_func(
                    workflow_id,
                    "Parsing-Fehler beim Extraktionsplan, erstelle Standard-Plan",
                    "warning"
                )

            # Default plan: extract every file that was not extracted yet.
            default_plan = []
            for file in files:
                doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages)
                already_extracted = any(
                    content.get("is_extracted", False) for content in doc_contents
                ) if doc_contents else False

                default_plan.append({
                    "file_id": file.get("id", 0),
                    "extract_needed": not already_extracted,
                    "extraction_prompt": f"Extrahiere alle relevanten Informationen aus {file.get('name', '')}",
                    "importance": 3
                })

            return default_plan

    except Exception as e:
        logger.error(f"Fehler bei der Erstellung des Extraktionsplans: {str(e)}", exc_info=True)

        if add_log_func and workflow_id:
            add_log_func(
                workflow_id,
                f"Fehler bei der Erstellung des Extraktionsplans: {str(e)}",
                "error"
            )

        # Empty plan on errors.
        return []
|
||||
|
||||
async def _execute_extractions(
    extraction_plan: List[Dict[str, Any]],
    files: List[Dict[str, Any]],
    messages: List[Dict[str, Any]],
    lucydom_interface,
    ai_service,
    workflow_id: str = None,
    add_log_func = None,
    logging_utils = None
) -> List[Dict[str, Any]]:
    """
    Execute the planned extractions.

    Args:
        extraction_plan: List of extraction instructions
        files: List of all available files
        messages: Workflow messages, searched for already-extracted content
        lucydom_interface: Interface for database access
        ai_service: Service for AI requests
        workflow_id: Optional workflow ID for logging
        add_log_func: Optional function for adding logs
        logging_utils: Optional logging utility (takes precedence over add_log_func)

    Returns:
        List with extracted data per file
    """
    extracted_data = []

    # Process the most important files first so partial failures still cover them.
    sorted_plan = sorted(extraction_plan, key=lambda x: x.get("importance", 0), reverse=True)

    for extraction_item in sorted_plan:
        file_id = extraction_item.get("file_id")
        extract_needed = extraction_item.get("extract_needed", False)
        extraction_prompt = extraction_item.get("extraction_prompt", "")

        # Find file metadata (first match by id; plan items without a known file are skipped).
        file_metadata = next((f for f in files if f.get("id") == file_id), None)

        if not file_metadata:
            logger.warning(f"File with ID {file_id} not found")
            continue

        file_name = file_metadata.get("name", "")
        file_type = file_metadata.get("type", "")
        content_type = file_metadata.get("content_type", "")

        # Add log (logging_utils wins over the plain add_log_func callback).
        if logging_utils:
            logging_utils.info(f"Processing file: {file_name} (Extraction needed: {extract_needed})", "extraction")
        elif add_log_func and workflow_id:
            add_log_func(
                workflow_id,
                f"Processing file: {file_name} (Extraction needed: {extract_needed})",
                "info"
            )

        # Only perform extraction if needed
        if extract_needed:
            # Get file content via LucyDOM interface
            if lucydom_interface:
                try:
                    file_content = await lucydom_interface.read_file_content(file_id)

                    if not file_content:
                        if logging_utils:
                            logging_utils.warning(f"File {file_name} not found", "extraction")
                        elif add_log_func and workflow_id:
                            add_log_func(workflow_id, f"File {file_name} not found", "warning")
                        continue

                    # Perform extraction based on file type
                    # (extension check covers files whose type field is unset).
                    if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
                        # Image analysis with AI service
                        if ai_service and hasattr(ai_service, "analyze_image"):
                            try:
                                image_analysis = await ai_service.analyze_image(
                                    image_data=file_content,
                                    prompt=extraction_prompt,
                                    mime_type=content_type
                                )

                                extracted_data.append({
                                    "file_id": file_id,
                                    "name": file_name,
                                    "type": file_type,
                                    "content": image_analysis,
                                    "is_extracted": True,
                                    "extraction_method": "image_analysis"
                                })

                                if logging_utils:
                                    logging_utils.info(f"Image {file_name} successfully analyzed", "extraction")
                                elif add_log_func and workflow_id:
                                    add_log_func(workflow_id, f"Image {file_name} successfully analyzed", "info")
                            except Exception as e:
                                # Analysis failure is logged but does not abort the batch.
                                logger.error(f"Error analyzing image {file_name}: {str(e)}")
                                if logging_utils:
                                    logging_utils.error(f"Error analyzing image {file_name}: {str(e)}", "extraction")
                                elif add_log_func and workflow_id:
                                    add_log_func(workflow_id, f"Error analyzing image {file_name}: {str(e)}", "error")
                        else:
                            # Fallback if no image analysis available
                            extracted_data.append({
                                "file_id": file_id,
                                "name": file_name,
                                "type": file_type,
                                "content": f"Image: {file_name} (Analysis not available)",
                                "is_extracted": False,
                                "extraction_method": "none"
                            })
                    else:
                        # Text-based extraction for all other file types
                        try:
                            # Import directly here to avoid circular imports
                            from modules.agentservice_utils import extract_text_from_file_content

                            content, is_extracted = extract_text_from_file_content(
                                file_content, file_name, content_type
                            )

                            extracted_data.append({
                                "file_id": file_id,
                                "name": file_name,
                                "type": file_type,
                                "content": content,
                                "is_extracted": is_extracted,
                                "extraction_method": "text_extraction"
                            })

                            if logging_utils:
                                logging_utils.info(f"File {file_name} extracted (Status: {is_extracted})", "extraction")
                            elif add_log_func and workflow_id:
                                add_log_func(
                                    workflow_id,
                                    f"File {file_name} extracted (Status: {is_extracted})",
                                    "info"
                                )
                        except Exception as e:
                            logger.error(f"Error extracting text from {file_name}: {str(e)}")
                            if logging_utils:
                                logging_utils.error(f"Error extracting text from {file_name}: {str(e)}", "extraction")
                            elif add_log_func and workflow_id:
                                add_log_func(workflow_id, f"Error extracting text from {file_name}: {str(e)}", "error")
                except Exception as e:
                    logger.error(f"Error reading file {file_name}: {str(e)}")
                    if logging_utils:
                        logging_utils.error(f"Error reading file {file_name}: {str(e)}", "extraction")
                    elif add_log_func and workflow_id:
                        add_log_func(workflow_id, f"Error reading file {file_name}: {str(e)}", "error")
            else:
                logger.warning(f"No LucyDOM interface available for file {file_name}")
                if logging_utils:
                    logging_utils.warning(f"No LucyDOM interface available for file {file_name}", "extraction")
                elif add_log_func and workflow_id:
                    add_log_func(workflow_id, f"No LucyDOM interface available for file {file_name}", "warning")
        else:
            # No extraction needed, use existing content from prior workflow messages.
            doc_contents = _extract_document_contents_from_messages(file_id, messages)

            if doc_contents:
                # Use first text content
                for content in doc_contents:
                    if content.get("type") == "text":
                        extracted_data.append({
                            "file_id": file_id,
                            "name": file_name,
                            "type": file_type,
                            "content": content.get("text", ""),
                            "is_extracted": content.get("is_extracted", False),
                            "extraction_method": "existing_content"
                        })
                        break
            else:
                # No existing content found
                extracted_data.append({
                    "file_id": file_id,
                    "name": file_name,
                    "type": file_type,
                    "content": f"No content available for {file_name}",
                    "is_extracted": False,
                    "extraction_method": "none"
                })

    return extracted_data
|
||||
|
||||
def _structure_extracted_data(
|
||||
extracted_data: List[Dict[str, Any]],
|
||||
files: List[Dict[str, Any]],
|
||||
prompt: str
|
||||
) -> Dict[str, Any]:
|
||||
"""
|
||||
Structure the extracted data into a formatted result.
|
||||
|
||||
Args:
|
||||
extracted_data: List of extracted data per file
|
||||
files: List of all available files
|
||||
prompt: Original extraction prompt
|
||||
|
||||
Returns:
|
||||
Structured result object
|
||||
"""
|
||||
# Create base structure
|
||||
result = {
|
||||
"prompt": prompt,
|
||||
"files_processed": len(extracted_data),
|
||||
"total_files": len(files),
|
||||
"extraction_timestamp": datetime.now().isoformat(),
|
||||
"status": "success",
|
||||
"extracted_content": []
|
||||
}
|
||||
|
||||
# Add extracted content
|
||||
for data_item in extracted_data:
|
||||
# Enrich with file metadata
|
||||
file_id = data_item.get("file_id", 0)
|
||||
file_metadata = next((f for f in files if f.get("id") == file_id), {})
|
||||
|
||||
content_item = {
|
||||
"file_id": file_id,
|
||||
"name": data_item.get("name", file_metadata.get("name", "")),
|
||||
"type": data_item.get("type", file_metadata.get("type", "")),
|
||||
"content_type": file_metadata.get("content_type", ""),
|
||||
"size": file_metadata.get("size", ""),
|
||||
"is_extracted": data_item.get("is_extracted", False),
|
||||
"extraction_method": data_item.get("extraction_method", ""),
|
||||
"content": data_item.get("content", "")
|
||||
}
|
||||
|
||||
result["extracted_content"].append(content_item)
|
||||
|
||||
return result
|
||||
|
||||
def _extract_document_contents_from_messages(file_id: int, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Extract document contents for a specific file from workflow messages.
|
||||
|
||||
Args:
|
||||
file_id: ID of the file
|
||||
messages: List of all messages in the workflow
|
||||
|
||||
Returns:
|
||||
List of document contents for the specified file
|
||||
"""
|
||||
contents = []
|
||||
|
||||
for message in messages:
|
||||
# Search documents in the message
|
||||
for document in message.get("documents", []):
|
||||
source = document.get("source", {})
|
||||
|
||||
# Check if file ID matches
|
||||
if source.get("id") == file_id or (source.get("type") == "file" and source.get("id") == file_id):
|
||||
# Add contents of the file
|
||||
doc_contents = document.get("contents", [])
|
||||
|
||||
if doc_contents:
|
||||
contents.extend(doc_contents)
|
||||
|
||||
return contents
|
||||
|
||||
def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):
    """
    Emit *message* on the module logger and, when available, on the workflow log.

    Any log_type other than "error"/"warning" falls back to info level.
    """
    emit = {"error": logger.error, "warning": logger.warning}.get(log_type, logger.info)
    emit(message)

    # Mirror to the caller-provided workflow log when both pieces are present.
    if add_log_func and workflow_id:
        add_log_func(workflow_id, message, log_type, agent_id, agent_name)
|
||||
995
gwserver/modules/agentservice_filemanager.py
Normal file
995
gwserver/modules/agentservice_filemanager.py
Normal file
|
|
@ -0,0 +1,995 @@
|
|||
"""
|
||||
Central file management module for the Agentservice.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import base64
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, Tuple, Union, BinaryIO
|
||||
from io import BytesIO
|
||||
|
||||
# Import utilities from agentservice_utils
|
||||
from modules.agentservice_utils import extract_text_from_file_content, is_text_extractable
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Helper function for adding logs
|
||||
def _log(add_log_func, workflow_id, message, level="info"):
    """
    Forward *message* to the workflow log (when available) and the module logger.

    Levels other than info/warning/error are still passed through to the
    workflow log unchanged, but are not mirrored to the standard logger.
    """
    if add_log_func and workflow_id:
        add_log_func(workflow_id, message, level)

    mirror = {
        "info": logger.info,
        "warning": logger.warning,
        "error": logger.error,
    }.get(level)
    if mirror is not None:
        mirror(message)
||||
|
||||
class FileExtractionError(Exception):
    """Raised when extracting content (text or images) from a file fails."""
|
||||
|
||||
class FileManager:
|
||||
"""Central file management for the Agentservice."""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Get the singleton instance of FileManager."""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the FileManager."""
|
||||
# Ensure singleton pattern
|
||||
if FileManager._instance is not None:
|
||||
raise RuntimeError("Singleton instance already exists - use get_instance()")
|
||||
|
||||
# Import utilities
|
||||
# Instead of storing file_utils, we'll use the imported functions directly
|
||||
|
||||
async def read_file_contents(self,
|
||||
file_contexts: List[Dict[str, Any]],
|
||||
lucydom_interface,
|
||||
workflow_id: str = None,
|
||||
add_log_func = None,
|
||||
ai_service = None # AI service parameter for image analysis
|
||||
) -> Dict[str, Dict[str, Any]]:
|
||||
"""
|
||||
Liest den Inhalt aller Dateien und führt bei Bildern und Dokumenten Analysen durch.
|
||||
Verwendet LucyDOM-Interface statt direkter Dateizugriffe.
|
||||
Gibt jetzt ein Dictionary mit Dateiinhalten und Extraktionsstatus zurück.
|
||||
|
||||
Args:
|
||||
file_contexts: Liste der Dateikontexte mit Metadaten
|
||||
lucydom_interface: LucyDOM-Interface für Dateizugriffe
|
||||
workflow_id: Optionale ID des Workflows für Logging
|
||||
add_log_func: Optionale Funktion für das Hinzufügen von Logs
|
||||
ai_service: Optionaler AI-Service für die Bildanalyse
|
||||
|
||||
Returns:
|
||||
Dictionary mit Dateiinhalten und Metadaten (file_id -> {content, is_extracted, ...})
|
||||
"""
|
||||
file_contents = {}
|
||||
|
||||
# Add debug logging
|
||||
logger.info(f"Reading contents of {len(file_contexts)} files for workflow {workflow_id}")
|
||||
|
||||
for file in file_contexts:
|
||||
file_id = file["id"]
|
||||
file_name = file["name"]
|
||||
file_type = file.get("type", "unknown")
|
||||
content_type = file.get("content_type")
|
||||
|
||||
print("DEGUB5:",file_name,file_type)
|
||||
|
||||
try:
|
||||
# Dateiinhalt über LucyDOM-Interface abrufen
|
||||
file_data = await lucydom_interface.read_file_content(file_id)
|
||||
|
||||
if not file_data:
|
||||
_log(add_log_func, workflow_id, f"Datei {file_name} nicht gefunden", "warning")
|
||||
file_contents[file_id] = {
|
||||
"content": f"File content not available (File not found)",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": content_type
|
||||
}
|
||||
continue
|
||||
|
||||
logger.info(f"Successfully read file: {file_name} (ID: {file_id}, Type: {file_type})")
|
||||
|
||||
# Bildverarbeitung - immer KI-Analyse verwenden, wenn verfügbar
|
||||
if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
|
||||
if ai_service and hasattr(ai_service, "analyze_image"):
|
||||
try:
|
||||
image_analysis = await ai_service.analyze_image(
|
||||
image_data=file_data,
|
||||
prompt="Describe this image in detail",
|
||||
mime_type=content_type
|
||||
)
|
||||
|
||||
logger.debug(f"Image analysis successfully generated for {file_name}")
|
||||
|
||||
file_contents[file_id] = {
|
||||
"content": f"Image Analysis:\n{image_analysis}",
|
||||
"is_extracted": False, # Bildanalyse gilt nicht als Text-Extraktion
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": content_type
|
||||
}
|
||||
_log(add_log_func, workflow_id, f"Image {file_name} analyzed successfully", "info")
|
||||
except Exception as e:
|
||||
logger.error(f"Error analyzing image {file_name}: {str(e)}")
|
||||
_log(add_log_func, workflow_id, f"Error analyzing image {file_name}: {str(e)}", "error")
|
||||
file_contents[file_id] = {
|
||||
"content": f"Image file: {file_name} (Analysis failed: {str(e)})",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": content_type
|
||||
}
|
||||
else:
|
||||
file_contents[file_id] = {
|
||||
"content": f"Image file: {file_name} (AI analysis not available)",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": content_type
|
||||
}
|
||||
|
||||
# Dokument- und Textdateien
|
||||
elif (file_type == "document" or not file_type or file_name.lower().endswith(('.csv', '.txt', '.json', '.xml')) or (content_type and content_type.startswith('text/'))):
|
||||
# Verwende die zentrale Textextraktionsfunktion mit Dateiinhalt
|
||||
content, is_extracted = extract_text_from_file_content(
|
||||
file_data, file_name, content_type
|
||||
)
|
||||
file_contents[file_id] = {
|
||||
"content": content,
|
||||
"is_extracted": is_extracted,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": content_type
|
||||
}
|
||||
_log(add_log_func, workflow_id,
|
||||
f"File {file_name} read successfully (extracted: {is_extracted})", "info")
|
||||
|
||||
# Andere Dateitypen - nur Metadaten speichern
|
||||
else:
|
||||
file_contents[file_id] = {
|
||||
"content": f"File: {file_name} (Type: {file_type}, content not available)",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": content_type
|
||||
}
|
||||
_log(add_log_func, workflow_id, f"Unsupported file type: {file_type} for {file_name}", "warning")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error reading file {file_name}: {str(e)}")
|
||||
_log(add_log_func, workflow_id, f"Error reading file {file_name}: {str(e)}", "error")
|
||||
file_contents[file_id] = {
|
||||
"content": f"File content not available (Error: {str(e)})",
|
||||
"is_extracted": False,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"content_type": content_type
|
||||
}
|
||||
|
||||
return file_contents
|
||||
|
||||
    @staticmethod
    def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Add a file to a message, flagging whether its text has been extracted.

        Args:
            message: The message to extend (mutated in place)
            file_data: File metadata and content

        Returns:
            The updated message containing the file
        """
        # Detailed logging for debugging
        logger.info(f"Adding file to message: {file_data.get('name', 'unnamed_file')} (ID: {file_data.get('id', 'unknown')})")

        # Initialize documents array if needed
        if "documents" not in message:
            message["documents"] = []
            logger.debug("Initialized empty documents array in message")

        # Create a unique ID for the document if not provided
        doc_id = file_data.get("id", f"file_{uuid.uuid4()}")

        # Extract file size if available
        file_size = file_data.get("size")
        if isinstance(file_size, str) and file_size.isdigit():
            file_size = int(file_size)
        elif file_size is None and file_data.get("content"):
            # Estimate size from content if not provided
            file_size = len(file_data.get("content", ""))

        # Determine whether the content has already been extracted
        content = file_data.get("content", "No content available")
        file_name = file_data.get("name", "unnamed_file")
        content_type = file_data.get("content_type")

        # If not explicitly marked but content is present, infer from the file type
        is_extracted = file_data.get("is_extracted", False)
        if not is_extracted and isinstance(content, str) and content.strip() and file_name:
            is_extracted = is_text_extractable(file_name, content_type)

        # Create standard document structure that matches the data model
        document = {
            "id": doc_id,
            "source": {
                "type": "file",
                "id": file_data.get("id", doc_id),
                "name": file_name,
                "content_type": content_type,
                "size": file_size,
                "upload_date": file_data.get("upload_date", datetime.now().isoformat())
            },
            "contents": [
                {
                    "type": "text",
                    "text": content,
                    "is_extracted": is_extracted  # flag for the extraction status
                }
            ]
        }

        # Log document structure for debugging
        logger.debug(f"Created document structure: id={doc_id}, name={file_name}, is_extracted={is_extracted}")

        # Check if file is already in the message to avoid duplicates
        file_already_added = any(
            doc.get("source", {}).get("id") == file_data.get("id")
            for doc in message.get("documents", [])
        )

        if not file_already_added:
            message["documents"].append(document)
            logger.info(f"File {file_name} successfully added to message (total: {len(message.get('documents', []))} files)")
        else:
            logger.info(f"File {file_name} already exists in message, skipping")

        return message
|
||||
|
||||
    async def analyze_file(self, file_id: int, prompt: str, lucydom_interface, ai_service) -> Dict[str, Any]:
        """
        Analyze a file using the appropriate method based on file type.

        Dispatch order: image (type/MIME) -> PDF (extension) -> spreadsheet/CSV
        (extension) -> plain text fallback.

        Args:
            file_id: ID of the file to analyze
            prompt: Analysis prompt
            lucydom_interface: Interface for database access
            ai_service: Service for AI requests

        Returns:
            Analysis result dict with file_id, file_name, analysis_type and result

        Raises:
            ValueError: Missing interfaces, unknown file, or failed extraction
        """
        if not lucydom_interface:
            raise ValueError("LucyDOM interface not available")

        if not ai_service:
            raise ValueError("AI service not available")

        try:
            # Get file metadata
            file = lucydom_interface.get_file(file_id)
            if not file:
                raise ValueError(f"File with ID {file_id} not found")

            # Get file content
            file_content = await lucydom_interface.read_file_content(file_id)
            if not file_content:
                raise ValueError(f"Content for file {file_id} not found")

            # Extract metadata
            file_name = file.get("name", "unnamed")
            content_type = file.get("content_type")
            file_type = file.get("type")

            # Process based on file type
            if file_type == "image" or (content_type and content_type.startswith("image/")):
                # Image analysis
                if hasattr(ai_service, "analyze_image"):
                    analysis = await ai_service.analyze_image(
                        image_data=file_content,
                        prompt=prompt,
                        mime_type=content_type
                    )

                    return {
                        "file_id": file_id,
                        "file_name": file_name,
                        "analysis_type": "image",
                        "result": analysis
                    }
                else:
                    raise ValueError("AI service does not support image analysis")

            elif file_name.endswith(".pdf"):
                # PDF analysis - first extract text, then analyze
                try:
                    # Extract text
                    text_content, is_extracted = extract_text_from_file_content(
                        file_content, file_name, content_type
                    )

                    if not is_extracted:
                        raise ValueError(f"Failed to extract text from PDF {file_name}")

                    # Analyze text with AI
                    pdf_analysis_prompt = f"""
                    Analyze the following PDF content based on this request:

                    REQUEST: {prompt}

                    PDF CONTENT:
                    {text_content[:10000]} # Limit to first 10K chars to avoid token limits
                    """

                    analysis = await ai_service.call_api([{"role": "user", "content": pdf_analysis_prompt}])

                    # Also check for images in the PDF (best-effort; failures only warn)
                    has_images = False
                    image_analysis = None

                    try:
                        # Extract and analyze images
                        image_results = await self.extract_and_analyze_pdf_images(
                            file_content,
                            f"Analyze images with respect to: {prompt}",
                            ai_service
                        )

                        if image_results and len(image_results) > 0:
                            has_images = True
                            image_analysis = "\n\nPDF IMAGES ANALYSIS:\n"
                            for img in image_results:
                                image_analysis += f"- Image on page {img.get('page')}: {img.get('response')}\n"
                    except Exception as img_err:
                        logger.warning(f"Could not analyze images in PDF {file_name}: {str(img_err)}")

                    # Combine text and image analysis if available
                    if has_images and image_analysis:
                        analysis += image_analysis

                    return {
                        "file_id": file_id,
                        "file_name": file_name,
                        "analysis_type": "pdf",
                        "result": analysis,
                        "has_images": has_images
                    }

                except Exception as pdf_err:
                    logger.error(f"Error analyzing PDF {file_name}: {str(pdf_err)}")
                    raise

            elif file_name.endswith(('.xlsx', '.xls', '.csv')):
                # Tabular data analysis
                try:
                    # Extract text content
                    text_content, is_extracted = extract_text_from_file_content(
                        file_content, file_name, content_type
                    )

                    if not is_extracted:
                        raise ValueError(f"Failed to extract data from {file_name}")

                    # Analyze with AI
                    data_analysis_prompt = f"""
                    Analyze the following tabular data based on this request:

                    REQUEST: {prompt}

                    DATA CONTENT:
                    {text_content[:10000]} # Limit to first 10K chars

                    Provide a structured analysis including:
                    1. Data overview
                    2. Key insights
                    3. Patterns and trends
                    4. Answers to the specific request
                    """

                    analysis = await ai_service.call_api([{"role": "user", "content": data_analysis_prompt}])

                    return {
                        "file_id": file_id,
                        "file_name": file_name,
                        "analysis_type": "tabular_data",
                        "result": analysis
                    }

                except Exception as data_err:
                    logger.error(f"Error analyzing tabular data {file_name}: {str(data_err)}")
                    raise

            else:
                # Default to text analysis for all other file types
                try:
                    # Extract text content
                    text_content, is_extracted = extract_text_from_file_content(
                        file_content, file_name, content_type
                    )

                    if not is_extracted:
                        raise ValueError(f"Failed to extract text from {file_name}")

                    # Analyze with AI
                    text_analysis_prompt = f"""
                    Analyze the following document content based on this request:

                    REQUEST: {prompt}

                    DOCUMENT CONTENT:
                    {text_content[:10000]} # Limit to first 10K chars
                    """

                    analysis = await ai_service.call_api([{"role": "user", "content": text_analysis_prompt}])

                    return {
                        "file_id": file_id,
                        "file_name": file_name,
                        "analysis_type": "text",
                        "result": analysis
                    }

                except Exception as text_err:
                    logger.error(f"Error analyzing text content {file_name}: {str(text_err)}")
                    raise

        except Exception as e:
            # Outer guard: log with the file id, then re-raise for the caller.
            logger.error(f"Error analyzing file {file_id}: {str(e)}")
            raise
|
||||
|
||||
    async def extract_and_analyze_pdf_images(self,
                                            pdf_content: bytes,
                                            prompt: str,
                                            ai_service
                                            ) -> List[Dict[str, Any]]:
        """
        Extract images from a PDF file and analyze each with the AI service.

        Works on in-memory binary data instead of file paths; any temporary
        files created for size probing are cleaned up in the finally block.

        Args:
            pdf_content: Binary data of the PDF file
            prompt: Prompt for the image analysis
            ai_service: AI service used for the image analysis

        Returns:
            List of analysis results, one entry per extracted image

        Raises:
            FileExtractionError: When PyMuPDF is missing or extraction fails
        """
        image_responses = []
        temp_files = []  # temporary files to clean up at the end

        try:
            # Open the PDF with PyMuPDF
            import fitz  # PyMuPDF
            # BytesIO is already imported at the top level
            import tempfile

            # Open the PDF from memory
            doc = fitz.open(stream=pdf_content, filetype="pdf")
            logger.info(f"PDF geöffnet mit {len(doc)} Seiten")

            for page_num, page in enumerate(doc, 1):
                # Find all images on the page
                image_list = page.get_images(full=True)

                if image_list:
                    logger.info(f"Seite {page_num}: {len(image_list)} Bilder gefunden")

                    for img_index, img in enumerate(image_list):
                        try:
                            # Image xref reference
                            xref = img[0]

                            # Extract image bytes and metadata
                            base_image = doc.extract_image(xref)
                            image_bytes = base_image["image"]  # actual image data
                            image_ext = base_image["ext"]  # file extension (jpg, png, etc.)

                            # Create a temporary file (used only for size probing below)
                            fd, temp_img_path = tempfile.mkstemp(suffix=f".{image_ext}")
                            temp_files.append(temp_img_path)  # register for cleanup

                            with os.fdopen(fd, 'wb') as img_file:
                                img_file.write(image_bytes)

                            logger.debug(f"Bild temporär gespeichert: {temp_img_path}")

                            # Analyze with the AI service; failures are recorded inline
                            try:
                                analysis_result = await ai_service.analyze_image(
                                    image_data=image_bytes,  # pass the bytes directly
                                    prompt=prompt,
                                    mime_type=f"image/{image_ext}"
                                )
                                logger.debug(f"Bildanalyse für Bild {img_index} auf Seite {page_num} abgeschlossen")
                            except Exception as analyze_error:
                                logger.error(f"Fehler bei der Bildanalyse: {str(analyze_error)}")
                                analysis_result = f"[Fehler bei der Bildanalyse: {str(analyze_error)}]"

                            # Determine the image size
                            try:
                                # Prefer the dimensions reported by extract_image
                                if 'width' in base_image and 'height' in base_image:
                                    image_size = f"{base_image['width']}x{base_image['height']}"
                                else:
                                    # Fallback: open the temporary file with PIL
                                    from PIL import Image
                                    with Image.open(temp_img_path) as img:
                                        width, height = img.size
                                        image_size = f"{width}x{height}"
                            except Exception as e:
                                logger.warning(f"Konnte Bildgröße nicht ermitteln: {str(e)}")
                                image_size = "unbekannt"

                            image_responses.append({
                                "page": page_num,
                                "image_index": img_index,
                                "format": image_ext,
                                "image_size": image_size,
                                "response": analysis_result
                            })

                        except Exception as e:
                            # Skip the broken image, keep processing the rest
                            logger.warning(f"Fehler bei der Extraktion von Bild {img_index} auf Seite {page_num}: {str(e)}")
                            continue

            logger.info(f"Extrahiert und analysiert: {len(image_responses)} Bilder aus PDF")

        except ImportError:
            logger.error("PyMuPDF (fitz) ist nicht installiert. Installiere es mit 'pip install pymupdf'")
            raise FileExtractionError("PyMuPDF (fitz) ist nicht installiert")
        except Exception as e:
            logger.error(f"Fehler beim Extrahieren von PDF-Bildern: {str(e)}")
            raise FileExtractionError(f"Fehler beim Extrahieren von PDF-Bildern: {str(e)}")
        finally:
            # Remove all temporary files, even on error paths
            for temp_file in temp_files:
                try:
                    if os.path.exists(temp_file):
                        os.remove(temp_file)
                except Exception as e:
                    logger.warning(f"Konnte temporäre Datei nicht entfernen: {temp_file} - {str(e)}")

        return image_responses
|
||||
|
||||
    async def analyze_multiple_files(
        self,
        file_ids: List[int],
        prompt: str,
        lucydom_interface,
        ai_service
    ) -> Dict[str, Any]:
        """
        Analyze multiple files and synthesize a combined result.

        Per-file failures are captured as error entries rather than aborting
        the whole batch; the synthesis step runs over whatever succeeded.

        Args:
            file_ids: List of file IDs to analyze
            prompt: Analysis prompt
            lucydom_interface: Interface for database access
            ai_service: Service for AI requests

        Returns:
            Dict with "synthesis" (or "error"), "individual_results" and "files_analyzed"
        """
        results = []

        # Analyze each file
        for file_id in file_ids:
            try:
                analysis = await self.analyze_file(file_id, prompt, lucydom_interface, ai_service)
                results.append(analysis)
            except Exception as e:
                logger.error(f"Error analyzing file {file_id}: {str(e)}")
                results.append({
                    "file_id": file_id,
                    "error": str(e),
                    "analysis_type": "error"
                })

        # Now synthesize a combined analysis
        if results:
            try:
                # Prepare prompt for synthesis
                synthesis_prompt = f"""
                Synthesize a combined analysis based on these individual file analyses:

                ORIGINAL REQUEST: {prompt}

                INDIVIDUAL ANALYSES:
                """

                for i, result in enumerate(results, 1):
                    file_name = result.get("file_name", f"File {i}")
                    analysis_type = result.get("analysis_type", "unknown")
                    analysis_result = result.get("result", "No analysis available")

                    synthesis_prompt += f"""
                    ## {file_name} ({analysis_type})
                    {analysis_result}

                    ---
                    """

                synthesis_prompt += """
                Please provide a comprehensive synthesis that:
                1. Combines insights from all files
                2. Addresses the original request
                3. Highlights connections between different files
                4. Provides a unified conclusion
                """

                # Call AI for synthesis
                synthesis = await ai_service.call_api([{"role": "user", "content": synthesis_prompt}])

                return {
                    "synthesis": synthesis,
                    "individual_results": results,
                    "files_analyzed": len(results)
                }

            except Exception as e:
                # Synthesis failed: return the per-file results with the error
                logger.error(f"Error synthesizing combined analysis: {str(e)}")
                return {
                    "error": str(e),
                    "individual_results": results,
                    "files_analyzed": len(results)
                }
        else:
            return {
                "synthesis": "No files were successfully analyzed.",
                "individual_results": [],
                "files_analyzed": 0
            }
|
||||
|
||||
def determine_file_type(self, file_name: str, content_type: str = None) -> str:
|
||||
"""
|
||||
Determine the file type based on name and content type.
|
||||
|
||||
Args:
|
||||
file_name: Name of the file
|
||||
content_type: MIME type (optional)
|
||||
|
||||
Returns:
|
||||
File type string ('document', 'image', etc.)
|
||||
"""
|
||||
# Check content type first
|
||||
if content_type:
|
||||
if content_type.startswith('image/'):
|
||||
return "image"
|
||||
elif content_type in ['application/pdf']:
|
||||
return "document"
|
||||
elif content_type in ['application/vnd.ms-excel',
|
||||
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
|
||||
'text/csv']:
|
||||
return "spreadsheet"
|
||||
|
||||
# Check file extension
|
||||
lower_name = file_name.lower()
|
||||
|
||||
# Images
|
||||
if lower_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg')):
|
||||
return "image"
|
||||
|
||||
# Documents
|
||||
if lower_name.endswith(('.pdf', '.doc', '.docx', '.txt', '.md', '.rtf')):
|
||||
return "document"
|
||||
|
||||
# Spreadsheets
|
||||
if lower_name.endswith(('.xlsx', '.xls', '.csv')):
|
||||
return "spreadsheet"
|
||||
|
||||
# Presentations
|
||||
if lower_name.endswith(('.pptx', '.ppt')):
|
||||
return "presentation"
|
||||
|
||||
# Data files
|
||||
if lower_name.endswith(('.json', '.xml', '.yaml', '.yml')):
|
||||
return "data"
|
||||
|
||||
# Default to document
|
||||
return "document"
|
||||
|
||||
def get_mime_type(self, file_name: str) -> str:
|
||||
"""Get MIME type based on file name."""
|
||||
# Import from lucydom_interface
|
||||
from lucydom_interface import LucyDOMInterface
|
||||
temp_interface = LucyDOMInterface(0, 0) # Default values
|
||||
return temp_interface.get_mime_type(file_name)
|
||||
|
||||
def prepare_file_contexts(self, files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Bereitet die Dateikontexte basierend auf Metadaten vor.
|
||||
Akzeptiert keine Pfade mehr, sondern nur Metadaten aus der Datenbank.
|
||||
|
||||
Args:
|
||||
files: Liste von Dateien mit Metadaten (Dict mit id, name, type, content_type)
|
||||
|
||||
Returns:
|
||||
Liste von Dateikontexten für die Verarbeitung
|
||||
"""
|
||||
file_contexts = []
|
||||
|
||||
logger.info(f"Preparing file contexts for {len(files)} files")
|
||||
|
||||
for file in files:
|
||||
file_id = file.get("id")
|
||||
file_name = file.get("name")
|
||||
file_type = file.get("type")
|
||||
|
||||
# Create a comprehensive context with all available metadata
|
||||
context = {
|
||||
"id": file_id,
|
||||
"name": file_name,
|
||||
"type": file_type,
|
||||
"size": file.get("size", "Unbekannt"),
|
||||
"content_type": file.get("content_type"),
|
||||
"path": file.get("path"),
|
||||
"upload_date": file.get("upload_date"),
|
||||
"hash": file.get("hash"),
|
||||
"mandate_id": file.get("mandate_id"),
|
||||
"user_id": file.get("user_id")
|
||||
}
|
||||
|
||||
# Log for debugging
|
||||
logger.info(f"Created file context: {file_name} (ID: {file_id}, Type: {file_type})")
|
||||
|
||||
file_contexts.append(context)
|
||||
|
||||
return file_contexts
|
||||
|
||||
# Factory method
|
||||
@staticmethod
|
||||
def get_instance():
|
||||
"""Get the singleton instance of FileManager."""
|
||||
if FileManager._instance is None:
|
||||
FileManager._instance = FileManager()
|
||||
return FileManager._instance
|
||||
|
||||
|
||||
# Create a singleton instance for module-level access
|
||||
file_manager = FileManager.get_instance()
|
||||
|
||||
def get_file_manager():
|
||||
"""Get the singleton instance of FileManager."""
|
||||
return file_manager
|
||||
|
||||
|
||||
class WorkflowFileManager:
|
||||
"""
|
||||
Specialized file manager for workflow operations.
|
||||
Handles workflow-specific file operations and document management.
|
||||
"""
|
||||
|
||||
def __init__(self, workflow_id: str = None, lucydom_interface = None):
|
||||
"""
|
||||
Initialize the workflow file manager.
|
||||
|
||||
Args:
|
||||
workflow_id: Optional workflow ID for context
|
||||
lucydom_interface: LucyDOM interface for database operations
|
||||
"""
|
||||
self.workflow_id = workflow_id
|
||||
self.lucydom_interface = lucydom_interface
|
||||
self.file_manager = get_file_manager()
|
||||
|
||||
def set_workflow_id(self, workflow_id: str):
|
||||
"""Set or update the workflow ID."""
|
||||
self.workflow_id = workflow_id
|
||||
|
||||
def set_lucydom_interface(self, lucydom_interface):
|
||||
"""Set or update the LucyDOM interface."""
|
||||
self.lucydom_interface = lucydom_interface
|
||||
|
||||
async def add_files_to_message(self,
|
||||
message: Dict[str, Any],
|
||||
file_ids: List[int],
|
||||
add_log_func = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Add multiple files to a message.
|
||||
|
||||
Args:
|
||||
message: The message to add files to
|
||||
file_ids: List of file IDs to add
|
||||
add_log_func: Optional logging function
|
||||
|
||||
Returns:
|
||||
Updated message
|
||||
"""
|
||||
if not self.lucydom_interface:
|
||||
_log(add_log_func, self.workflow_id, "LucyDOM interface not available", "error")
|
||||
return message
|
||||
|
||||
updated_message = message.copy()
|
||||
|
||||
# Get file metadata
|
||||
files = []
|
||||
for file_id in file_ids:
|
||||
file = self.lucydom_interface.get_file(file_id)
|
||||
if file:
|
||||
files.append(file)
|
||||
else:
|
||||
_log(add_log_func, self.workflow_id, f"File not found: {file_id}", "warning")
|
||||
|
||||
# Prepare file contexts
|
||||
file_contexts = self.file_manager.prepare_file_contexts(files)
|
||||
|
||||
# Read file contents
|
||||
file_contents = await self.file_manager.read_file_contents(
|
||||
file_contexts,
|
||||
self.lucydom_interface,
|
||||
self.workflow_id,
|
||||
add_log_func
|
||||
)
|
||||
|
||||
# Add files to message
|
||||
for file_id, content_data in file_contents.items():
|
||||
# Add file to message
|
||||
updated_message = FileManager.add_file_to_message(updated_message, content_data)
|
||||
|
||||
return updated_message
|
||||
|
||||
def get_files_from_message(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Extract file references from a message.
|
||||
|
||||
Args:
|
||||
message: The message to extract files from
|
||||
|
||||
Returns:
|
||||
List of file metadata
|
||||
"""
|
||||
files = []
|
||||
|
||||
# Process documents
|
||||
for doc in message.get("documents", []):
|
||||
source = doc.get("source", {})
|
||||
|
||||
# Only include file documents
|
||||
if source.get("type") == "file":
|
||||
file_info = {
|
||||
"id": source.get("id", ""),
|
||||
"name": source.get("name", ""),
|
||||
"type": source.get("content_type", ""),
|
||||
"content_type": source.get("content_type", ""),
|
||||
"size": source.get("size", 0)
|
||||
}
|
||||
|
||||
files.append(file_info)
|
||||
|
||||
return files
|
||||
|
||||
def get_document_text_content(self, message: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Extract text content from all documents in a message.
|
||||
|
||||
Args:
|
||||
message: The message to extract content from
|
||||
|
||||
Returns:
|
||||
Combined text content
|
||||
"""
|
||||
content = ""
|
||||
|
||||
# Process all documents
|
||||
for doc in message.get("documents", []):
|
||||
for doc_content in doc.get("contents", []):
|
||||
if doc_content.get("type") == "text":
|
||||
content += "\n\n" + doc_content.get("text", "")
|
||||
|
||||
return content
|
||||
|
||||
async def extract_document_info(self,
|
||||
workflow: Dict[str, Any],
|
||||
message_id: str = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Extract document information from a workflow or specific message.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
message_id: Optional message ID to focus on a specific message
|
||||
|
||||
Returns:
|
||||
Document information
|
||||
"""
|
||||
result = {
|
||||
"documents": [],
|
||||
"file_count": 0,
|
||||
"extracted_text": ""
|
||||
}
|
||||
|
||||
if message_id:
|
||||
# Process only the specified message
|
||||
for message in workflow.get("messages", []):
|
||||
if message.get("id") == message_id:
|
||||
files = self.get_files_from_message(message)
|
||||
result["documents"].extend(files)
|
||||
result["file_count"] = len(files)
|
||||
result["extracted_text"] = self.get_document_text_content(message)
|
||||
break
|
||||
else:
|
||||
# Process all messages
|
||||
for message in workflow.get("messages", []):
|
||||
files = self.get_files_from_message(message)
|
||||
result["documents"].extend(files)
|
||||
result["extracted_text"] += self.get_document_text_content(message)
|
||||
|
||||
# De-duplicate files
|
||||
unique_files = {}
|
||||
for file in result["documents"]:
|
||||
file_id = file.get("id")
|
||||
if file_id and file_id not in unique_files:
|
||||
unique_files[file_id] = file
|
||||
|
||||
result["documents"] = list(unique_files.values())
|
||||
result["file_count"] = len(result["documents"])
|
||||
|
||||
return result
|
||||
|
||||
async def analyze_workflow_documents(self,
|
||||
workflow: Dict[str, Any],
|
||||
prompt: str,
|
||||
ai_service,
|
||||
message_id: str = None) -> Dict[str, Any]:
|
||||
"""
|
||||
Analyze documents in a workflow.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
prompt: Analysis prompt
|
||||
ai_service: Service for AI analysis
|
||||
message_id: Optional message ID to focus on specific message
|
||||
|
||||
Returns:
|
||||
Analysis result
|
||||
"""
|
||||
if not self.lucydom_interface:
|
||||
raise ValueError("LucyDOM interface not available")
|
||||
|
||||
if not ai_service:
|
||||
raise ValueError("AI service not available")
|
||||
|
||||
# Extract document info
|
||||
doc_info = await self.extract_document_info(workflow, message_id)
|
||||
|
||||
if doc_info["file_count"] == 0:
|
||||
return {
|
||||
"result": "No documents found for analysis",
|
||||
"files_analyzed": 0
|
||||
}
|
||||
|
||||
# Get file IDs
|
||||
file_ids = [doc.get("id") for doc in doc_info["documents"] if doc.get("id")]
|
||||
|
||||
# Analyze files
|
||||
analysis = await self.file_manager.analyze_multiple_files(
|
||||
file_ids,
|
||||
prompt,
|
||||
self.lucydom_interface,
|
||||
ai_service
|
||||
)
|
||||
|
||||
return analysis
|
||||
|
||||
|
||||
# Export the workflow file manager factory function
|
||||
def get_workflow_file_manager(workflow_id: str = None, lucydom_interface = None):
|
||||
"""Get a workflow file manager instance."""
|
||||
return WorkflowFileManager(workflow_id, lucydom_interface)
|
||||
|
|
@ -1,100 +1,116 @@
|
|||
"""
|
||||
Registry für alle verfügbaren Agenten im System.
|
||||
Verwaltet die Agenten-Instanzen und stellt sie für den Workflow zur Verfügung.
|
||||
Updated registry for all available agents in the system.
|
||||
Provides centralized agent registration and access with improved error handling.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import importlib
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
# Import direkt bekannter Agent-Module
|
||||
# Andere Module werden dynamisch importiert
|
||||
# Import direct base agent module
|
||||
from modules.agentservice_base import BaseAgent
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AgentRegistry:
|
||||
"""Registry für alle verfügbaren Agenten im System"""
|
||||
"""Registry for all available agents in the system"""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Gibt eine Singleton-Instanz der Agent-Registry zurück"""
|
||||
"""Get a singleton instance of the Agent Registry"""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert die Agent-Registry"""
|
||||
"""Initialize the Agent Registry"""
|
||||
if AgentRegistry._instance is not None:
|
||||
raise RuntimeError("Singleton-Instanz existiert bereits - nutze get_instance()")
|
||||
raise RuntimeError("Singleton instance already exists - use get_instance()")
|
||||
self.agents = {}
|
||||
self._load_agents()
|
||||
|
||||
def _load_agents(self):
|
||||
"""Lädt alle verfügbaren Agenten"""
|
||||
# Liste aller zu ladenden Agenten-Module
|
||||
agent_modules = [
|
||||
"agentservice_agent_user",
|
||||
"agentservice_agent_coder",
|
||||
"agentservice_agent_analyst",
|
||||
"agentservice_agent_webcrawler",
|
||||
"agentservice_agent_sharepoint",
|
||||
"agentservice_agent_documentation"
|
||||
]
|
||||
|
||||
"""Load all available agents"""
|
||||
# List of all agent modules to load
|
||||
logger.info("Automatically loading agent modules...")
|
||||
agent_modules = []
|
||||
for filename in os.listdir(os.path.dirname(__file__)):
|
||||
if filename.startswith("agentservice_agent_") and filename.endswith(".py"):
|
||||
agent_modules.append(filename[:-3]) # Remove .py extension
|
||||
if not agent_modules:
|
||||
logger.warning("No agent modules found")
|
||||
return
|
||||
logger.info(f"Found {len(agent_modules)} agent modules")
|
||||
|
||||
for module_name in agent_modules:
|
||||
try:
|
||||
# Importiere das Modul
|
||||
# Import the module
|
||||
try:
|
||||
module = importlib.import_module(f"modules.{module_name}")
|
||||
except ImportError:
|
||||
module = importlib.import_module(module_name)
|
||||
|
||||
# Suche nach der Agent-Klasse statt nach getter-Funktion
|
||||
# Look for the agent class or a get_*_agent function
|
||||
agent_type = module_name.split('_')[-1]
|
||||
class_name = f"{agent_type.capitalize()}Agent"
|
||||
getter_name = f"get_{agent_type}_agent"
|
||||
|
||||
if hasattr(module, class_name):
|
||||
# Instanziiere den Agenten direkt
|
||||
agent = None
|
||||
|
||||
# Try to get the agent via the get_*_agent function
|
||||
if hasattr(module, getter_name):
|
||||
getter_func = getattr(module, getter_name)
|
||||
agent = getter_func()
|
||||
logger.info(f"Agent '{agent.name}' (Type: {agent.type}) loaded via {getter_name}()")
|
||||
|
||||
# Alternatively, try to instantiate the agent directly
|
||||
elif hasattr(module, class_name):
|
||||
agent_class = getattr(module, class_name)
|
||||
agent = agent_class()
|
||||
|
||||
# Registriere den Agenten
|
||||
logger.info(f"Agent '{agent.name}' (Type: {agent.type}) directly instantiated")
|
||||
|
||||
if agent:
|
||||
# Register the agent
|
||||
self.register_agent(agent)
|
||||
logger.info(f"Agent '{agent.name}' (Typ: {agent.type}) wurde geladen")
|
||||
else:
|
||||
logger.warning(f"Keine {class_name} Klasse in Modul {module_name} gefunden")
|
||||
logger.warning(f"No agent class or getter function found in module {module_name}")
|
||||
|
||||
except ImportError as e:
|
||||
logger.warning(f"Modul {module_name} konnte nicht importiert werden: {e}")
|
||||
logger.warning(f"Module {module_name} could not be imported: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Laden des Agenten aus Modul {module_name}: {e}")
|
||||
logger.error(f"Error loading agent from module {module_name}: {e}")
|
||||
|
||||
def register_agent(self, agent: BaseAgent):
|
||||
"""Registriert einen Agenten in der Registry."""
|
||||
"""
|
||||
Register an agent in the registry.
|
||||
|
||||
Args:
|
||||
agent: The agent to register
|
||||
"""
|
||||
agent_type = agent.type
|
||||
self.agents[agent_type] = agent
|
||||
# Zusätzlich nach ID registrieren
|
||||
# Also register by ID
|
||||
self.agents[agent.id] = agent
|
||||
logger.debug(f"Agent '{agent.name}' (Typ: {agent_type}) wurde registriert")
|
||||
logger.debug(f"Agent '{agent.name}' (Type: {agent_type}) registered")
|
||||
|
||||
def get_agent(self, agent_identifier: str) -> Optional[BaseAgent]:
|
||||
"""
|
||||
Gibt eine Instanz eines Agenten nach ID oder Typ zurück.
|
||||
Get an agent instance by ID or type.
|
||||
|
||||
Args:
|
||||
agent_identifier: ID oder Typ des gewünschten Agenten
|
||||
agent_identifier: ID or type of the desired agent
|
||||
|
||||
Returns:
|
||||
Agent-Instanz oder None, wenn nicht gefunden
|
||||
Agent instance or None if not found
|
||||
"""
|
||||
# Versuche, direkt nach Typ zu finden
|
||||
# Try to find directly by type
|
||||
if agent_identifier in self.agents:
|
||||
return self.agents[agent_identifier]
|
||||
|
||||
# Wenn nicht gefunden, versuche verschiedene Varianten des Namens
|
||||
# If not found, try different name variants
|
||||
variants = [
|
||||
agent_identifier,
|
||||
agent_identifier.replace('_agent', ''),
|
||||
|
|
@ -105,17 +121,17 @@ class AgentRegistry:
|
|||
if variant in self.agents:
|
||||
return self.agents[variant]
|
||||
|
||||
logger.warning(f"Agent mit Identifier '{agent_identifier}' nicht gefunden")
|
||||
logger.warning(f"Agent with identifier '{agent_identifier}' not found")
|
||||
return None
|
||||
|
||||
def get_all_agents(self) -> Dict[str, BaseAgent]:
|
||||
"""Gibt alle registrierten Agenten zurück."""
|
||||
"""Get all registered agents."""
|
||||
return self.agents
|
||||
|
||||
def get_agent_infos(self) -> List[Dict[str, Any]]:
|
||||
"""Gibt Informationen zu allen registrierten Agenten zurück."""
|
||||
"""Get information about all registered agents."""
|
||||
agent_infos = []
|
||||
# Nur einmal pro Agent-Instanz (da wir sowohl nach Typ als auch nach ID registrieren)
|
||||
# Only once per agent instance (since we register both by type and ID)
|
||||
seen_agents = set()
|
||||
for agent in self.agents.values():
|
||||
if agent not in seen_agents:
|
||||
|
|
@ -123,8 +139,42 @@ class AgentRegistry:
|
|||
seen_agents.add(agent)
|
||||
return agent_infos
|
||||
|
||||
def get_agent_by_format(self, required_format: str) -> Optional[BaseAgent]:
|
||||
"""
|
||||
Find an agent that can produce the required output format.
|
||||
|
||||
Args:
|
||||
required_format: The required output format
|
||||
|
||||
Returns:
|
||||
Agent that can produce the required format, or None if not found
|
||||
"""
|
||||
# Create mapping of result format -> agent for faster lookup
|
||||
format_to_agent = {}
|
||||
seen_agents = set()
|
||||
|
||||
for agent in self.agents.values():
|
||||
if agent not in seen_agents:
|
||||
# Get the agent's result format
|
||||
agent_format = getattr(agent, 'result_format', None)
|
||||
if agent_format:
|
||||
format_to_agent[agent_format.lower()] = agent
|
||||
seen_agents.add(agent)
|
||||
|
||||
# Try to find an exact match
|
||||
if required_format.lower() in format_to_agent:
|
||||
return format_to_agent[required_format.lower()]
|
||||
|
||||
# If no exact match, try to find a partial match
|
||||
for fmt, agent in format_to_agent.items():
|
||||
if required_format.lower() in fmt or fmt in required_format.lower():
|
||||
return agent
|
||||
|
||||
# No match found
|
||||
return None
|
||||
|
||||
def initialize_agents_for_workflow(self) -> Dict[str, Dict[str, Any]]:
|
||||
"""Initialisiert Agenten für einen Workflow."""
|
||||
"""Initialize agents for a workflow."""
|
||||
initialized_agents = {}
|
||||
seen_agents = set()
|
||||
for agent in self.agents.values():
|
||||
|
|
@ -133,4 +183,38 @@ class AgentRegistry:
|
|||
agent_id = agent_info["id"]
|
||||
initialized_agents[agent_id] = agent_info
|
||||
seen_agents.add(agent)
|
||||
return initialized_agents
|
||||
return initialized_agents
|
||||
|
||||
def get_agent_capabilities(self) -> Dict[str, List[str]]:
|
||||
"""
|
||||
Get a mapping of capabilities to agents.
|
||||
Useful for finding the right agent for a specific task.
|
||||
|
||||
Returns:
|
||||
Dict mapping capability keywords to agent IDs
|
||||
"""
|
||||
capabilities_map = {}
|
||||
seen_agents = set()
|
||||
|
||||
for agent in self.agents.values():
|
||||
if agent not in seen_agents:
|
||||
# Get agent info
|
||||
agent_info = agent.get_agent_info()
|
||||
agent_id = agent_info["id"]
|
||||
|
||||
# Extract capabilities
|
||||
capabilities = agent_info.get("capabilities", "")
|
||||
|
||||
# Split capabilities into keywords
|
||||
if capabilities:
|
||||
keywords = [kw.strip().lower() for kw in capabilities.split(',')]
|
||||
|
||||
# Add each keyword to the mapping
|
||||
for keyword in keywords:
|
||||
if keyword not in capabilities_map:
|
||||
capabilities_map[keyword] = []
|
||||
capabilities_map[keyword].append(agent_id)
|
||||
|
||||
seen_agents.add(agent)
|
||||
|
||||
return capabilities_map
|
||||
758
gwserver/modules/agentservice_utils.py
Normal file
758
gwserver/modules/agentservice_utils.py
Normal file
|
|
@ -0,0 +1,758 @@
|
|||
"""
|
||||
Centralized utility functions for the Agentservice (continued).
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import json
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, Tuple, Union, Callable
|
||||
from io import BytesIO
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class WorkflowUtils:
|
||||
"""
|
||||
Utility class for workflow operations.
|
||||
Centralizes common workflow-related functions.
|
||||
"""
|
||||
|
||||
def __init__(self, workflow_id: str = None):
|
||||
"""Initialize with optional workflow ID"""
|
||||
self.workflow_id = workflow_id
|
||||
|
||||
def set_workflow_id(self, workflow_id: str):
|
||||
"""Set or update the workflow ID"""
|
||||
self.workflow_id = workflow_id
|
||||
|
||||
def get_documents(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get all documents from a workflow across all messages.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
|
||||
Returns:
|
||||
List of document objects
|
||||
"""
|
||||
documents = []
|
||||
|
||||
# Process all messages
|
||||
for message in workflow.get("messages", []):
|
||||
# Extract documents from the message
|
||||
for doc in message.get("documents", []):
|
||||
# Add to list if not already present
|
||||
if not any(d.get("id") == doc.get("id") for d in documents):
|
||||
documents.append(doc)
|
||||
|
||||
return documents
|
||||
|
||||
def get_files(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get all file references from a workflow.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
|
||||
Returns:
|
||||
List of file metadata objects
|
||||
"""
|
||||
files = []
|
||||
|
||||
# Process all messages
|
||||
for message in workflow.get("messages", []):
|
||||
# Extract documents from the message
|
||||
for doc in message.get("documents", []):
|
||||
source = doc.get("source", {})
|
||||
|
||||
# Only include file documents
|
||||
if source.get("type") == "file":
|
||||
file_info = {
|
||||
"id": source.get("id", ""),
|
||||
"name": source.get("name", ""),
|
||||
"type": source.get("content_type", ""),
|
||||
"content_type": source.get("content_type", ""),
|
||||
"size": source.get("size", 0)
|
||||
}
|
||||
|
||||
# Check if file is already in the list
|
||||
if not any(f.get("id") == file_info["id"] for f in files):
|
||||
files.append(file_info)
|
||||
|
||||
return files
|
||||
|
||||
def extract_by_prompt(self, workflow: Dict[str, Any], prompt: str, ai_service) -> Dict[str, Any]:
|
||||
"""
|
||||
Extract data from workflow documents based on an AI prompt.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
prompt: The extraction prompt
|
||||
ai_service: The AI service to use for extraction
|
||||
|
||||
Returns:
|
||||
Extracted data
|
||||
"""
|
||||
# This is an async method but we're exposing it as a regular method
|
||||
# The caller should use it with asyncio.run() or await
|
||||
async def _extract():
|
||||
# Create extraction prompt
|
||||
files = self.get_files(workflow)
|
||||
file_descriptions = "\n".join([f"- {f.get('name', 'unnamed')} ({f.get('type', 'unknown')})" for f in files])
|
||||
|
||||
extraction_prompt = f"""
|
||||
Extract relevant information from the following files based on this request:
|
||||
|
||||
REQUEST: {prompt}
|
||||
|
||||
FILES:
|
||||
{file_descriptions}
|
||||
|
||||
Focus on the most relevant content and provide a structured output.
|
||||
"""
|
||||
|
||||
# Call AI
|
||||
response = await ai_service.call_api([{"role": "user", "content": extraction_prompt}])
|
||||
|
||||
return {
|
||||
"prompt": prompt,
|
||||
"extracted_content": response,
|
||||
"files_processed": len(files)
|
||||
}
|
||||
|
||||
# Return the coroutine
|
||||
return _extract()
|
||||
|
||||
def merge_workflows(self, workflows: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Merge multiple workflows into a single unified workflow.
|
||||
Useful for workflow templates or combining partial workflows.
|
||||
|
||||
Args:
|
||||
workflows: List of workflow objects to merge
|
||||
|
||||
Returns:
|
||||
Merged workflow
|
||||
"""
|
||||
if not workflows:
|
||||
return {}
|
||||
|
||||
# Start with the first workflow
|
||||
result = workflows[0].copy()
|
||||
|
||||
# Initialize lists if not present
|
||||
if "messages" not in result:
|
||||
result["messages"] = []
|
||||
if "logs" not in result:
|
||||
result["logs"] = []
|
||||
|
||||
# Merge additional workflows
|
||||
for workflow in workflows[1:]:
|
||||
# Append messages
|
||||
for message in workflow.get("messages", []):
|
||||
# Check for duplicates
|
||||
if not any(m.get("id") == message.get("id") for m in result["messages"]):
|
||||
result["messages"].append(message)
|
||||
|
||||
# Append logs
|
||||
for log in workflow.get("logs", []):
|
||||
# Check for duplicates
|
||||
if not any(l.get("id") == log.get("id") for l in result["logs"]):
|
||||
result["logs"].append(log)
|
||||
|
||||
# Update status if needed
|
||||
if workflow.get("status") == "failed":
|
||||
result["status"] = "failed"
|
||||
|
||||
# Update last_activity if newer
|
||||
if (workflow.get("last_activity") and
|
||||
(not result.get("last_activity") or
|
||||
workflow["last_activity"] > result["last_activity"])):
|
||||
result["last_activity"] = workflow["last_activity"]
|
||||
|
||||
return result
|
||||
|
||||
def get_message(self, workflow: Dict[str, Any], message_id: str) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Find a message by ID in the workflow.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
message_id: The message ID to find
|
||||
|
||||
Returns:
|
||||
Message object or None if not found
|
||||
"""
|
||||
for message in workflow.get("messages", []):
|
||||
if message.get("id") == message_id:
|
||||
return message
|
||||
return None
|
||||
|
||||
def to_str(self, workflow: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Convert workflow to a formatted string representation.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
|
||||
Returns:
|
||||
String representation of the workflow
|
||||
"""
|
||||
# Create a summary string
|
||||
result = f"Workflow: {workflow.get('id')}\n"
|
||||
result += f"Status: {workflow.get('status', 'unknown')}\n"
|
||||
result += f"Started: {workflow.get('started_at', 'unknown')}\n"
|
||||
result += f"Last Activity: {workflow.get('last_activity', 'unknown')}\n"
|
||||
|
||||
# Add message count
|
||||
message_count = len(workflow.get("messages", []))
|
||||
result += f"Messages: {message_count}\n"
|
||||
|
||||
# Add log count
|
||||
log_count = len(workflow.get("logs", []))
|
||||
result += f"Logs: {log_count}\n"
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class MessageUtils:
|
||||
"""
|
||||
Utility class for message operations.
|
||||
Centralizes common message-related functions.
|
||||
"""
|
||||
|
||||
def create_message(self, workflow_id: str, role: str = "system") -> Dict[str, Any]:
|
||||
"""
|
||||
Create a new message object.
|
||||
|
||||
Args:
|
||||
workflow_id: ID of the workflow
|
||||
role: Role of the message ('system', 'user', 'assistant')
|
||||
|
||||
Returns:
|
||||
New message object
|
||||
"""
|
||||
message_id = f"msg_{uuid.uuid4()}"
|
||||
current_time = datetime.now().isoformat()
|
||||
|
||||
# Create message object
|
||||
message = {
|
||||
"id": message_id,
|
||||
"workflow_id": workflow_id,
|
||||
"parent_message_id": None,
|
||||
"started_at": current_time,
|
||||
"finished_at": None,
|
||||
"sequence_no": 0,
|
||||
|
||||
"status": "pending",
|
||||
"role": role,
|
||||
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
|
||||
"documents": [],
|
||||
"content": None,
|
||||
"agent_type": None
|
||||
}
|
||||
|
||||
return message
|
||||
|
||||
def finalize_message(self, message: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Finalize a message by setting completion timestamp.
|
||||
|
||||
Args:
|
||||
message: The message object
|
||||
|
||||
Returns:
|
||||
Updated message object
|
||||
"""
|
||||
message["finished_at"] = datetime.now().isoformat()
|
||||
message["status"] = "completed"
|
||||
return message
|
||||
|
||||
def get_documents(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get all documents from a message.
|
||||
|
||||
Args:
|
||||
message: The message object
|
||||
|
||||
Returns:
|
||||
List of document objects
|
||||
"""
|
||||
return message.get("documents", [])
|
||||
|
||||
def get_files(self, message: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Get all file references from a message.
|
||||
|
||||
Args:
|
||||
message: The message object
|
||||
|
||||
Returns:
|
||||
List of file metadata objects
|
||||
"""
|
||||
files = []
|
||||
|
||||
# Extract documents from the message
|
||||
for doc in message.get("documents", []):
|
||||
source = doc.get("source", {})
|
||||
|
||||
# Only include file documents
|
||||
if source.get("type") == "file":
|
||||
file_info = {
|
||||
"id": source.get("id", ""),
|
||||
"name": source.get("name", ""),
|
||||
"type": source.get("content_type", ""),
|
||||
"content_type": source.get("content_type", ""),
|
||||
"size": source.get("size", 0)
|
||||
}
|
||||
|
||||
files.append(file_info)
|
||||
|
||||
return files
|
||||
|
||||
def extract_text_content(self, message: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Extract text content from a message including document content.
|
||||
|
||||
Args:
|
||||
message: The message object
|
||||
|
||||
Returns:
|
||||
String with all text content from the message
|
||||
"""
|
||||
content = message.get("content", "")
|
||||
|
||||
# Add document content
|
||||
for doc in message.get("documents", []):
|
||||
# Check for document contents
|
||||
for doc_content in doc.get("contents", []):
|
||||
if doc_content.get("type") == "text":
|
||||
content += "\n\n" + doc_content.get("text", "")
|
||||
|
||||
return content
|
||||
|
||||
def to_str(self, message: Dict[str, Any]) -> str:
|
||||
"""
|
||||
Convert message to a formatted string representation.
|
||||
|
||||
Args:
|
||||
message: The message object
|
||||
|
||||
Returns:
|
||||
String representation of the message
|
||||
"""
|
||||
# Create a summary string
|
||||
result = f"Message: {message.get('id')}\n"
|
||||
result += f"Role: {message.get('role', 'unknown')}\n"
|
||||
|
||||
# Add agent info if available
|
||||
if message.get("agent_type"):
|
||||
result += f"Agent: {message.get('agent_name', message.get('agent_type', 'unknown'))}\n"
|
||||
|
||||
# Add content summary
|
||||
content = message.get("content", "")
|
||||
if content:
|
||||
content_preview = content[:100] + "..." if len(content) > 100 else content
|
||||
result += f"Content: {content_preview}\n"
|
||||
|
||||
# Add document count
|
||||
doc_count = len(message.get("documents", []))
|
||||
result += f"Documents: {doc_count}\n"
|
||||
|
||||
return result
|
||||
|
||||
|
||||
class FileUtils:
    """
    Utility class for file operations.
    Centralizes common file-related functions.
    """

    # Extensions handled as plain text without any extra libraries.
    TEXT_EXTENSIONS = ('.txt', '.md', '.json', '.xml', '.html', '.htm',
                       '.css', '.js', '.py', '.csv')

    # Binary media formats from which no text can be extracted.
    MEDIA_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg',
                        '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv',
                        '.mp3', '.wav', '.ogg', '.flac', '.aac')

    @staticmethod
    def _has_module(name: str) -> bool:
        """Return True if the given top-level module can be imported.

        Uses importlib instead of trial imports; the previous version
        wrapped the trial imports in a bare ``except:`` that swallowed
        every exception (including KeyboardInterrupt).
        """
        import importlib.util
        return importlib.util.find_spec(name) is not None

    def is_text_extractable(self, file_name: str, content_type: str = None) -> bool:
        """
        Check if text can be extracted from a file.

        Args:
            file_name: Name of the file
            content_type: MIME type (optional)

        Returns:
            True if text can be extracted, False otherwise
        """
        # Plain text formats never need extra libraries.
        if file_name.endswith(self.TEXT_EXTENSIONS):
            return True

        # Excel extraction requires pandas.
        if file_name.endswith(('.xlsx', '.xls')):
            return self._has_module('pandas')

        # PDF extraction requires PyPDF2 or PyMuPDF (imported as 'fitz').
        if file_name.endswith('.pdf'):
            return self._has_module('PyPDF2') or self._has_module('fitz')

        # Images, video and audio carry no extractable text.
        if file_name.endswith(self.MEDIA_EXTENSIONS):
            return False

        # Fall back to the MIME type when the extension is inconclusive.
        if content_type:
            if content_type.startswith(('text/', 'application/json', 'application/xml')):
                return True
            if content_type == 'application/pdf':
                return True
            if content_type.startswith(('image/', 'video/', 'audio/')):
                return False

        # Default: allow the extraction attempt.
        return True

    def get_mime_type(self, file_name: str) -> str:
        """
        Get MIME type based on file name.

        Args:
            file_name: Name of the file

        Returns:
            MIME type string ('application/octet-stream' when unknown)
        """
        import mimetypes
        import os

        mimetypes.init()
        mime_type, _ = mimetypes.guess_type(file_name)

        if not mime_type:
            # Manual fallback for common extensions that the platform's
            # mimetypes database may not know.
            extension_map = {
                'txt': 'text/plain',
                'md': 'text/markdown',
                'json': 'application/json',
                'csv': 'text/csv',
                'html': 'text/html',
                'htm': 'text/html',
                'pdf': 'application/pdf',
                'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
                'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
                'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
                'jpg': 'image/jpeg',
                'jpeg': 'image/jpeg',
                'png': 'image/png',
                'gif': 'image/gif',
                'svg': 'image/svg+xml',
                'webp': 'image/webp',
                'mp4': 'video/mp4',
                'mp3': 'audio/mpeg'
            }

            ext = os.path.splitext(file_name)[1].lower().lstrip('.')
            mime_type = extension_map.get(ext, 'application/octet-stream')

        return mime_type
|
||||
|
||||
|
||||
class LoggingUtils:
    """
    Enhanced logging utilities for better workflow tracking.
    Provides structured and categorized logging for workflows.
    """

    def __init__(self, workflow_id: str = None, log_func: Callable = None):
        """
        Initialize logging utilities.

        Args:
            workflow_id: ID of the workflow for context
            log_func: Function to call for adding workflow logs; invoked as
                log_func(workflow_id, message, level, category, category_name)
        """
        self.workflow_id = workflow_id
        self.log_func = log_func
        self.logger = logging.getLogger(__name__)

        # Human-readable names for the supported log categories.
        self.categories = {
            "workflow": "Workflow Management",
            "planning": "Activity Planning",
            "execution": "Activity Execution",
            "agents": "Agent Selection & Execution",
            "files": "File Processing",
            "summary": "Results Summary",
            "error": "Error Handling"
        }

    def set_workflow_id(self, workflow_id: str):
        """Update the workflow ID"""
        self.workflow_id = workflow_id

    def set_log_func(self, log_func: Callable):
        """Update the log function"""
        self.log_func = log_func

    def _emit(self, level: str, message: str, category: str, forward: bool):
        """Shared implementation for all log levels.

        Logs to the standard logger and, when *forward* is True and both a
        log function and a workflow id are configured, forwards the entry
        to the workflow log as well.
        """
        category_name = self.get_category_name(category)
        log_message = f"[{category_name}] {message}"

        # Dispatch to logger.info / .warning / .error / .debug by name.
        getattr(self.logger, level)(log_message)

        if forward and self.log_func and self.workflow_id:
            self.log_func(self.workflow_id, message, level, category, category_name)

    def info(self, message: str, category: str = "workflow", details: str = None):
        """Log an informational message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information (currently unused -- TODO)
        """
        self._emit("info", message, category, forward=True)

    def warning(self, message: str, category: str = "workflow", details: str = None):
        """Log a warning message.

        Args:
            message: The log message
            category: Log category
            details: Optional detailed information (currently unused -- TODO)
        """
        self._emit("warning", message, category, forward=True)

    def error(self, message: str, category: str = "error", details: str = None):
        """Log an error message.

        Args:
            message: The log message
            category: Log category (defaults to "error" here)
            details: Optional detailed information (currently unused -- TODO)
        """
        self._emit("error", message, category, forward=True)

    def debug(self, message: str, category: str = "workflow", details: str = None):
        """Log a debug message.

        Debug entries go only to the standard logger; they are NOT
        forwarded to the workflow log (matches the original behavior).
        """
        self._emit("debug", message, category, forward=False)

    def get_category_name(self, category: str) -> str:
        """
        Get human-readable category name.

        Args:
            category: Category code

        Returns:
            Human-readable category name (the code itself when unknown)
        """
        return self.categories.get(category, category)
|
||||
|
||||
|
||||
def _decode_bytes(file_content: bytes) -> str:
    """Decode raw bytes as UTF-8, falling back to Latin-1.

    Latin-1 maps every byte value, so the fallback never raises. (The
    previous version had a third cp1252 fallback behind a bare ``except:``
    that was unreachable for exactly this reason.)
    """
    try:
        return file_content.decode('utf-8')
    except UnicodeDecodeError:
        return file_content.decode('latin1')


def _frame_to_text(df, kind: str) -> str:
    """Render a pandas DataFrame as the textual summary used for Excel/CSV."""
    result = f"{kind} file with {len(df)} rows and {len(df.columns)} columns.\n"
    result += f"Columns: {', '.join(df.columns.tolist())}\n\n"
    result += df.to_string(index=False)
    return result


def _pdf_to_text(file_content: bytes, file_name: str) -> Tuple[str, bool]:
    """Extract text from PDF bytes via PyPDF2, falling back to PyMuPDF."""
    try:
        try:
            from PyPDF2 import PdfReader
            reader = PdfReader(BytesIO(file_content))
            return "".join(page.extract_text() + "\n\n" for page in reader.pages), True
        except ImportError:
            import fitz  # PyMuPDF
            doc = fitz.open(stream=file_content, filetype="pdf")
            return "".join(page.get_text() + "\n\n" for page in doc), True
    except ImportError:
        return f"[PDF: {file_name} - No PDF library installed]", False
    except Exception as e:
        return f"[Error reading PDF file {file_name}: {str(e)}]", False


def extract_text_from_file_content(file_content: bytes, file_name: str, content_type: str = None) -> Tuple[str, bool]:
    """
    Extract text from various file formats based on binary content.

    Args:
        file_content: Binary content of the file
        file_name: Name of the file for format detection
        content_type: Optional MIME type of the file

    Returns:
        Tuple with (extracted text, is_extracted flag)
    """
    # Check if file is likely text-extractable
    if not is_text_extractable(file_name, content_type):
        return f"[File: {file_name} - Text extraction not supported]", False

    try:
        # CSV files: checked BEFORE the generic text branch. Previously the
        # generic branch also matched '.csv', which made this pandas-based
        # table rendering unreachable dead code.
        if file_name.endswith('.csv'):
            try:
                import pandas as pd
                try:
                    df = pd.read_csv(BytesIO(file_content), encoding='utf-8')
                except UnicodeDecodeError:
                    df = pd.read_csv(BytesIO(file_content), encoding='latin1')
                return _frame_to_text(df, "CSV"), True
            except ImportError:
                # Without pandas, degrade gracefully to a plain text decode
                # (the original behavior before the branch was reachable).
                return _decode_bytes(file_content), True
            except Exception as e:
                return f"[Error extracting CSV content: {str(e)}]", False

        # Simple text files
        if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.log', '.ini', '.cfg', '.conf')) or (content_type and (content_type.startswith('text/') or content_type in ['application/json', 'application/xml', 'text/csv'])):
            return _decode_bytes(file_content), True

        # Excel files
        elif file_name.endswith(('.xlsx', '.xls')):
            try:
                import pandas as pd
                df = pd.read_excel(BytesIO(file_content))
                return _frame_to_text(df, "Excel"), True
            except ImportError:
                return f"[Excel file: {file_name} - pandas not installed]", False
            except Exception as e:
                return f"[Error extracting Excel content: {str(e)}]", False

        # PDF files
        elif file_name.endswith('.pdf'):
            return _pdf_to_text(file_content, file_name)

        # Default case - best-effort text decode
        else:
            return file_content.decode('utf-8', errors='replace'), True

    except Exception as e:
        logger.error(f"Error extracting text from {file_name}: {str(e)}")
        return f"[Text extraction error: {str(e)}]", False
|
||||
|
||||
|
||||
def is_text_extractable(file_name: str, content_type: str = None) -> bool:
    """Check if text can be extracted from a file.

    Args:
        file_name: Name of the file.
        content_type: Optional MIME type used when the extension is
            inconclusive.

    Returns:
        True if a text extraction attempt makes sense, False otherwise.
    """
    import importlib.util

    def available(module: str) -> bool:
        # Availability probe; replaces the previous trial imports that were
        # wrapped in a bare ``except:`` (which swallowed every exception).
        return importlib.util.find_spec(module) is not None

    # Plain text formats never need extra libraries.
    if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):
        return True

    # Excel extraction requires pandas.
    if file_name.endswith(('.xlsx', '.xls')):
        return available('pandas')

    # PDF extraction requires PyPDF2 or PyMuPDF (imported as 'fitz').
    if file_name.endswith('.pdf'):
        return available('PyPDF2') or available('fitz')

    # Images, video and audio carry no extractable text.
    if file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg',
                           '.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv',
                           '.mp3', '.wav', '.ogg', '.flac', '.aac')):
        return False

    # Fall back to the MIME type when the extension is inconclusive.
    if content_type:
        if content_type.startswith(('text/', 'application/json', 'application/xml')):
            return True
        if content_type == 'application/pdf':
            return True
        if content_type.startswith(('image/', 'video/', 'audio/')):
            return False

    # Default to allowing an extraction attempt.
    return True
|
||||
641
gwserver/modules/agentservice_workflow_execution.py
Normal file
641
gwserver/modules/agentservice_workflow_execution.py
Normal file
|
|
@ -0,0 +1,641 @@
|
|||
"""
|
||||
Refactored workflow execution for the Agentservice.
|
||||
Implements a structured workflow with clear separation of planning and execution phases.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import asyncio
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Any, Optional, Tuple, Union
|
||||
|
||||
# Import utility module (will be created)
|
||||
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
|
||||
|
||||
# Import for data extraction
|
||||
from modules.agentservice_dataextraction import data_extraction
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class WorkflowExecution:
|
||||
"""
|
||||
Handles the execution of workflows in a structured, multi-phase approach.
|
||||
Separates planning from execution and provides better logging.
|
||||
"""
|
||||
|
||||
def __init__(self, workflow_manager, workflow_id: str, mandate_id: int, user_id: int, ai_service, lucydom_interface):
    """Initialize the workflow execution.

    Stores the collaborating services on the instance and sets up the
    helper utilities (workflow/message helpers plus categorized logging
    bound to this workflow's id).
    """
    # Collaborators handed in by the caller.
    collaborators = {
        "workflow_manager": workflow_manager,
        "workflow_id": workflow_id,
        "mandate_id": mandate_id,
        "user_id": user_id,
        "ai_service": ai_service,
        "lucydom_interface": lucydom_interface,
    }
    for attr_name, value in collaborators.items():
        setattr(self, attr_name, value)

    # Helper utilities; the logging helper forwards entries via self._add_log.
    self.workflow_utils = WorkflowUtils(workflow_id)
    self.message_utils = MessageUtils()
    self.logging_utils = LoggingUtils(workflow_id, self._add_log)
|
||||
|
||||
async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: List[Dict[str, Any]] = None, is_user_input: bool = False) -> Dict[str, Any]:
    """
    Execute the workflow following the new structured approach.

    Runs the full pipeline: record the user message (with optional file
    contents), have the AI plan a list of activities, execute each
    activity in order, then append an AI-written summary message. The
    workflow dict is mutated in place and persisted via the workflow
    manager after each step.

    Args:
        message: The initiating message (prompt or user input)
        workflow: The workflow object (mutated and saved in place)
        files: Optional list of file metadata
        is_user_input: Flag indicating if this is user input
            NOTE(review): this flag is never read in this method -- confirm
            whether callers rely on it.

    Returns:
        Dict with workflow_id, status ("completed"/"failed"), and either
        the workflow messages or the error string.
    """
    try:
        # 1. Initialize the workflow (already done by the caller)
        self.logging_utils.info("Starting workflow execution", "workflow", "Workflow initialized")

        # 2. Create a message with user input; role defaults to "user".
        user_message = self._create_message(workflow, message.get("role", "user"))
        user_message["content"] = message.get("content", "")

        # Process files if provided (extracts text and attaches documents).
        if files and len(files) > 0:
            self.logging_utils.info(f"Processing {len(files)} files", "files", f"Processing files: {[f.get('name', 'unknown') for f in files]}")
            await self._process_files(workflow, user_message, files)

        # Add the message to the workflow (create the list lazily).
        if "messages" not in workflow:
            workflow["messages"] = []
        workflow["messages"].append(user_message)

        # Save workflow state before the (potentially slow) planning call.
        self.workflow_manager._save_workflow(workflow)
        self.logging_utils.info("User message processed", "workflow", "User input added to workflow")

        # 3. Create work plan using AI (falls back to a single activity
        #    inside _create_work_plan when planning fails).
        work_plan = await self._create_work_plan(workflow, user_message)
        self.logging_utils.info(f"Created work plan with {len(work_plan)} activities", "planning", "Work plan created")

        # 4. Execute each activity in the work plan, persisting after each
        #    so a crash loses at most one activity's results.
        results = []
        for i, activity in enumerate(work_plan, 1):
            self.logging_utils.info(f"Starting activity {i}/{len(work_plan)}: {activity.get('title', 'Unnamed')}",
                                    "execution", f"Activity: {activity.get('title', 'Unnamed')}")

            # Execute the activity
            activity_result = await self._execute_activity(workflow, activity)
            results.append(activity_result)

            # Save intermediate state
            self.workflow_manager._save_workflow(workflow)

        # 5. Create summary for the user (also appended to workflow messages).
        summary = await self._create_summary(workflow, results)
        self.logging_utils.info("Created workflow summary", "summary", "Workflow summary created")

        # Set workflow status to completed
        workflow["status"] = "completed"
        workflow["last_activity"] = datetime.now().isoformat()

        # Final save
        self.workflow_manager._save_workflow(workflow)

        return {
            "workflow_id": self.workflow_id,
            "status": "completed",
            "messages": workflow.get("messages", [])
        }

    except Exception as e:
        # Top-level boundary: mark the workflow failed, persist, and report
        # the error to the caller instead of raising.
        self.logging_utils.error(f"Workflow execution failed: {str(e)}", "error", f"Error: {str(e)}")
        workflow["status"] = "failed"
        self.workflow_manager._save_workflow(workflow)

        return {
            "workflow_id": self.workflow_id,
            "status": "failed",
            "error": str(e)
        }
|
||||
|
||||
async def _process_files(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]]):
    """
    Process files and add them to the message.
    Extracts text content where possible.

    Args:
        workflow: The workflow object
        message: The message to add files to
        files: List of file metadata
    """
    # Local import kept as in the original -- presumably to avoid a
    # circular import at module load time (TODO confirm).
    from modules.agentservice_filemanager import get_file_manager
    file_manager = get_file_manager()

    # Prepare file contexts
    file_contexts = file_manager.prepare_file_contexts(files)
    self.logging_utils.info(f"Prepared contexts for {len(file_contexts)} files", "files", "File contexts prepared")

    # Read file contents
    file_contents = await file_manager.read_file_contents(
        file_contexts,
        self.lucydom_interface,
        self.workflow_id,
        self._add_log,
        self.ai_service
    )

    # Index metadata by file id once, instead of the previous linear
    # next(...) scan of `files` for every entry. setdefault keeps the
    # FIRST occurrence, matching next()'s behavior on duplicate ids.
    files_by_id = {}
    for f in files:
        files_by_id.setdefault(f.get('id'), f)

    # Add files to message
    for file_id, content in file_contents.items():
        file_metadata = files_by_id.get(file_id, {})
        content_is_dict = isinstance(content, dict)

        file_data = {
            "id": file_id,
            "name": file_metadata.get('name', 'unnamed_file'),
            "content_type": file_metadata.get('content_type'),
            "type": file_metadata.get('type', "unknown"),
            # `content` may be either a plain string or a dict with
            # "content"/"is_extracted" keys -- handle both shapes.
            "content": content.get("content", "") if content_is_dict else content,
            "size": file_metadata.get('size'),
            "is_extracted": content.get("is_extracted", False) if content_is_dict else False
        }

        self.logging_utils.info(f"Adding file {file_data['name']} to message", "files", f"Adding file: {file_data['name']}")
        file_manager.add_file_to_message(message, file_data)
|
||||
|
||||
async def _create_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]:
    """
    Create a structured work plan based on the user's request.

    Args:
        workflow: The workflow object
        message: The initiating message

    Returns:
        List of structured activities to execute. Falls back to a single
        pass-through activity when the AI response cannot be parsed or the
        planning call fails.
    """
    # Extract context information
    task = message.get("content", "")
    documents = message.get("documents", [])

    def fallback_plan(title: str, description: str) -> List[Dict[str, Any]]:
        """Minimal single-activity plan that forwards the raw task."""
        return [{
            "title": title,
            "description": description,
            "agent_prompt": task,
            "data_prompt": "All available data is needed for this task",
            "expected_format": "Text",
            "dependencies": []
        }]

    # Create the planning prompt
    plan_prompt = f"""
As an AI workflow manager, create a detailed work plan for the following task:

TASK: {task}

The work plan should include a structured list of activities. Each activity should have:
1. title - A short descriptive title for the activity
2. description - What needs to be done in this activity
3. agent_prompt - A complete prompt to give to the AI agent(s) for this activity
4. data_prompt - A prompt describing what data will be needed for this activity
5. expected_format - The expected output format (e.g., "Text", "JSON", "Table", "FileList")
6. dependencies - List of previous activities this depends on (by index)

Return the work plan as a JSON array of activity objects, each with the above properties.
The work plan should be logical, efficient, and comprehensively address the task.
"""

    # Add information about available documents if present
    if documents:
        doc_info = []
        for doc in documents:
            source = doc.get("source", {})
            doc_info.append({
                "name": source.get("name", "unnamed"),
                "type": source.get("type", "unknown"),
                "content_type": source.get("content_type", "unknown")
            })

        plan_prompt += f"\n\nAvailable documents: {doc_info}"

    self.logging_utils.info("Requesting AI work plan", "planning", "Generating work plan")

    # Call AI to generate work plan
    try:
        plan_response = await self.ai_service.call_api([{"role": "user", "content": plan_prompt}])

        # Leftover stdout debug print replaced with proper debug logging
        # (debug entries are not forwarded to the workflow log).
        self.logging_utils.debug(f"Work plan prompt: {plan_prompt}", "planning")
        self.logging_utils.debug(f"Work plan response: {plan_response}", "planning")

        import json
        import re

        # Look for the first JSON array of objects in the response.
        json_match = re.search(r'\[\s*\{.*\}\s*\]', plan_response, re.DOTALL)

        if json_match:
            work_plan = json.loads(json_match.group(0))
            self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning",
                                    f"Work plan activities: {[activity.get('title', 'Unnamed') for activity in work_plan]}")
            return work_plan

        self.logging_utils.warning("Could not extract JSON from AI response", "planning",
                                   "Fallback to default work plan")
        return fallback_plan("Process Task", "Process the user's request directly")

    except Exception as e:
        self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning", f"Work plan error: {str(e)}")
        return fallback_plan("Process Task (Error Recovery)",
                             "Process the user's request after planning error")
|
||||
|
||||
async def _execute_activity(self, workflow: Dict[str, Any], activity: Dict[str, Any]) -> Dict[str, Any]:
    """
    Execute a single activity from the work plan.

    Selects agents via _select_agents; with no selection the task is
    answered by a direct AI call ("moderator" path), otherwise the
    selected agents run in sequence, each seeing the previous agent's
    output. All intermediate messages are appended to workflow["messages"].

    Args:
        workflow: The workflow object (mutated: messages are appended)
        activity: The activity definition from the work plan

    Returns:
        Result dict with title, content, agent and format keys.
    """
    # Extract activity information
    title = activity.get("title", "Unnamed Activity")
    agent_prompt = activity.get("agent_prompt", "")
    data_prompt = activity.get("data_prompt", "")
    expected_format = activity.get("expected_format", "Text")

    self.logging_utils.info(f"Executing activity: {title}", "execution", f"Activity: {title}, Format: {expected_format}")

    # 1. Determine which agents to use
    agents_config = await self._select_agents(workflow, agent_prompt, expected_format)
    self.logging_utils.info(f"Selected {len(agents_config)} agents for execution", "agents",
                            f"Agents: {[agent.get('agent_id', 'unknown') for agent in agents_config]}")

    # 2. Extract the necessary data
    from modules.agentservice_registry import AgentRegistry
    registry = AgentRegistry.get_instance()

    # If no agents were selected, use the moderator directly
    if not agents_config:
        self.logging_utils.info("No specific agents selected, using moderator", "agents", "Using moderator")
        # Create a message with the moderator's response
        moderator_message = self._create_message(workflow, "assistant")
        moderator_message["content"] = f"No specialized agents needed for this task. Processing directly: {agent_prompt}"
        moderator_message["agent_type"] = "moderator"
        moderator_message["agent_id"] = "moderator"
        moderator_message["agent_name"] = "Moderator"

        # Add message to workflow
        workflow["messages"].append(moderator_message)

        # Direct AI call for simple result
        result_content = await self.ai_service.call_api([
            {"role": "system", "content": "You are a helpful assistant processing the user's request."},
            {"role": "user", "content": agent_prompt}
        ])

        # Create result message
        result_message = self._create_message(workflow, "assistant")
        result_message["content"] = result_content
        result_message["agent_type"] = "assistant"
        result_message["agent_id"] = "assistant"
        result_message["agent_name"] = "AI Assistant"
        result_message["result_format"] = "Text"

        # Add message to workflow
        workflow["messages"].append(result_message)

        # Early return: moderator path never enters the agent loop below.
        return {
            "title": title,
            "content": result_content,
            "agent": "assistant",
            "format": "Text"
        }

    # 3. Execute the agents in sequence
    # NOTE(review): the loop below re-binds `agent_prompt` and
    # `expected_format`, shadowing the activity-level values read above;
    # after the loop the return therefore reports the LAST agent's
    # expected_format. Verify this is intended before refactoring.
    last_result = None
    for agent_config in agents_config:
        agent_id = agent_config.get("agent_id")
        agent_prompt = agent_config.get("prompt")
        expected_format = agent_config.get("expected_format", "Text")

        # Get the agent from registry
        agent = registry.get_agent(agent_id)
        if not agent:
            self.logging_utils.warning(f"Agent '{agent_id}' not found, skipping", "agents", f"Agent not found: {agent_id}")
            continue

        # Incorporate previous result if available (chains agents together).
        if last_result:
            agent_prompt = f"{agent_prompt}\n\nPrevious result: {last_result}"

        self.logging_utils.info(f"Executing agent: {agent_id}", "agents", f"Agent: {agent_id}, Format: {expected_format}")

        # Extract any needed data
        # NOTE(review): `data_prompt` is the activity-level value, so the
        # same extraction runs for every agent in the sequence.
        if data_prompt:
            # Get all messages from the workflow
            workflow_messages = workflow.get("messages", [])

            # Extract data using the dataextraction module
            extracted_data = await data_extraction(
                prompt=data_prompt,
                files=self._extract_files_from_workflow(workflow),
                messages=workflow_messages,
                ai_service=self.ai_service,
                lucydom_interface=self.lucydom_interface,
                workflow_id=self.workflow_id,
                add_log_func=self._add_log
            )

            # Add the data context to the prompt (100-char previews only).
            if extracted_data and "extracted_content" in extracted_data:
                data_summary = "\n\nExtracted data summary:\n"
                for item in extracted_data.get("extracted_content", []):
                    data_summary += f"- {item.get('name', 'unnamed')}: {item.get('content', '')[:100]}...\n"

                agent_prompt += data_summary

        # Create the agent message
        agent_message = self._create_message(workflow, "user")
        agent_message["content"] = agent_prompt
        agent_message["workflow_id"] = self.workflow_id

        # Execute the agent
        agent_response = await agent.process_message(agent_message, {"expected_format": expected_format})

        # Process agent response (a falsy response is silently skipped and
        # last_result keeps its previous value).
        if agent_response:
            # Create response message
            response_message = self._create_message(workflow, "assistant")
            response_message["content"] = agent_response.get("content", "")
            response_message["agent_type"] = agent_id
            response_message["agent_id"] = agent_id
            response_message["agent_name"] = agent.name
            response_message["result_format"] = agent_response.get("result_format", expected_format)

            # Add to workflow
            workflow["messages"].append(response_message)

            # Update last result
            last_result = agent_response.get("content", "")

    # Return the final result ("agent" is the last loop config's id).
    return {
        "title": title,
        "content": last_result or "",
        "agent": agent_config.get("agent_id", "unknown") if agents_config else "none",
        "format": expected_format
    }
|
||||
|
||||
async def _select_agents(self, workflow: Dict[str, Any], prompt: str, expected_format: str) -> List[Dict[str, Any]]:
    """
    Select appropriate agents for a given prompt and expected format.

    Args:
        workflow: The workflow object
        prompt: The prompt to process
        expected_format: The expected output format

    Returns:
        List of agent configurations (agent_id, prompt, expected_format);
        an empty list means no specialized agent is needed.
    """
    # Get available agents
    from modules.agentservice_registry import AgentRegistry
    registry = AgentRegistry.get_instance()

    # All non-user agents. (The previous duplicate-key guard was dropped:
    # the dict is freshly built from registry keys, so keys cannot repeat.)
    system_agents = {
        agent_id: agent.get_agent_info()
        for agent_id, agent in registry.get_all_agents().items()
        if agent.type != "user"
    }

    # Create agent selection prompt
    selection_prompt = f"""
You are a workflow coordinator responsible for selecting appropriate agents for a task.

TASK PROMPT: {prompt}

EXPECTED FORMAT: {expected_format}

AVAILABLE AGENTS:
"""

    # Add agent descriptions
    for agent_id, agent_info in system_agents.items():
        selection_prompt += f"""
- ID: {agent_id}
  Name: {agent_info.get('name', '')}
  Type: {agent_info.get('type', '')}
  Description: {agent_info.get('description', '')}
  Capabilities: {agent_info.get('capabilities', '')}
  Result Format: {agent_info.get('result_format', 'Text')}
"""

    selection_prompt += """
Based on the task and expected format, select the appropriate agent(s) to use.

Return your selection as a JSON array with objects containing:
1. agent_id: The ID of the selected agent
2. prompt: A specific prompt tailored for this agent
3. expected_format: The expected output format

You can select multiple agents if needed, in which case they will be executed in sequence.
If no specialized agent is needed, return an empty array.
"""

    # Call AI to select agents
    try:
        selection_response = await self.ai_service.call_api([{"role": "user", "content": selection_prompt}])

        import json
        import re

        # Look for a JSON array of objects in the response.
        json_match = re.search(r'\[\s*\{.*\}\s*\]', selection_response, re.DOTALL)

        if json_match:
            selected_agents = json.loads(json_match.group(0))
            # Keep only selections that reference a known system agent.
            return [cfg for cfg in selected_agents
                    if "agent_id" in cfg and cfg["agent_id"] in system_agents]

        if "[]" in selection_response:
            # Explicit empty array: no specialized agents needed.
            return []

        # Could not parse the response -- fall back to keyword/format heuristics.
        self.logging_utils.warning("Could not parse agent selection response", "agents",
                                   "Falling back to default agent selection")

        def single(agent_id: str) -> List[Dict[str, Any]]:
            """One-element config forwarding the original prompt/format."""
            return [{
                "agent_id": agent_id,
                "prompt": prompt,
                "expected_format": expected_format
            }]

        fmt = expected_format.lower()
        text = prompt.lower()
        if fmt in ("file", "filelist", "document"):
            return single("filecreator_agent")
        # NOTE(review): "document" is already claimed by the branch above,
        # so it can never reach documentation_agent here (kept as in the
        # original) -- confirm intent.
        if fmt in ("report", "analysis", "document"):
            return single("documentation_agent")
        if "web" in text or "search" in text:
            return single("webcrawler_agent")
        if "analyze" in text or "data" in text:
            return single("analyst_agent")
        # No specific agent needed
        return []

    except Exception as e:
        self.logging_utils.error(f"Error selecting agents: {str(e)}", "agents", f"Agent selection error: {str(e)}")
        return []  # Empty array - use default (moderator) processing
|
||||
|
||||
async def _create_summary(self, workflow: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Create a summary of the workflow results for the user.
|
||||
|
||||
Args:
|
||||
workflow: The workflow object
|
||||
results: Results from activity executions
|
||||
|
||||
Returns:
|
||||
Summary message
|
||||
"""
|
||||
# Create a summary prompt
|
||||
summary_prompt = "Create a clear, concise summary of the following workflow results:\n\n"
|
||||
|
||||
for i, result in enumerate(results, 1):
|
||||
title = result.get("title", f"Activity {i}")
|
||||
content = result.get("content", "")
|
||||
agent = result.get("agent", "unknown")
|
||||
|
||||
# Limit content length for the summary prompt
|
||||
content_preview = content[:500] + "..." if len(content) > 500 else content
|
||||
|
||||
summary_prompt += f"""
|
||||
ACTIVITY {i}: {title}
|
||||
Executed by: {agent}
|
||||
|
||||
{content_preview}
|
||||
|
||||
---
|
||||
"""
|
||||
|
||||
summary_prompt += """
|
||||
Provide a well-structured summary that:
|
||||
1. Highlights the key findings and results
|
||||
2. Connects the results to the original task
|
||||
3. Presents any conclusions or recommendations
|
||||
|
||||
Make sure the summary is clear, concise, and useful to the user.
|
||||
"""
|
||||
|
||||
# Call AI to generate summary
|
||||
summary_content = await self.ai_service.call_api([{"role": "user", "content": summary_prompt}])
|
||||
|
||||
# Create summary message
|
||||
summary_message = self._create_message(workflow, "assistant")
|
||||
summary_message["content"] = summary_content
|
||||
summary_message["agent_type"] = "summary"
|
||||
summary_message["agent_id"] = "workflow_summary"
|
||||
summary_message["agent_name"] = "Workflow Summary"
|
||||
summary_message["result_format"] = "Text"
|
||||
summary_message["workflow_complete"] = True
|
||||
|
||||
# Add to workflow
|
||||
workflow["messages"].append(summary_message)
|
||||
|
||||
return summary_message
|
||||
|
||||
def _create_message(self, workflow: Dict[str, Any], role: str) -> Dict[str, Any]:
|
||||
"""Create a new message object for the workflow"""
|
||||
# This is a utility function that should be moved to the utility module
|
||||
message_id = f"msg_{uuid.uuid4()}"
|
||||
current_time = datetime.now().isoformat()
|
||||
|
||||
# Determine sequence number
|
||||
sequence_no = 1
|
||||
if "messages" in workflow and workflow["messages"]:
|
||||
sequence_no = len(workflow["messages"]) + 1
|
||||
|
||||
# Create message object
|
||||
message = {
|
||||
"id": message_id,
|
||||
"workflow_id": self.workflow_id,
|
||||
"started_at": current_time,
|
||||
"finished_at": None,
|
||||
"sequence_no": sequence_no,
|
||||
|
||||
"status": "pending",
|
||||
"role": role,
|
||||
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
|
||||
"documents": [],
|
||||
"content": None,
|
||||
"agent_type": None
|
||||
}
|
||||
|
||||
return message
|
||||
|
||||
def _extract_files_from_workflow(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""Extract file information from all messages in the workflow"""
|
||||
files = []
|
||||
|
||||
# Process all messages
|
||||
for message in workflow.get("messages", []):
|
||||
# Extract documents from the message
|
||||
for doc in message.get("documents", []):
|
||||
source = doc.get("source", {})
|
||||
|
||||
# Only include file documents
|
||||
if source.get("type") == "file":
|
||||
file_info = {
|
||||
"id": source.get("id", ""),
|
||||
"name": source.get("name", ""),
|
||||
"type": source.get("content_type", ""),
|
||||
"content_type": source.get("content_type", "")
|
||||
}
|
||||
|
||||
# Check if file is already in the list (avoid duplicates)
|
||||
if not any(f.get("id") == file_info["id"] for f in files):
|
||||
files.append(file_info)
|
||||
|
||||
return files
|
||||
|
||||
def _add_log(self, workflow_id: str, message: str, log_type: str, agent_id: str = None, agent_name: str = None):
|
||||
"""Add a log entry to the workflow"""
|
||||
# This actually calls back to the workflow manager's log function
|
||||
self.workflow_manager._add_log(workflow_id, message, log_type, agent_id, agent_name)
|
||||
File diff suppressed because it is too large
Load diff
|
|
@ -537,7 +537,7 @@ class LucyDOMInterface:
|
|||
|
||||
|
||||
|
||||
async def read_file_content(self, file_id: str) -> Optional[bytes]:
|
||||
async def read_file_content(self, file_id: int) -> Optional[bytes]:
|
||||
"""
|
||||
Reads the content of a file by ID
|
||||
|
||||
|
|
@ -572,7 +572,7 @@ class LucyDOMInterface:
|
|||
logger.error(f"Fehler beim Lesen der Datei {file_id}: {str(e)}")
|
||||
raise FileError(f"Fehler beim Lesen der Datei: {str(e)}")
|
||||
|
||||
def download_file(self, file_id: str) -> Optional[Dict[str, Any]]:
|
||||
def download_file(self, file_id: int) -> Optional[Dict[str, Any]]:
|
||||
"""
|
||||
Gibt eine Datei zum Download zurück.
|
||||
|
||||
|
|
@ -614,7 +614,7 @@ class LucyDOMInterface:
|
|||
logger.error(f"Fehler beim Herunterladen der Datei {file_id}: {str(e)}")
|
||||
raise FileError(f"Fehler beim Herunterladen der Datei: {str(e)}")
|
||||
|
||||
def delete_file(self, file_id: str) -> bool:
|
||||
def delete_file(self, file_id: int) -> bool:
|
||||
"""
|
||||
Löscht eine Datei aus der Datenbank und dem Dateisystem.
|
||||
|
||||
|
|
@ -1142,7 +1142,7 @@ class LucyDOMInterface:
|
|||
logger.error(f"Fehler beim Löschen der Nachricht {message_id}: {str(e)}")
|
||||
return False
|
||||
|
||||
def delete_file_from_message(self, workflow_id: str, message_id: str, file_id: str) -> bool:
|
||||
def delete_file_from_message(self, workflow_id: str, message_id: str, file_id: int) -> bool:
|
||||
"""
|
||||
Entfernt eine Dateireferenz aus einer Nachricht.
|
||||
Die Datei selbst wird nicht gelöscht, nur die Referenz in der Nachricht.
|
||||
|
|
|
|||
|
|
@ -68,6 +68,7 @@ async def list_workflows(
|
|||
# WorkflowManager mit Benutzerkontext initialisieren
|
||||
workflow_manager = get_workflow_manager(mandate_id, user_id)
|
||||
|
||||
# Use the refactored list_workflows method that takes mandate_id and user_id
|
||||
workflows = await workflow_manager.list_workflows(mandate_id, user_id)
|
||||
return workflows
|
||||
|
||||
|
|
@ -82,8 +83,8 @@ async def create_workflow(
|
|||
mandate_id, user_id = await get_user_context(current_user)
|
||||
|
||||
# Add debug logging for the workflow request
|
||||
print(f"DEBUG - Creating workflow with request: {workflow_request}")
|
||||
print(f"DEBUG - Files in request: {workflow_request.files}")
|
||||
logger.debug(f"Creating workflow with request: {workflow_request}")
|
||||
logger.debug(f"Files in request: {workflow_request.files}")
|
||||
|
||||
# LucyDOM-Interface mit Benutzerkontext initialisieren
|
||||
lucy_interface = get_lucydom_interface(mandate_id, user_id)
|
||||
|
|
@ -92,18 +93,18 @@ async def create_workflow(
|
|||
files = []
|
||||
for file_id in workflow_request.files:
|
||||
# Add logging before file lookup
|
||||
print(f"DEBUG - Looking up file with ID: {file_id}")
|
||||
logger.debug(f"Looking up file with ID: {file_id}")
|
||||
|
||||
file = lucy_interface.get_file(file_id)
|
||||
if not file:
|
||||
print(f"DEBUG - File with ID {file_id} not found in database")
|
||||
logger.warning(f"File with ID {file_id} not found in database")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Datei mit ID {file_id} nicht gefunden"
|
||||
)
|
||||
|
||||
# Add logging on successful file lookup
|
||||
print(f"DEBUG - Found file: {file.get('name', 'unknown')} (ID: {file_id})")
|
||||
logger.debug(f"Found file: {file.get('name', 'unknown')} (ID: {file_id})")
|
||||
files.append(file)
|
||||
|
||||
# Workflow ID generieren
|
||||
|
|
@ -132,7 +133,7 @@ async def create_workflow(
|
|||
if hasattr(workflow_request, attr) and getattr(workflow_request, attr) is not None:
|
||||
workflow_data[attr] = getattr(workflow_request, attr)
|
||||
|
||||
# Workflow in Datenbank speichern
|
||||
# Workflow in Datenbank speichern - this should now be handled by initialize_workflow in the manager
|
||||
if lucy_interface:
|
||||
try:
|
||||
lucy_interface.create_workflow(workflow_data)
|
||||
|
|
@ -143,9 +144,9 @@ async def create_workflow(
|
|||
)
|
||||
|
||||
# Log files before executing workflow
|
||||
print(f"DEBUG - Executing workflow with {len(files)} files:")
|
||||
logger.info(f"Executing workflow with {len(files)} files:")
|
||||
for file in files:
|
||||
print(f"DEBUG - File: {file.get('name', 'unknown')} (ID: {file.get('id', 'unknown')})")
|
||||
logger.debug(f"File: {file.get('name', 'unknown')} (ID: {file.get('id', 'unknown')})")
|
||||
|
||||
# Workflow starten (asynchron)
|
||||
workflow_task = asyncio.create_task(
|
||||
|
|
@ -254,6 +255,7 @@ async def delete_workflow(
|
|||
# WorkflowManager mit Benutzerkontext initialisieren
|
||||
workflow_manager = get_workflow_manager(mandate_id, user_id)
|
||||
|
||||
# Using the refactored delete_workflow method
|
||||
success = await workflow_manager.delete_workflow(workflow_id)
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
|
|
@ -277,6 +279,7 @@ async def get_workflow_status(
|
|||
# WorkflowManager mit Benutzerkontext initialisieren
|
||||
workflow_manager = get_workflow_manager(mandate_id, user_id)
|
||||
|
||||
# Use the refactored get_workflow_status method
|
||||
status = workflow_manager.get_workflow_status(workflow_id)
|
||||
if not status:
|
||||
# Versuche, den Workflow zu laden
|
||||
|
|
@ -303,6 +306,7 @@ async def get_workflow_logs(
|
|||
# WorkflowManager mit Benutzerkontext initialisieren
|
||||
workflow_manager = get_workflow_manager(mandate_id, user_id)
|
||||
|
||||
# Use the refactored get_workflow_logs method
|
||||
logs = workflow_manager.get_workflow_logs(workflow_id)
|
||||
if logs is None:
|
||||
# Versuche, den Workflow zu laden
|
||||
|
|
@ -329,6 +333,7 @@ async def get_workflow_messages(
|
|||
# WorkflowManager mit Benutzerkontext initialisieren
|
||||
workflow_manager = get_workflow_manager(mandate_id, user_id)
|
||||
|
||||
# Use the refactored get_workflow_messages method
|
||||
messages = workflow_manager.get_workflow_messages(workflow_id)
|
||||
if messages is None:
|
||||
# Versuche, den Workflow zu laden
|
||||
|
|
@ -355,6 +360,7 @@ async def stop_workflow(
|
|||
# WorkflowManager mit Benutzerkontext initialisieren
|
||||
workflow_manager = get_workflow_manager(mandate_id, user_id)
|
||||
|
||||
# Use the refactored stop_workflow method
|
||||
result = await workflow_manager.stop_workflow(workflow_id)
|
||||
if not result:
|
||||
raise HTTPException(
|
||||
|
|
@ -488,6 +494,7 @@ async def get_workflow_data_statistics(
|
|||
# WorkflowManager mit Benutzerkontext initialisieren
|
||||
workflow_manager = get_workflow_manager(mandate_id, user_id)
|
||||
|
||||
# Use the refactored get_workflow_status method to get data stats
|
||||
status = workflow_manager.get_workflow_status(workflow_id)
|
||||
if not status:
|
||||
raise HTTPException(
|
||||
|
|
@ -744,7 +751,7 @@ async def delete_workflow_message(
|
|||
# Nachricht aus dem Workflow entfernen
|
||||
deleted_message = workflow["messages"].pop(message_index)
|
||||
|
||||
# Log über Löschung hinzufügen
|
||||
# Log über Löschung hinzufügen - using the refactored _add_log method
|
||||
workflow_manager._add_log(
|
||||
workflow,
|
||||
f"Nachricht gelöscht: {deleted_message.get('role', 'unknown')} - {message_id[:8]}...",
|
||||
|
|
@ -761,7 +768,7 @@ async def delete_workflow_message(
|
|||
workflow_manager.lucydom_interface.delete_workflow_message(workflow_id, message_id)
|
||||
except Exception as e:
|
||||
# Fehler beim Löschen in der Datenbank loggen, aber nicht scheitern lassen
|
||||
print(f"Warnung: Nachricht aus Workflow entfernt, aber Fehler beim Löschen aus der Datenbank: {str(e)}")
|
||||
logger.warning(f"Nachricht aus Workflow entfernt, aber Fehler beim Löschen aus der Datenbank: {str(e)}")
|
||||
|
||||
return {
|
||||
"workflow_id": workflow_id,
|
||||
|
|
@ -794,7 +801,7 @@ async def delete_file_from_message(
|
|||
mandate_id, user_id = await get_user_context(current_user)
|
||||
|
||||
# Add detailed logging
|
||||
print(f"DELETE request: Remove file {file_id} from message {message_id} in workflow {workflow_id}")
|
||||
logger.debug(f"DELETE request: Remove file {file_id} from message {message_id} in workflow {workflow_id}")
|
||||
|
||||
# WorkflowManager mit Benutzerkontext initialisieren
|
||||
workflow_manager = get_workflow_manager(mandate_id, user_id)
|
||||
|
|
@ -803,20 +810,20 @@ async def delete_file_from_message(
|
|||
# Workflow laden
|
||||
workflow = await workflow_manager.load_workflow(workflow_id)
|
||||
if not workflow:
|
||||
print(f"Workflow {workflow_id} not found")
|
||||
logger.error(f"Workflow {workflow_id} not found")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Workflow mit ID {workflow_id} nicht gefunden"
|
||||
)
|
||||
|
||||
# Log workflow info
|
||||
print(f"Workflow found: {workflow.get('name', workflow_id)}")
|
||||
logger.debug(f"Workflow found: {workflow.get('name', workflow_id)}")
|
||||
|
||||
# Print message structure to debug
|
||||
if "messages" in workflow:
|
||||
print(f"Workflow has {len(workflow['messages'])} messages")
|
||||
logger.debug(f"Workflow has {len(workflow['messages'])} messages")
|
||||
for i, msg in enumerate(workflow['messages']):
|
||||
print(f"Message {i+1}: ID={msg.get('id')}, Type={msg.get('agent_type')}")
|
||||
logger.debug(f"Message {i+1}: ID={msg.get('id')}, Type={msg.get('agent_type')}")
|
||||
|
||||
# Nachricht finden - try different approaches
|
||||
message = None
|
||||
|
|
@ -835,28 +842,28 @@ async def delete_file_from_message(
|
|||
if isinstance(m.get("id"), str) and m.get("id").startswith(message_id)), None)
|
||||
|
||||
if not message:
|
||||
print(f"Message {message_id} not found in workflow {workflow_id}")
|
||||
logger.error(f"Message {message_id} not found in workflow {workflow_id}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Nachricht mit ID {message_id} im Workflow {workflow_id} nicht gefunden"
|
||||
)
|
||||
|
||||
# Log message info
|
||||
print(f"Message found: {message.get('id')}, type: {message.get('agent_type')}")
|
||||
logger.debug(f"Message found: {message.get('id')}, type: {message.get('agent_type')}")
|
||||
|
||||
# Check documents array
|
||||
if "documents" not in message or not message["documents"]:
|
||||
print(f"No documents in message {message_id}")
|
||||
logger.error(f"No documents in message {message_id}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Keine Dateien in der Nachricht {message_id} gefunden"
|
||||
)
|
||||
|
||||
# Debug log documents
|
||||
print(f"Message has {len(message['documents'])} documents")
|
||||
logger.debug(f"Message has {len(message['documents'])} documents")
|
||||
for i, doc in enumerate(message["documents"]):
|
||||
source = doc.get("source", {})
|
||||
print(f"Document {i+1}: ID={doc.get('id')}, Source ID={source.get('id')}")
|
||||
logger.debug(f"Document {i+1}: ID={doc.get('id')}, Source ID={source.get('id')}")
|
||||
|
||||
# Search for file with flexible matching
|
||||
found_file = False
|
||||
|
|
@ -877,11 +884,11 @@ async def delete_file_from_message(
|
|||
file_index = i
|
||||
found_file = True
|
||||
found_doc = doc
|
||||
print(f"Found file at index {i}: doc_id={doc_id}, source_id={source_id}")
|
||||
logger.debug(f"Found file at index {i}: doc_id={doc_id}, source_id={source_id}")
|
||||
break
|
||||
|
||||
if not found_file:
|
||||
print(f"File {file_id} not found in message {message_id}")
|
||||
logger.error(f"File {file_id} not found in message {message_id}")
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_404_NOT_FOUND,
|
||||
detail=f"Datei mit ID {file_id} in der Nachricht {message_id} nicht gefunden"
|
||||
|
|
@ -892,7 +899,16 @@ async def delete_file_from_message(
|
|||
|
||||
# Log removal
|
||||
file_name = deleted_file.get("source", {}).get("name", file_id)
|
||||
print(f"Removed file {file_name} from message {message_id}")
|
||||
logger.info(f"Removed file {file_name} from message {message_id}")
|
||||
|
||||
# Add log entry using the refactored _add_log method
|
||||
workflow_manager._add_log(
|
||||
workflow,
|
||||
f"Datei aus Nachricht entfernt: {file_name} (ID: {file_id})",
|
||||
"info",
|
||||
message.get("agent_id"),
|
||||
message.get("agent_type")
|
||||
)
|
||||
|
||||
# Update workflow state
|
||||
workflow_manager._save_workflow(workflow)
|
||||
|
|
@ -902,9 +918,9 @@ async def delete_file_from_message(
|
|||
try:
|
||||
# Pass the file document and message to LucyDOM interface for more robust handling
|
||||
workflow_manager.lucydom_interface.update_workflow_message(message["id"], message)
|
||||
print(f"Database updated for message {message_id}")
|
||||
logger.debug(f"Database updated for message {message_id}")
|
||||
except Exception as e:
|
||||
print(f"Database update warning: {str(e)}")
|
||||
logger.warning(f"Database update warning: {str(e)}")
|
||||
|
||||
return {
|
||||
"workflow_id": workflow_id,
|
||||
|
|
@ -918,10 +934,10 @@ async def delete_file_from_message(
|
|||
# Re-raise HTTP exceptions
|
||||
raise
|
||||
except Exception as e:
|
||||
print(f"Error deleting file: {str(e)}")
|
||||
logger.error(f"Error deleting file: {str(e)}")
|
||||
import traceback
|
||||
traceback_str = traceback.format_exc()
|
||||
print(f"Traceback: {traceback_str}")
|
||||
logger.error(f"Traceback: {traceback_str}")
|
||||
|
||||
raise HTTPException(
|
||||
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
|
||||
|
|
|
|||
221
gwserver/test.py
221
gwserver/test.py
|
|
@ -1,41 +1,198 @@
|
|||
import asyncio
|
||||
import json
|
||||
from modules.agentservice_agent_webcrawler import WebcrawlerAgent
|
||||
import logging
|
||||
"""
|
||||
Test script for AgentService workflow with real implementations.
|
||||
This script tests a workflow execution with a user query using actual AI service.
|
||||
"""
|
||||
|
||||
# Konfiguration des Loggers
|
||||
import asyncio
|
||||
import logging
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
# Configure logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[logging.StreamHandler()]
|
||||
handlers=[logging.StreamHandler(sys.stdout)]
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
async def main():
|
||||
agent = WebcrawlerAgent()
|
||||
# Fixed: Passing a dictionary with "task" key for the message_context parameter
|
||||
results = await agent.get_web_query({
|
||||
"task": "Bitte führe eine Webanalyse durch, welche Firmen haben die gleichen Produkte wie ValueOn AG?"
|
||||
})
|
||||
print(results) # Since result is a string, not JSON
|
||||
# Define a custom WorkflowManager class for testing
|
||||
class TestWorkflowManager:
|
||||
"""
|
||||
Custom implementation of WorkflowManager for testing purposes.
|
||||
"""
|
||||
|
||||
|
||||
async def main2():
|
||||
agent = WebcrawlerAgent()
|
||||
# Hier mit await aufrufen
|
||||
results = await agent.run_web_query(prompt="Welche Firmen haben die gleichen Produkte wie ValueOn AG?")
|
||||
with open('test_result.json', 'w') as file:
|
||||
json.dump(results, file)
|
||||
# Print the results
|
||||
if isinstance(results, list):
|
||||
for i, result in enumerate(results, 1):
|
||||
print(f"\nResult {i}:")
|
||||
print(f"Title: {result['title']}")
|
||||
print(f"URL: {result['url']}")
|
||||
print(f"Snippet: {result['snippet']}")
|
||||
|
||||
|
||||
def __init__(self, mandate_id: int = None, user_id: int = None,
|
||||
ai_service = None, lucydom_interface = None):
|
||||
"""Initialize the workflow manager"""
|
||||
self.mandate_id = mandate_id
|
||||
self.user_id = user_id
|
||||
self.ai_service = ai_service
|
||||
self.lucydom_interface = lucydom_interface
|
||||
self.workflows = {}
|
||||
self.results_dir = "./_results"
|
||||
os.makedirs(self.results_dir, exist_ok=True)
|
||||
|
||||
async def create_workflow(self, message, files=None):
|
||||
"""Create a new workflow with the given message and files."""
|
||||
workflow_id = f"wf_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
||||
|
||||
# Initialize workflow
|
||||
workflow = self._initialize_workflow(workflow_id)
|
||||
logger.info(f"Created workflow with ID: {workflow_id}")
|
||||
|
||||
# Create workflow execution
|
||||
from modules.agentservice_workflow_execution import WorkflowExecution
|
||||
execution = WorkflowExecution(
|
||||
self, workflow_id, self.mandate_id, self.user_id,
|
||||
self.ai_service, self.lucydom_interface
|
||||
)
|
||||
|
||||
# Execute workflow
|
||||
result = await execution.execute(message, workflow, files, True)
|
||||
return result
|
||||
|
||||
def _initialize_workflow(self, workflow_id: str) -> Dict[str, Any]:
|
||||
"""Initialize a new workflow."""
|
||||
current_time = datetime.now().isoformat()
|
||||
|
||||
workflow = {
|
||||
"id": workflow_id,
|
||||
"name": f"Test Workflow {workflow_id}",
|
||||
"mandate_id": self.mandate_id,
|
||||
"user_id": self.user_id,
|
||||
"status": "running",
|
||||
"started_at": current_time,
|
||||
"last_activity": current_time,
|
||||
"messages": [],
|
||||
"logs": []
|
||||
}
|
||||
self.workflows[workflow_id] = workflow
|
||||
return workflow
|
||||
|
||||
def _save_workflow(self, workflow: Dict[str, Any]) -> None:
|
||||
"""Save workflow."""
|
||||
self.workflows[workflow["id"]] = workflow
|
||||
# Also save to LucyDOM
|
||||
if self.lucydom_interface:
|
||||
self.lucydom_interface.save_workflow_state(workflow)
|
||||
|
||||
def _add_log(self, workflow_id, message, log_type, agent_id=None, agent_name=None):
|
||||
"""Add log to workflow."""
|
||||
if workflow_id in self.workflows:
|
||||
if "logs" not in self.workflows[workflow_id]:
|
||||
self.workflows[workflow_id]["logs"] = []
|
||||
|
||||
# Asynchronen Code ausführen
|
||||
log_entry = {
|
||||
"id": f"log_{uuid.uuid4()}",
|
||||
"message": message,
|
||||
"type": log_type,
|
||||
"timestamp": datetime.now().isoformat(),
|
||||
"agent_id": agent_id,
|
||||
"agent_name": agent_name
|
||||
}
|
||||
|
||||
self.workflows[workflow_id]["logs"].append(log_entry)
|
||||
logger.info(f"Log [{log_type}]: {message}")
|
||||
|
||||
async def run_test_workflow():
|
||||
"""Run a test workflow with a user prompt."""
|
||||
try:
|
||||
# Import necessary modules
|
||||
from modules.agentservice_workflow_manager import get_workflow_manager
|
||||
from connectors.connector_aichat_openai import ChatService
|
||||
from modules.lucydom_interface import get_lucydom_interface
|
||||
|
||||
# Initialize services
|
||||
# Set up mandate_id and user_id for testing
|
||||
mandate_id = 1
|
||||
user_id = 1
|
||||
|
||||
# Initialize the AI service
|
||||
ai_service = ChatService()
|
||||
|
||||
# Initialize the LucyDOM interface for database access
|
||||
lucydom_interface = get_lucydom_interface(mandate_id, user_id)
|
||||
|
||||
# Initialize our custom workflow manager for testing
|
||||
workflow_manager = TestWorkflowManager(
|
||||
mandate_id=mandate_id,
|
||||
user_id=user_id,
|
||||
ai_service=ai_service,
|
||||
lucydom_interface=lucydom_interface
|
||||
)
|
||||
|
||||
# Create a test message
|
||||
test_message = {
|
||||
"role": "user",
|
||||
"content": "Please analyze the CSV file and give me a summary of the data."
|
||||
}
|
||||
|
||||
# Add a sample CSV file
|
||||
# For testing, let's create a real CSV file
|
||||
csv_content = b"name,age,location\nJohn,30,New York\nAlice,25,London\nBob,35,Paris\nEmma,28,Berlin"
|
||||
file_meta = lucydom_interface.save_uploaded_file(csv_content, "data.csv")
|
||||
logger.info(f"Uploaded test CSV file: {file_meta}")
|
||||
|
||||
# List of files to include in the workflow
|
||||
files = [file_meta]
|
||||
|
||||
# Execute the workflow with real implementation
|
||||
logger.info("Executing workflow...")
|
||||
result = await workflow_manager.create_workflow(test_message, files)
|
||||
|
||||
# Print the result
|
||||
logger.info(f"Workflow execution completed with status: {result.get('status')}")
|
||||
logger.info(f"Workflow ID: {result.get('workflow_id')}")
|
||||
|
||||
# Print messages in the workflow
|
||||
messages = result.get("messages", [])
|
||||
logger.info(f"Number of messages in workflow: {len(messages)}")
|
||||
|
||||
for i, msg in enumerate(messages, 1):
|
||||
role = msg.get("role", "unknown")
|
||||
agent_type = msg.get("agent_type", "")
|
||||
agent_name = msg.get("agent_name", "")
|
||||
content = msg.get("content", "")
|
||||
|
||||
agent_info = f" ({agent_name})" if agent_name else ""
|
||||
logger.info(f"Message {i} - {role}{agent_info}:")
|
||||
|
||||
# Print first 100 chars of content
|
||||
if content:
|
||||
preview = content[:100] + "..." if len(content) > 100 else content
|
||||
logger.info(f"Content preview: {preview}")
|
||||
else:
|
||||
logger.info("No content")
|
||||
|
||||
logger.info("-" * 40)
|
||||
|
||||
# Save the complete result to a JSON file for inspection
|
||||
with open("workflow_test_result.json", "w") as f:
|
||||
json.dump(result, f, indent=2)
|
||||
logger.info("Workflow result saved to 'workflow_test_result.json'")
|
||||
|
||||
# Clean up - close AI service
|
||||
await ai_service.close()
|
||||
|
||||
return result
|
||||
|
||||
except ImportError as e:
|
||||
logger.error(f"Import error: {e}")
|
||||
logger.error("This test script requires the actual modules to be in the Python path.")
|
||||
logger.error("You may need to adjust your PYTHONPATH to include the project directory.")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f"Error running test workflow: {e}")
|
||||
import traceback
|
||||
logger.error(traceback.format_exc())
|
||||
return None
|
||||
|
||||
if __name__ == "__main__":
|
||||
asyncio.run(main())
|
||||
# Run the async function
|
||||
asyncio.run(run_test_workflow())
|
||||
File diff suppressed because one or more lines are too long
191
gwserver/workflow_test_result.json
Normal file
191
gwserver/workflow_test_result.json
Normal file
|
|
@ -0,0 +1,191 @@
|
|||
{
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"status": "completed",
|
||||
"messages": [
|
||||
{
|
||||
"id": "msg_71988f72-f0dc-431f-a3f1-6cfe84cc339b",
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"started_at": "2025-04-11T23:34:33.805887",
|
||||
"finished_at": null,
|
||||
"sequence_no": 1,
|
||||
"status": "pending",
|
||||
"role": "user",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [
|
||||
{
|
||||
"id": 8,
|
||||
"source": {
|
||||
"type": "file",
|
||||
"id": 8,
|
||||
"name": "data.csv",
|
||||
"content_type": "application/vnd.ms-excel",
|
||||
"size": 78,
|
||||
"upload_date": "2025-04-11T23:34:33.809888"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": "name,age,location\nJohn,30,New York\nAlice,25,London\nBob,35,Paris\nEmma,28,Berlin",
|
||||
"is_extracted": true
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"content": "Please analyze the CSV file and give me a summary of the data.",
|
||||
"agent_type": null
|
||||
},
|
||||
{
|
||||
"id": "msg_2bcd999f-0ac7-4ccc-8f4b-0b1aa55fa189",
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"started_at": "2025-04-11T23:35:25.915648",
|
||||
"finished_at": null,
|
||||
"sequence_no": 2,
|
||||
"status": "pending",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "## Fehler bei der Codeausf\u00fchrung\n\n### Ausgef\u00fchrter Code\n\n```python\nimport pandas as pd\nimport asyncio\n\n# Import the necessary helper functions\nfrom helper_functions import load_file, process_csv\n\nasync def load_and_process_csv(file_id):\n try:\n # Load the CSV file content asynchronously\n csv_content = await load_file(file_id, encoding='utf-8')\n \n # Process the CSV content using pandas\n df = process_csv(csv_content)\n \n # Create a summary of the DataFrame\n summary = {\n 'columns': df.columns.tolist(),\n 'head': df.head().to_dict(orient='records'),\n 'description': df.describe().to_dict()\n }\n \n # Prepare the result dictionary\n result = {\n 'status': 'success',\n 'summary': summary\n }\n \n except Exception as e:\n # Handle any exceptions that occur\n result = {\n 'status': 'error',\n 'message': str(e)\n }\n \n return result\n\n# Example usage\n# Assuming 'data.csv' has a file_id of '12345'\nfile_id = '12345'\nresult = asyncio.run(load_and_process_csv(file_id))\nprint(result)\n```\n\n### Fehler\n\n```\nFehler bei der Installation der erforderlichen Pakete: Fehler bei der Paketinstallation: error: subprocess-exited-with-error\n \n Getting requirements to build wheel did not run successfully.\n exit code: 1\n \n [15 lines of output]\n The 'sklearn' PyPI package is deprecated, use 'scikit-learn'\n rather than 'sklearn' for pip commands.\n \n Here is how to fix this error in the main use cases:\n - use 'pip install scikit-learn' rather than 'pip install sklearn'\n - replace 'sklearn' by 'scikit-learn' in your pip requirements files\n (requirements.txt, setup.py, setup.cfg, Pipfile, etc ...)\n - if the 'sklearn' package is used by one of your dependencies,\n it would be great if you take some time to track which package uses\n 'sklearn' instead of 'scikit-learn' and report it to their issue tracker\n - as a last resort, set the environment variable\n SKLEARN_ALLOW_DEPRECATED_SKLEARN_PACKAGE_INSTALL=True to avoid this error\n \n 
More information is available at\n https://github.com/scikit-learn/sklearn-pypi-package\n [end of output]\n \n note: This error originates from a subprocess, and is likely not a problem with pip.\nerror: subprocess-exited-with-error\n\nGetting requirements to build wheel did not run successfully.\nexit code: 1\n\nSee above for output.\n\nnote: This error originates from a subprocess, and is likely not a problem with pip.\n\n[notice] A new release of pip is available: 23.2.1 -> 25.0.1\n[notice] To update, run: C:\\Users\\pmots\\AppData\\Local\\Temp\\ai_code_exec_x1z8_0nq\\venv\\Scripts\\python.exe -m pip install --upgrade pip\n\n```\n\n\n### Empfehlung\nUm den Fehler zu beheben:\n1. \u00dcberpr\u00fcfen Sie die genaue Fehlermeldung\n2. Vereinfachen Sie den Code und testen Sie schrittweise\n3. Stellen Sie sicher, dass alle ben\u00f6tigten Daten korrekt geladen werden\n4. Verwenden Sie try/except-Bl\u00f6cke f\u00fcr fehleranf\u00e4llige Operationen\n",
|
||||
"agent_type": "coder",
|
||||
"agent_id": "coder",
|
||||
"agent_name": "Python Code Agent",
|
||||
"result_format": "DataFrame"
|
||||
},
|
||||
{
|
||||
"id": "msg_54268450-4dd3-4cbe-8156-0fd067bb3b77",
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"started_at": "2025-04-11T23:35:35.822670",
|
||||
"finished_at": null,
|
||||
"sequence_no": 3,
|
||||
"status": "pending",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "To analyze the provided data summary, let's break down the information:\n\n1. **Number of Rows and Columns:**\n - The data consists of three columns: `name`, `age`, and `location`.\n - Based on the sample data provided, there are at least four rows (John, Alice, Bob, Emma). However, the ellipsis (`...`) indicates that there may be more rows in the actual dataset.\n\n2. **Data Types of Each Column:**\n - `name`: This column contains text data, so the data type is likely `string` or `object` in pandas.\n - `age`: This column contains numerical data, specifically integers, so the data type is likely `int`.\n - `location`: This column contains text data, so the data type is likely `string` or `object` in pandas.\n\n### Summary:\n- **Columns**: 3 (`name`, `age`, `location`)\n- **Rows**: At least 4 (potentially more)\n- **Data Types**:\n - `name`: String/Object\n - `age`: Integer\n - `location`: String/Object\n\n### Detail Analysis:\n- The dataset appears to be a simple demographic dataset with basic information about individuals.\n- The `age` column is numerical, which allows for statistical analysis such as calculating the average age, age distribution, etc.\n- The `location` and `name` columns are categorical, which can be used for grouping or filtering the data.\n\n### Recommendations:\n- **Data Quality Check**: Ensure there are no missing values or inconsistencies in the `age` column, as it is crucial for numerical analysis.\n- **Data Enrichment**: If possible, add more demographic details such as gender or occupation to enhance the dataset's analytical potential.\n- **Visualization**: Create visualizations such as histograms for age distribution and bar charts for the frequency of locations to better understand the dataset's composition.\n- **Further Analysis**: Consider segmenting the data by location to identify any regional trends or patterns in age distribution.\n\nIf you have access to the full dataset, loading it into a pandas DataFrame would 
allow for a more comprehensive analysis and verification of these initial insights.",
|
||||
"agent_type": "analyzer",
|
||||
"agent_id": "analyzer",
|
||||
"agent_name": "Datenanalyst",
|
||||
"result_format": "AnalysisReport"
|
||||
},
|
||||
{
|
||||
"id": "msg_342c327a-dc1f-4d5b-b54e-30de0042b17e",
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"started_at": "2025-04-11T23:35:45.975065",
|
||||
"finished_at": null,
|
||||
"sequence_no": 4,
|
||||
"status": "pending",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "To analyze the DataFrame for missing values and provide a summary of their distribution, we need to perform the following steps:\n\n1. **Check for Missing Values**: Identify any missing values in the dataset.\n2. **Summarize Missing Values**: Provide a summary of where these missing values occur.\n\nLet's assume we have loaded the data into a DataFrame named `df`. Here is how you can perform the analysis:\n\n### Step 1: Check for Missing Values\n\n```python\nimport pandas as pd\n\n# Load the data into a DataFrame\ndata = {\n 'name': ['John', 'Alice', 'Bob', 'Emma'],\n 'age': [30, 25, 35, 28],\n 'location': ['New York', 'London', 'Paris', 'Berlin']\n}\ndf = pd.DataFrame(data)\n\n# Check for missing values\nmissing_values = df.isnull().sum()\n```\n\n### Step 2: Summarize Missing Values\n\n```python\n# Summarize missing values\nmissing_summary = missing_values[missing_values > 0]\n\n# Print summary\nprint(\"Summary of Missing Values:\")\nprint(missing_summary)\n```\n\n### Summary\n\n- **Missing Values**: Based on the provided data snippet, there are no missing values in the columns `name`, `age`, and `location`.\n- **Distribution**: Since there are no missing values, there is no distribution to report.\n\n### Detail Analysis\n\n- **Data Completeness**: The dataset appears to be complete with no missing entries in the provided columns.\n- **Potential Issues**: If the dataset were larger, it would be important to check for missing values in other parts of the dataset not shown in the snippet.\n\n### Recommendations\n\n1. **Data Validation**: Ensure that the data collection process is robust to prevent missing values.\n2. **Handling Missing Data**: If missing values are detected in a larger dataset, consider strategies such as imputation or removal, depending on the context and significance of the missing data.\n3. 
**Regular Checks**: Implement regular checks for data quality to maintain the integrity of the dataset.\n\nIf you have a larger dataset or specific areas you want to focus on, please provide more details for a more tailored analysis.",
|
||||
"agent_type": "analyzer",
|
||||
"agent_id": "analyzer",
|
||||
"agent_name": "Datenanalyst",
|
||||
"result_format": "AnalysisReport"
|
||||
},
|
||||
{
|
||||
"id": "msg_8bcb2566-cc56-4a8f-827a-fa64ad7274c9",
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"started_at": "2025-04-11T23:35:53.968709",
|
||||
"finished_at": null,
|
||||
"sequence_no": 5,
|
||||
"status": "pending",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "To generate descriptive statistics for the provided dataset, we will focus on the numerical column, which is \"age\". The other columns, \"name\" and \"location\", are categorical and do not lend themselves to the same type of statistical analysis. Here is a structured table presenting the descriptive statistics for the \"age\" column:\n\n| Statistic | Age |\n|--------------------|------|\n| Count | 4 |\n| Mean | 29.5 |\n| Median | 29.0 |\n| Standard Deviation | 4.2 |\n| Minimum | 25 |\n| 25th Percentile | 27.0 |\n| 50th Percentile | 29.0 |\n| 75th Percentile | 31.5 |\n| Maximum | 35 |\n\n### Summary\n- **Count**: There are 4 entries in the \"age\" column.\n- **Mean**: The average age is 29.5 years.\n- **Median**: The middle value when ages are ordered is 29 years.\n- **Standard Deviation**: The spread of ages around the mean is approximately 4.2 years.\n- **Quartiles**: The ages are distributed with the 25th percentile at 27 years, the median (50th percentile) at 29 years, and the 75th percentile at 31.5 years.\n\n### Detail Analysis\n- The ages range from 25 to 35 years, indicating a relatively young group.\n- The standard deviation suggests moderate variability in the ages.\n- The quartiles show that half of the individuals are between 27 and 31.5 years old.\n\n### Recommendations\n- If further analysis is needed on age-related trends, consider increasing the sample size for more robust insights.\n- For categorical data like \"location\", consider analyzing the frequency distribution or cross-tabulations with other variables if available.\n- Ensure data quality by checking for any missing or outlier values in larger datasets.",
|
||||
"agent_type": "analyzer",
|
||||
"agent_id": "analyzer",
|
||||
"agent_name": "Datenanalyst",
|
||||
"result_format": "AnalysisReport"
|
||||
},
|
||||
{
|
||||
"id": "msg_e4cb8e47-7fa2-4b6d-981b-18161c86f0e0",
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"started_at": "2025-04-11T23:36:01.195923",
|
||||
"finished_at": null,
|
||||
"sequence_no": 6,
|
||||
"status": "pending",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "To analyze the DataFrame and identify the unique values and their counts for each categorical column, we will focus on the 'name' and 'location' columns, as 'age' is typically a numerical column. Here is how the analysis can be structured:\n\n### Data Analysis\n\n#### Unique Values and Counts for Categorical Columns\n\n| Column | Unique Values | Count of Each Unique Value |\n|----------|---------------|----------------------------|\n| name | John | 1 |\n| | Alice | 1 |\n| | Bob | 1 |\n| | Emma | 1 |\n| location | New York | 1 |\n| | London | 1 |\n| | Paris | 1 |\n| | Berlin | 1 |\n\n### Summary\n\n- **Name Column**: Each name in the dataset is unique, with a count of 1 for each individual.\n- **Location Column**: Each location is also unique, with a count of 1 for each city.\n\n### Detail Analysis\n\n- The dataset contains four unique names and four unique locations.\n- There is a one-to-one relationship between names and locations, indicating that each individual is associated with a distinct city.\n\n### Recommendations\n\n- If the dataset is intended to be larger or more diverse, consider adding more entries to increase the variety of names and locations.\n- Ensure that the dataset remains balanced if used for demographic analysis, by potentially adding more entries from different locations or with different names.\n- If further analysis is required, consider exploring additional attributes or expanding the dataset to include more individuals and locations.",
|
||||
"agent_type": "analyzer",
|
||||
"agent_id": "analyzer",
|
||||
"agent_name": "Datenanalyst",
|
||||
"result_format": "AnalysisReport"
|
||||
},
|
||||
{
|
||||
"id": "msg_1a9e22b8-420f-4b6e-a851-369b3040d5e3",
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"started_at": "2025-04-11T23:36:16.361313",
|
||||
"finished_at": null,
|
||||
"sequence_no": 7,
|
||||
"status": "pending",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "To provide a comprehensive summary of the dataset, I will analyze the structure, missing values, descriptive statistics, and unique values for categorical columns based on the provided data snippet.\n\n### Dataset Summary\n\n#### 1. Structure\n- **Columns**: 3\n - `name`: Categorical\n - `age`: Numerical\n - `location`: Categorical\n- **Sample Data**:\n - John, 30, New York\n - Alice, 25, London\n - Bob, 35, Paris\n - Emma, 28, Berlin\n\n#### 2. Missing Values\n- Based on the provided snippet, there are no missing values. However, a complete dataset check is necessary to confirm this.\n\n#### 3. Descriptive Statistics\n- **Age** (Numerical):\n - Mean: (30 + 25 + 35 + 28) / 4 = 29.5\n - Minimum: 25\n - Maximum: 35\n - Standard Deviation: Calculation requires more data points for accuracy.\n \n#### 4. Unique Values for Categorical Columns\n- **Name**:\n - Unique Values: John, Alice, Bob, Emma\n - Total Unique: 4\n- **Location**:\n - Unique Values: New York, London, Paris, Berlin\n - Total Unique: 4\n\n### Detailed Analysis\n\n- **Age Distribution**: The ages range from 25 to 35, with a mean age of 29.5. This indicates a relatively young demographic.\n- **Location Diversity**: The dataset includes individuals from four major cities, suggesting a diverse geographical representation.\n\n### Recommendations\n\n1. **Data Quality Check**: Conduct a thorough review of the entire dataset to ensure there are no missing values or inconsistencies.\n2. **Expand Dataset**: To improve statistical analysis, consider expanding the dataset to include more entries, which will provide a more robust standard deviation and other statistical measures.\n3. **Categorical Analysis**: Further analysis on the distribution of names and locations could provide insights into demographic patterns or preferences.\n4. 
**Visualization**: Create visualizations such as histograms for age distribution and bar charts for location frequency to better understand the data.\n\nThis summary provides a foundational understanding of the dataset. Further exploration with complete data will yield more detailed insights.",
|
||||
"agent_type": "analyzer",
|
||||
"agent_id": "analyzer",
|
||||
"agent_name": "Datenanalyst",
|
||||
"result_format": "AnalysisReport"
|
||||
},
|
||||
{
|
||||
"id": "msg_9a9da16c-5361-4544-a353-296ea602d980",
|
||||
"workflow_id": "wf_20250411233433",
|
||||
"started_at": "2025-04-11T23:36:26.748703",
|
||||
"finished_at": null,
|
||||
"sequence_no": 8,
|
||||
"status": "pending",
|
||||
"role": "assistant",
|
||||
"data_stats": {
|
||||
"processing_time": 0.0,
|
||||
"token_count": 0,
|
||||
"bytes_sent": 0,
|
||||
"bytes_received": 0
|
||||
},
|
||||
"documents": [],
|
||||
"content": "### Workflow Summary\n\n#### Key Findings and Results\n\n1. **Data Loading and Structure**:\n - The CSV file was loaded and processed, revealing a dataset with three columns: `name`, `age`, and `location`.\n - The dataset contains at least four rows, with potential for more as indicated by the ellipsis.\n\n2. **Data Structure**:\n - Columns: \n - `name` and `location` are categorical.\n - `age` is numerical.\n\n3. **Missing Values**:\n - An analysis was conducted to identify missing values, although specific results were not detailed in the summary.\n\n4. **Descriptive Statistics**:\n - Focused on the `age` column:\n - Count: 4\n - Mean: 29.5\n - Median: 29.0\n - Standard deviation was not fully detailed.\n\n5. **Unique Values**:\n - Unique values and their counts were identified for the `name` and `location` columns.\n\n6. **Comprehensive Summary**:\n - The dataset includes a mix of categorical and numerical data, with a sample including individuals like John, Alice, Bob, and Emma from various locations.\n\n#### Connection to Original Task\n\nThe workflow effectively processed and analyzed the CSV file, providing insights into the dataset's structure, missing values, descriptive statistics, and unique values. This aligns with the original task of loading and analyzing a CSV file to extract meaningful information.\n\n#### Conclusions and Recommendations\n\n- **Conclusions**:\n - The dataset is well-structured with clear categorical and numerical distinctions.\n - Basic descriptive statistics and unique value counts provide a foundational understanding of the data.\n\n- **Recommendations**:\n - Further analysis could explore the handling of missing values if they exist.\n - Additional statistical analysis could be performed on the `age` column for more detailed insights.\n - Consider expanding the dataset analysis to include more rows for a comprehensive view.",
|
||||
"agent_type": "summary",
|
||||
"agent_id": "workflow_summary",
|
||||
"agent_name": "Workflow Summary",
|
||||
"result_format": "Text",
|
||||
"workflow_complete": true
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -1,52 +1,17 @@
|
|||
....................... TASKS
|
||||
|
||||
PowerOn Message rein
|
||||
Bericht: Kürzer
|
||||
|
||||
|
||||
----------------------- OPEN
|
||||
|
||||
PRIO1:
|
||||
|
||||
bitte eliminiere alle "waiting_for_user" flags, weil nie auf den user gewartet wird. Dies ist irreführend.
|
||||
|
||||
der ablauf ist so:
|
||||
- user startet workflow (ohne id --> neu) oder (mit id --> bestehenden weiterführen)
|
||||
- damit beginnt das polling
|
||||
- wenn der workflow fertig ist wird er abgeschlossen mit dem prompttext an den user. dazu sollte ein flag in der art "antwort komplett" gesendet werden.
|
||||
- ein "stop" des users beendet den workflow sofort mit dem flag "antwort komplett"
|
||||
- sobald das frontend alle messages bis "antwort komplett" im frontend erfasst hat, ist der workflow fertig mit den anpassungen von buttons, etc.
|
||||
|
||||
kannst du mir bitte angeben, wo im code was anzupassen ist, und was entfernt werden kann, da es "waiting for user" in dieser form nicht gibt.
|
||||
das frontend muss nur wissen, wann ein user prompt gesendet wurde, denn dann startet das polling. und es muss wissen, wann entweder der user "Stop" gedrückt hat, oder wann der user für einen neuen prompt aufgefordert wird, weil das backend mit dem workflow fertig ist. beim erneuten user prompt ist es wichtig, dass alle messages aus dem backend im frontend geladen werden und keine vergessen wird. vielleicht hast du dafür eine geeignete methode.
|
||||
|
||||
|
||||
|
||||
Dokumenterzeugung geht noch nicht.
|
||||
- dokumente sollen automatisch als solche in der datenbank-tabelle files erzeugt werden und in der db als message-objekt integriert werden.
|
||||
|
||||
Setup intelligent agents:
|
||||
- moderator: flow
|
||||
- ai to get list of agents (without user in the list) with their task and result shaping
|
||||
- to run all agents --> messages
|
||||
- ai to get summary and user prompt based on question from user
|
||||
- user WITH prompt
|
||||
- analyze image:
|
||||
- NOT to call when image is loaded
|
||||
- ai to read image with prompt
|
||||
- webcrawler:
|
||||
- ai to give a list of url and a list of key-sentences and 3 websearch-engines to search
|
||||
- search pages and collect content-list with REFERENCE
|
||||
- ai to summarize content
|
||||
- return summary with referencelist
|
||||
- documenter: ai to give generic prompt in 2 steps
|
||||
- 1 table of content including summary
|
||||
- 2 content per chapter
|
||||
- coder
|
||||
- identify available routines and connectors (e.g. sharepoint or mail), make list of functions with parameters
|
||||
- ai to give steps with functioncalls and required result with format
|
||||
- write code
|
||||
- run code in env
|
||||
- send back result
|
||||
nda einbinden
|
||||
|
||||
agents_modules to load dynamically
|
||||
|
||||
ENV Variable setzen (extract from code) und config in .gitignore für example.env
|
||||
|
||||
|
|
@ -54,7 +19,6 @@ Database to remove from backend
|
|||
|
||||
workflow.css --> cleanup, later definition is newer
|
||||
|
||||
file content to read with prompt for file --> prompt prepared for the specific files
|
||||
|
||||
|
||||
|
||||
|
|
@ -70,15 +34,6 @@ frontend: no labels definition
|
|||
|
||||
sharepoint connector with document search, content search, content extraction
|
||||
|
||||
Implement Connector für Agent Code dev + execute
|
||||
- venv für ihn machen und angeben, wo er ausführen kann
|
||||
|
||||
coder: to include available functions and connectors to the prompt for coding
|
||||
|
||||
webcrawler:
|
||||
- asks ai for steps to do --> List
|
||||
- Keine Listen auf deutsch etc., sondern generisch
|
||||
|
||||
add connector to myoutlook
|
||||
|
||||
|
||||
|
|
@ -94,6 +49,68 @@ Systemarchitektur (Grundsätze der Architektur, Komponenten und deren Aufbau)
|
|||
|
||||
|
||||
|
||||
|
||||
# WORKFLOW EXECUTION
|
||||
|
||||
Die workflow execution soll so angepasst werden:
|
||||
1. Der Workflow startet wie bisher bis und mit message initialisierung
|
||||
2. Dann wird über den AI Call der Arbeitsplan erstellt, welcher als Resultat eine Liste der Aktivitäten liefert, die auszuführen sind. Pro Schritt ist strukturiert erfasst:
|
||||
- Was ist im Schritt zu tun? Dies als AI Prompt, um anschliessend die Agenten für den Schritt zu definieren
|
||||
- Welche Daten sind dazu nötig? Dies formuliert als AI Prompt an den Dateien-Manager
|
||||
- Welches Resultat soll geliefert werden? - Strukturierte Angabe von Formatvorgaben (z.B. "Liste von Dateien","Text","JSON", "Tabelle", etc.)
|
||||
3. Nun wird die Liste der Aktivitäten abgearbeitet. Pro Aktivität erfolgt dies:
|
||||
- Agenten mit ihren Eigenschaften und dem Resultatformat zusammenstellen
|
||||
- Mit AI Call festlegen, welche Agenten in welcher Reihenfolge nötig sind.
|
||||
- Nun die Agenten schrittweise ausführen lassen. dazu diese schritte pro agent:
|
||||
-- message object mit prompt und der angabe des letzten message objectes im workflow vorbereiten
|
||||
-- Mit dem Hilfsmodul "agentservice_dataextraction.py" die nötigen Daten aus dem Workflow extrahieren und dem message object des agenten zufügen. Im Hilfsmodul noch das Objekt messages definieren.
|
||||
-- agent liefert das resultat, welches als message object im workflow ergänzt wird.
|
||||
4. Nun die Zusammenfassung der durch die agenten erstellten resultate für den User erstellen und ebenfalls als message im workflow speichern.
|
||||
|
||||
|
||||
# CODE STRUKTUR
|
||||
|
||||
Aktuell hat es in jedem Modul und auch im Hauptmodul von agentservice* detaillierten Code drin. Kannst Du im gleichen Zug den Code aufräumen, dass "agentservice_workflow_manager" als master-modul nur funktionen aufruft und nicht noch details bearbeitet. so kann der workflow besser geführt werden.
|
||||
|
||||
Die Meldungen im "_add_log()" sowie die Logger-Meldungen sind unübersichtlich und helfen kaum zur Analyse. Bitte diese Meldungen anhand des Workflows strukturieren und auch die Moderator-Anweisungen (zusammengefasst im _add_log und mit den parametern (lange texte gekürzt) im logger) ausgeben, damit eine Fehlersuche einfacher ist.
|
||||
|
||||
Bitte Hilfsfunktionen, welche überall immer wieder verwendet werden, in ein utility modul auslagern. Als Idee Dinge wie
|
||||
- Class mit Methoden zum lesen, schreiben, extrahieren von messages im workflow inklusive Typenkonversion von Dict in str. Dass ich z.B. schreiben kann (nur als idee, gibt eventuell schlauere funktionen): workflow(id).documents.extract_by_prompt(prompt).to_str()
|
||||
- Bitte analysiere den code, was an Funktionen Sinn macht
|
||||
|
||||
Allenfalls noch andere Themen, die helfen, den Code zu vereinfachen. Das Ziel soll es sein, dass der Workflow und die Agentencodes nicht jedes Detail immer codiert haben müssen mit immer wieder fehlerabfangroutinen, sondern dass wie auf vordefinierte module zugreifen können und diese durchgängig nutzen. damit soll der code massiv verkürzt werden.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# DATEIEN EINLESEN
|
||||
|
||||
Wenn eine Datei/File (in der datenbank ein Dokument) als Text lesbar ist (txt, csv, html, Text in Pdf etc.), dann wird der text des dokumentes direkt ausgelesen und als DocumentContent in der DB erfasst --> is_extracted=True. Wenn ein Dokument nicht als Text lesbar ist (Bilder, Videos, Bilder in PDF etc.), dann wird der text des entsprechenden DocumentContent nicht extrahiert, also is_extracted=False. (hinweis: Die extraktion findet dann erst im workflow mit einem prompt statt.)
|
||||
|
||||
|
||||
# AGENTEN
|
||||
|
||||
In jedem Agenten-Profil ein Attribut ergänzen, welches spezifisch angibt, in welchem Format der Agent das Resultat zurückliefert (z.b. "DocumentID" oder "Text" oder "List of ..." etc.).
|
||||
|
||||
|
||||
# HILFSFUNKTIONEN
|
||||
|
||||
1. data_extraction(prompt) --> messages: ai call durchführen mit einer liste aller dateien mit ihren metadaten und aller messages im workflow. mit dem prompt prüfen, welche inhalte von welchem datenobjekt erforderlich sind. das resultat soll eine liste sein, welche pro datenobjekt den prompt enthält, um die nötigen daten zu extrahieren. diese liste abarbeiten (falls ein dokument den inhalt nicht extrahiert hat, diesen nun mit der entsprechenden funktion extrahieren; bild-extraktion ist bereits als funktion verfügbar) und die extrahierten daten mit ihren kontext-informationen als strukturiertes text-object zurückgeben (metadaten mit extrahierten inhalten)
|
||||
|
||||
|
||||
# ZUSAETZLICHE AGENTEN
|
||||
|
||||
NEU: Der Filecreator kann dies tun, welche relevant für seine Fähigkeiten sind:
|
||||
Datei erstellen --> Document object in der Datenbank mit dem mitgelieferten inhalt und datentyp erzeugen und die id zum Datenobjekt zurückliefern
|
||||
|
||||
|
||||
Implementieren: Coder
|
||||
Dieser soll python code generieren und als Parameter die verfügbaren Funktionen im Umsystem (z.b. für Files laden und speichern) (als Erweiterung im Beispiel soll pro Funktion angegeben werden, welche Parameter und welches Resultat-format, hier ein geeigneter vorschlag von dir bitte). Den Code anschliessend ausführen, so wie im Code Beispiel "_code_exec_temp.py". Dann das Resultat zurückgeben.
|
||||
|
||||
|
||||
|
||||
|
||||
Workflow module refactored
|
||||
|
||||
Summary of Changes
|
||||
|
|
|
|||
Loading…
Reference in a new issue