stable backbone (workflow, agents)

value on 2025-04-14 20:05:33 +02:00
parent 61acf4950b
commit b0c45fb798
36 changed files with 5830 additions and 10184 deletions


@@ -1,190 +0,0 @@
"""
Datenanalyst-Agent für die Analyse und Interpretation von Daten.
"""
import logging
from typing import List, Dict, Any, Optional
from modules.agentservice_base import BaseAgent
from connectors.connector_aichat_openai import ChatService
logger = logging.getLogger(__name__)
class AnalystAgent(BaseAgent):
"""Agent für die Analyse und Interpretation von Daten"""
_instance = None
@classmethod
def get_instance(cls):
"""Gibt eine Singleton-Instanz zurück"""
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self):
"""Initialisiert den Datenanalyst-Agenten"""
super().__init__()
self.id = "analyst_agent"
self.name = "Datenanalyst"
self.type = "analyzer"
self.description = "Analysiert und interpretiert Daten"
self.capabilities = "Datenanalyse, Mustererkennung, Statistik und Bewertung"
self.instructions = """
Du bist der Datenanalyseagent. Deine Aufgabe:
1. Vorliegende Daten untersuchen und interpretieren
2. Erkenntnisse aus Informationen gewinnen
3. Trends identifizieren und Zusammenhänge prüfen
4. Daten visualisieren und Konzepte erklären
5. Datenqualität bewerten und Handlungsempfehlungen geben
"""
self.result_format = "AnalysisReport"
def get_prompt(self, message_context: Dict[str, Any]) -> str:
"""
Generiert einen angepassten Prompt für den Datenanalysten.
Args:
message_context: Kontext der Nachricht
Returns:
Formatierter Prompt für den Datenanalysten
"""
# Basis-Prompt
prompt = f"""
Du bist {self.name}, ein {self.type} Agent.
{self.description}
Fähigkeiten: {self.capabilities}
{self.instructions}
Analysiere die vorliegenden Daten. Präsentiere klar strukturierte Ergebnisse
mit einer Zusammenfassung, Detailanalyse und Handlungsempfehlungen.
Formatiere mit [STATUS: ERGEBNIS/TEILWEISE/PLAN] am Ende.
"""
# Dateitypspezifische Anweisungen hinzufügen (verkürzt)
document_types = self._get_document_types(message_context)
if "csv" in document_types or "excel" in document_types:
prompt += "\nTABELLENDATEN: Identifiziere wichtige Spalten, Korrelationen und Trends."
if "pdf" in document_types or "doc" in document_types:
prompt += "\nTEXTDATEN: Extrahiere zentrale Fakten und Schlüsselthemen."
if "image" in document_types:
prompt += "\nBILDDATEN: Beschreibe und interpretiere dargestellte Informationen."
return prompt.strip()
def _get_document_types(self, message_context: Dict[str, Any]) -> List[str]:
"""
Extrahiert die Dateitypen aus dem Nachrichtenkontext.
Args:
message_context: Kontext der Nachricht
Returns:
Liste der Dateitypen
"""
document_types = []
# Versuche Dokumente aus dem Kontext zu extrahieren
documents = message_context.get("documents", [])
for doc in documents:
source = doc.get("source", {})
name = source.get("name", "").lower()
content_type = source.get("content_type", "").lower()
# Dateityp aus Namen oder Content-Type ableiten
if name.endswith(".csv") or "csv" in content_type:
document_types.append("csv")
elif name.endswith((".xls", ".xlsx")) or "excel" in content_type or "spreadsheet" in content_type:
document_types.append("excel")
elif name.endswith(".pdf") or "pdf" in content_type:
document_types.append("pdf")
elif name.endswith((".doc", ".docx")) or "word" in content_type:
document_types.append("doc")
elif name.endswith((".jpg", ".jpeg", ".png", ".gif")) or "image" in content_type:
document_types.append("image")
return document_types
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und führt eine Datenanalyse durch.
Args:
message: Die zu verarbeitende Nachricht
context: Zusätzlicher Kontext (optional)
Returns:
Die generierte Antwort mit der Datenanalyse
"""
try:
# Prompt generieren
message_context = {"documents": context.get("documents", [])} if context else {}
prompt = self.get_prompt(message_context)
# OpenAI ChatService initialisieren
chat_service = ChatService()
# Nachrichten für die API vorbereiten
messages = [
{"role": "system", "content": prompt},
{"role": "user", "content": message.get("content", "")}
]
# Kontext-Nachrichten hinzufügen, falls vorhanden
if context and "history" in context:
for history_item in context["history"]:
messages.append({
"role": history_item.get("role", "user"),
"content": history_item.get("content", "")
})
# API aufrufen
response_content = await chat_service.call_api(messages)
# Verbindung schließen
await chat_service.close()
# Antwort-Objekt erstellen
analysis_response = {
"role": "assistant",
"content": response_content,
"agent_type": self.type
}
# Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
content, status = self.extract_status(analysis_response["content"])
analysis_response["content"] = content
# Setze den Status im Kontext, falls vorhanden
if context is not None:
context["status"] = status
analysis_response["result_format"] = self.result_format
return analysis_response
except Exception as e:
logger.error(f"Fehler bei der Verarbeitung der Anfrage: {str(e)}", exc_info=True)
# Fehlerantwort zurückgeben
return {
"role": "assistant",
"content": f"Bei der Datenanalyse ist ein Fehler aufgetreten: {str(e)}",
"agent_type": self.type
}
# Singleton-Instanz
_analyst_agent = None
def get_analyst_agent():
"""Gibt eine Singleton-Instanz des Datenanalyst-Agenten zurück"""
global _analyst_agent
if _analyst_agent is None:
_analyst_agent = AnalystAgent()
return _analyst_agent
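A minimal usage sketch for the agent above, assuming a standard asyncio event loop; the message text, context payload and printed fields are illustrative and simply mirror what process_message reads and returns, not values taken from this commit.

import asyncio

async def run_analysis_example():
    # Obtain the shared singleton and hand it a single user message.
    agent = get_analyst_agent()
    message = {"role": "user", "content": "Analysiere die Verkaufszahlen der letzten Quartale."}
    # process_message reads "documents" and "history" from the context
    # and writes the extracted [STATUS: ...] value back into it.
    context = {"documents": [], "history": []}
    response = await agent.process_message(message, context)
    print(context.get("status"), response.get("result_format"))
    print(response["content"])

# asyncio.run(run_analysis_example())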


@@ -1,426 +0,0 @@
"""
Erweiterter Coder-Agent für die Entwicklung und Ausführung von Python-Code (Fortsetzung).
"""
import logging
import json
import os
from typing import List, Dict, Any, Optional
import asyncio
import re
import traceback
from datetime import datetime
from modules.agentservice_base import BaseAgent
from modules.lucydom_interface import get_lucydom_interface
from modules.agentservice_code_executor import CodeExecutor
logger = logging.getLogger(__name__)
class CoderAgent(BaseAgent):
"""Erweiterter Agent für die Entwicklung und Ausführung von Python-Code"""
async def _execute_code(self, code: str, lucydom_interface, context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Führt Python-Code mit dem CodeExecutor aus.
Args:
code: Der auszuführende Python-Code
lucydom_interface: Interface für Datenbankzugriffe
context: Zusätzlicher Kontext
Returns:
Ergebnis der Codeausführung
"""
try:
# Systemfunktionen für den Code vorbereiten
system_functions_code = self._prepare_system_functions(lucydom_interface)
# Code mit Systemfunktionen erweitern
enhanced_code = system_functions_code + "\n\n" + code
# CodeExecutor initialisieren
available_modules = [
"modules.lucydom_interface",
"modules.lucydom_model",
"modules.agentservice_filehandling"
]
# Liste erlaubter Pakete
allowed_packages = None # None bedeutet alle erlaubt außer explizit blockierte
# Liste blockierter Pakete
blocked_packages = [
"cryptography", "flask", "django", "tornado", # Sicherheitsrisiken
"tensorflow", "pytorch", "scikit-learn" # Ressourcenintensiv
]
executor = CodeExecutor(
app_modules=available_modules,
timeout=60, # 60 Sekunden Timeout
max_memory_mb=512, # 512MB Speicherlimit
allowed_packages=allowed_packages,
blocked_packages=blocked_packages
)
try:
# Eingabedaten vorbereiten (falls vorhanden)
input_data = {
"context": context,
"workflow_id": context.get("workflow_id", "") if context else "",
}
# Dateireferenzen hinzufügen
if context and "documents" in context:
file_refs = []
for doc in context.get("documents", []):
source = doc.get("source", {})
if source.get("type") == "file":
file_refs.append({
"id": source.get("id", ""),
"name": source.get("name", ""),
"type": source.get("content_type", "")
})
input_data["files"] = file_refs
# Code ausführen
result = executor.execute_code(enhanced_code, input_data)
# Log für die Ausführung
if result.get("success", False):
logger.info(f"Code erfolgreich ausgeführt")
output = result.get("output", "")
if output:
logger.debug(f"Ausgabe: {output[:200]}..." if len(output) > 200 else output)
else:
logger.error(f"Fehler bei der Codeausführung: {result.get('error', 'Unbekannter Fehler')}")
return result
finally:
# Ressourcen freigeben
executor.cleanup()
except Exception as e:
logger.error(f"Fehler bei der Codeausführung: {str(e)}", exc_info=True)
return {
"success": False,
"output": "",
"error": f"Fehler bei der Ausführung: {str(e)}\n{traceback.format_exc()}",
"result": None
}
def _prepare_system_functions(self, lucydom_interface) -> str:
"""
Bereitet die Systemfunktionen für den auszuführenden Code vor.
Args:
lucydom_interface: Interface für Datenbankzugriffe
Returns:
Python-Code für die Systemfunktionen
"""
system_functions_code = """
# Systemfunktionen für den Code
async def load_file(file_id, encoding=None):
\"\"\"
Lädt eine Datei aus der Datenbank anhand ihrer ID.
Args:
file_id: ID der zu ladenden Datei
encoding: Optionale Kodierung (Standard: None für binäre Daten)
Returns:
Binäre Daten oder dekodierter String, je nach Encoding-Parameter
\"\"\"
try:
# lucydom_interface wird über Globals zur Verfügung gestellt
global lucydom_interface
if not lucydom_interface:
raise ValueError("LucyDOM-Interface nicht verfügbar")
# Dateiinhalt asynchron laden
file_content = await lucydom_interface.read_file_content(file_id)
if not file_content:
raise ValueError(f"Datei mit ID {file_id} nicht gefunden")
# Wenn Encoding angegeben ist, String zurückgeben
if encoding:
return file_content.decode(encoding)
# Andernfalls binäre Daten zurückgeben
return file_content
except Exception as e:
print(f"Fehler beim Laden der Datei {file_id}: {str(e)}")
raise
def save_file(content, file_name, content_type=None):
\"\"\"
Speichert Daten als Datei in der Datenbank.
Args:
content: Zu speichernde Daten (String oder Bytes)
file_name: Name der Datei
content_type: MIME-Typ der Datei (z.B. 'text/csv')
Returns:
Metadaten der gespeicherten Datei inkl. ID
\"\"\"
try:
# lucydom_interface wird über Globals zur Verfügung gestellt
global lucydom_interface
if not lucydom_interface:
raise ValueError("LucyDOM-Interface nicht verfügbar")
# Wenn der Inhalt ein String ist, in Bytes konvertieren
if isinstance(content, str):
content = content.encode('utf-8')
# Datei speichern
file_meta = lucydom_interface.save_uploaded_file(content, file_name)
# Wenn content_type angegeben ist, Datei-Metadaten aktualisieren
if content_type and "id" in file_meta:
update_data = {"content_type": content_type}
lucydom_interface.update_file(file_meta["id"], update_data)
file_meta["content_type"] = content_type
return file_meta
except Exception as e:
print(f"Fehler beim Speichern der Datei {file_name}: {str(e)}")
raise
def update_file(file_id, content, update_metadata=None):
\"\"\"
Aktualisiert eine bestehende Datei in der Datenbank.
Args:
file_id: ID der zu aktualisierenden Datei
content: Neue Inhalte für die Datei (String oder Bytes)
update_metadata: Optionale Metadaten-Updates
Returns:
Aktualisierte Metadaten der Datei
\"\"\"
try:
# lucydom_interface wird über Globals zur Verfügung gestellt
global lucydom_interface
if not lucydom_interface:
raise ValueError("LucyDOM-Interface nicht verfügbar")
# Wenn der Inhalt ein String ist, in Bytes konvertieren
if isinstance(content, str):
content = content.encode('utf-8')
# Bestehende Datei abrufen
file_meta = lucydom_interface.get_file(file_id)
if not file_meta:
raise ValueError(f"Datei mit ID {file_id} nicht gefunden")
# Datei mit neuen Inhalten aktualisieren
updated_meta = lucydom_interface.save_uploaded_file(content, file_meta.get("name", "updated_file"))
# Metadaten aktualisieren
if update_metadata and "id" in updated_meta:
lucydom_interface.update_file(updated_meta["id"], update_metadata)
updated_meta.update(update_metadata)
return updated_meta
except Exception as e:
print(f"Fehler beim Aktualisieren der Datei {file_id}: {str(e)}")
raise
def get_file_metadata(file_id):
\"\"\"
Ruft die Metadaten einer Datei ab.
Args:
file_id: ID der Datei
Returns:
Metadaten der Datei als Dictionary
\"\"\"
try:
# lucydom_interface wird über Globals zur Verfügung gestellt
global lucydom_interface
if not lucydom_interface:
raise ValueError("LucyDOM-Interface nicht verfügbar")
# Datei-Metadaten abrufen
file_meta = lucydom_interface.get_file(file_id)
if not file_meta:
raise ValueError(f"Datei mit ID {file_id} nicht gefunden")
return file_meta
except Exception as e:
print(f"Fehler beim Abrufen der Metadaten für Datei {file_id}: {str(e)}")
raise
def process_csv(content, operations=None):
\"\"\"
Verarbeitet CSV-Daten mit Pandas.
Args:
content: CSV-Daten als String oder Bytes
operations: Liste von Operationen, die auf den Daten ausgeführt werden sollen
[{"type": "filter", "column": "Name", "value": "Max"},
{"type": "groupby", "column": "Category"}]
Returns:
Ergebnis der Verarbeitung als Dictionary
\"\"\"
try:
import pandas as pd
import io
# Wenn der Inhalt Bytes ist, in String konvertieren
if isinstance(content, bytes):
content = content.decode('utf-8')
# CSV in DataFrame laden
df = pd.read_csv(io.StringIO(content))
# Wenn Operationen angegeben sind, diese durchführen
if operations:
for op in operations:
op_type = op.get("type", "").lower()
if op_type == "filter" and "column" in op and "value" in op:
df = df[df[op["column"]] == op["value"]]
elif op_type == "groupby" and "column" in op:
groupby_column = op["column"]
agg_column = op.get("aggregate_column")
agg_func = op.get("aggregate_function", "count")
if agg_column:
df = df.groupby(groupby_column).agg({agg_column: agg_func}).reset_index()
else:
df = df.groupby(groupby_column).size().reset_index(name='count')
# Ergebnis zurückgeben
return {
"data": df.to_dict('records'),
"columns": df.columns.tolist(),
"shape": df.shape
}
except Exception as e:
print(f"Fehler bei der CSV-Verarbeitung: {str(e)}")
raise
def extract_text_from_pdf(pdf_data):
\"\"\"
Extrahiert Text aus einem PDF-Dokument.
Args:
pdf_data: PDF-Daten als Bytes
Returns:
Extrahierter Text aus dem PDF
\"\"\"
try:
# Versuche PyPDF2 zu verwenden
try:
from PyPDF2 import PdfReader
from io import BytesIO
reader = PdfReader(BytesIO(pdf_data))
text = ""
for page in reader.pages:
text += page.extract_text() + "\\n\\n"
return text
except ImportError:
# Fallback auf pymupdf, falls PyPDF2 nicht verfügbar ist
try:
import fitz # PyMuPDF
from io import BytesIO
doc = fitz.open("pdf", pdf_data)
text = ""
for page in doc:
text += page.get_text() + "\\n\\n"
return text
except ImportError:
return "PDF-Extraktion fehlgeschlagen: Weder PyPDF2 noch PyMuPDF sind installiert"
except Exception as e:
print(f"Fehler bei der PDF-Extraktion: {str(e)}")
return f"Fehler bei der PDF-Extraktion: {str(e)}"
def analyze_image(image_data, analysis_type="description"):
\"\"\"
Analysiert ein Bild (KI-basiert, falls verfügbar).
Args:
image_data: Bilddaten als Bytes
analysis_type: Art der Analyse: 'description', 'objects', 'text'
Returns:
Ergebnis der Bildanalyse
\"\"\"
# Hinweis: Diese Funktion simuliert eine Bildanalyse,
# da die echte KI-Analyse eine async-Funktion erfordern würde
try:
# Bildgröße ermitteln
from io import BytesIO
from PIL import Image
image = Image.open(BytesIO(image_data))
width, height = image.size
format_name = image.format
# Simulierte Analyse basierend auf dem Bildtyp
analysis_result = {
"image_info": {
"width": width,
"height": height,
"format": format_name,
"size_bytes": len(image_data)
},
"analysis_type": analysis_type,
"analysis_result": f"Simulierte Bildanalyse für ein {format_name}-Bild ({width}x{height}px)"
}
return analysis_result
except Exception as e:
print(f"Fehler bei der Bildanalyse: {str(e)}")
return {"error": str(e)}
# lucydom_interface global verfügbar machen
import asyncio
"""
return system_functions_code
# Singleton-Instanz
_coder_agent = None
def get_coder_agent():
"""Gibt eine Singleton-Instanz des Coder-Agenten zurück"""
global _coder_agent
if _coder_agent is None:
_coder_agent = CoderAgent()
return _coder_agent
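To make the execution path above concrete, a sketch of the kind of snippet the CoderAgent could hand to _execute_code; process_csv and save_file are the helpers injected by _prepare_system_functions, while the CSV content and file name are invented placeholders.

generated_code = '''
csv_text = "Kategorie,Umsatz\\nA,100\\nB,250\\nA,50"
summary = process_csv(csv_text, operations=[
    {"type": "groupby", "column": "Kategorie",
     "aggregate_column": "Umsatz", "aggregate_function": "sum"},
])
save_file(str(summary["data"]), "auswertung.txt", content_type="text/plain")
'''
# _execute_code would prepend the system functions to this snippet and run it
# through CodeExecutor.execute_code() with the prepared input_data.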


@@ -1,422 +0,0 @@
"""
Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten.
Verwendet einen strukturierten mehrstufigen Prozess zur Erstellung hochwertiger Dokumentation.
"""
import logging
from typing import List, Dict, Any, Optional, Tuple
from modules.agentservice_base import BaseAgent
from connectors.connector_aichat_openai import ChatService
logger = logging.getLogger(__name__)
class DocumentationAgent(BaseAgent):
"""Agent für die Erstellung von Dokumentation und strukturierten Inhalten"""
_instance = None
@classmethod
def get_instance(cls):
"""Gibt eine Singleton-Instanz zurück"""
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self):
"""Initialisiert den Dokumentations-Agenten"""
super().__init__()
self.id = "documentation_agent"
self.name = "Dokumentation"
self.type = "documentation"
self.description = "Erstellt Dokumentation und strukturierte Inhalte"
self.capabilities = "Berichte, Dokumentationen"
self.instructions = """
Du bist der Dokumentations-Agent. Deine Aufgabe:
1. Komplexe Informationen in klare, strukturierte Dokumente umsetzen
2. Verschiedene Dokumentformate erstellen
3. Informationen aus verschiedenen Quellen strukturieren
4. Technische Konzepte verständlich erklären
5. Konsistente Formatierung sicherstellen
"""
# Chat-Service initialisieren
self.chat_service = None
self.result_format = "FormattedDocument"
def get_base_prompt(self, document_type: str = "") -> str:
"""
Generiert einen Basis-Prompt für den Dokumentations-Agenten.
Args:
document_type: Typ des zu erstellenden Dokuments
Returns:
Basis-Prompt für den Dokumentations-Agenten
"""
# Basis-Prompt
prompt = f"""
Du bist {self.name}, ein {self.type} Agent.
{self.description}
Fähigkeiten: {self.capabilities}
{self.instructions}
"""
# Dokumenttyp-spezifische Anweisungen hinzufügen
if document_type:
prompt += self._get_document_type_instructions(document_type)
return prompt.strip()
def _get_document_type_instructions(self, document_type: str) -> str:
"""
Gibt spezifische Anweisungen für einen bestimmten Dokumenttyp zurück.
Args:
document_type: Typ des Dokuments
Returns:
Spezifische Anweisungen für den Dokumenttyp
"""
document_type = document_type.lower()
if "handbuch" in document_type or "anleitung" in document_type or "guide" in document_type:
return "\n\nHANDBUCH: Beginne mit Zweckbeschreibung, strukturiere in logische Schritte, verwende direkte Anweisungen."
elif "bericht" in document_type or "report" in document_type:
return "\n\nBERICHT: Beginne mit Executive Summary, strukturiere in thematische Abschnitte, halte professionellen Ton."
elif "prozess" in document_type or "process" in document_type:
return "\n\nPROZESS: Beschreibe Zweck, Ziele, Beteiligte, sequenzielle Schritte, Inputs/Outputs und Verantwortlichkeiten."
elif "präsentation" in document_type or "presentation" in document_type:
return "\n\nPRÄSENTATION: Klare Hauptpunkte, visuelle Elemente, Einleitung-Hauptteil-Schluss Struktur."
else:
return "\n\nDOKUMENT: Erstelle ein gut strukturiertes Dokument mit klarer Gliederung und präziser Sprache."
def _detect_document_type(self, message: str) -> str:
"""
Erkennt den Dokumenttyp aus der Nachricht.
Args:
message: Nachricht des Benutzers
Returns:
Erkannter Dokumenttyp
"""
message = message.lower()
if "handbuch" in message or "anleitung" in message or "guide" in message:
return "handbuch"
elif "bericht" in message or "report" in message:
return "bericht"
elif "prozess" in message or "process" in message or "ablauf" in message:
return "prozess"
elif "präsentation" in message or "presentation" in message or "folien" in message:
return "präsentation"
else:
return "dokument"
async def generate_title(self, task: str, document_type: str) -> str:
"""
Generiert einen Titel für das Dokument.
Args:
task: Die Aufgabe/Anfrage
document_type: Typ des Dokuments
Returns:
Generierter Titel
"""
prompt = f"""
Erstelle einen prägnanten, professionellen Titel für folgendes {document_type.capitalize()}:
AUFTRAG: {task}
Gib NUR den Titel zurück, ohne weitere Erklärungen oder Formatierungen.
"""
messages = [
{"role": "system", "content": "Du bist ein Experte für die Erstellung von Dokumenttiteln."},
{"role": "user", "content": prompt}
]
title = await self.chat_service.call_api(messages)
# Bereinige den Titel von Anführungszeichen und Überschriften-Symbolen
title = title.strip('"\'#*- \n\t')
return title
async def generate_summary(self, task: str, document_type: str, title: str) -> str:
"""
Generiert eine Zusammenfassung für das Dokument.
Args:
task: Die Aufgabe/Anfrage
document_type: Typ des Dokuments
title: Titel des Dokuments
Returns:
Generierte Zusammenfassung
"""
prompt = f"""
Erstelle eine prägnante Zusammenfassung für folgendes Dokument:
TITEL: {title}
TYP: {document_type.capitalize()}
AUFTRAG: {task}
Die Zusammenfassung soll einen Überblick über den Zweck und die Hauptinhalte des Dokuments geben.
Sie sollte etwa 3-5 Sätze umfassen und als eigenständiger Abschnitt funktionieren.
"""
messages = [
{"role": "system", "content": "Du bist ein Experte für die Erstellung prägnanter Dokumentzusammenfassungen."},
{"role": "user", "content": prompt}
]
summary = await self.chat_service.call_api(messages)
return summary.strip()
async def generate_toc_with_prompts(self, task: str, document_type: str, title: str, summary: str) -> Dict[str, str]:
"""
Generiert ein Inhaltsverzeichnis mit Prompts für die einzelnen Kapitel.
Args:
task: Die Aufgabe/Anfrage
document_type: Typ des Dokuments
title: Titel des Dokuments
summary: Zusammenfassung des Dokuments
Returns:
Dict mit Kapiteltiteln als Schlüssel und Prompts als Werte
"""
prompt = f"""
Erstelle ein strukturiertes Inhaltsverzeichnis für folgendes Dokument:
TITEL: {title}
TYP: {document_type.capitalize()}
AUFTRAG: {task}
ZUSAMMENFASSUNG: {summary}
Für jedes Kapitel gib auch einen kurzen Prompt an, der beschreibt, was in diesem Kapitel behandelt werden soll.
Formatiere deine Antwort als JSON-Objekt mit folgendem Format:
{{
"Kapitel 1: Titel": "Prompt für Kapitel 1",
"Kapitel 2: Titel": "Prompt für Kapitel 2",
...
}}
Beschränke dich auf 5-7 sinnvolle Kapitel, die das Thema umfassend behandeln.
"""
messages = [
{"role": "system", "content": "Du bist ein Experte für die Strukturierung von Dokumenten und die Erstellung von Inhaltsverzeichnissen."},
{"role": "user", "content": prompt}
]
toc_response = await self.chat_service.call_api(messages)
# JSON aus der Antwort extrahieren
import json
import re
# Markdown-Code-Blöcke entfernen, falls vorhanden
toc_response = re.sub(r'```json\s*|\s*```', '', toc_response)
try:
toc_with_prompts = json.loads(toc_response)
return toc_with_prompts
except json.JSONDecodeError as e:
logger.error(f"Fehler beim Parsen des Inhaltsverzeichnisses: {str(e)}")
logger.error(f"Rohe Antwort: {toc_response}")
# Notfall-Fallback
return {
"1. Einleitung": "Einführung in das Thema und Überblick",
"2. Hauptteil": "Hauptinhalte des Dokuments",
"3. Schlussfolgerung": "Zusammenfassung und nächste Schritte"
}
async def generate_chapter_content(self, chapter_title: str, chapter_prompt: str,
task: str, document_type: str, title: str, summary: str) -> str:
"""
Generiert den Inhalt für ein bestimmtes Kapitel.
Args:
chapter_title: Titel des Kapitels
chapter_prompt: Prompt für das Kapitel
task: Die Aufgabe/Anfrage
document_type: Typ des Dokuments
title: Titel des Dokuments
summary: Zusammenfassung des Dokuments
Returns:
Generierter Kapitelinhalt
"""
prompt = f"""
Erstelle detaillierten Inhalt für folgendes Kapitel eines {document_type}s:
DOKUMENT-TITEL: {title}
AUFGABE: {task}
KAPITEL: {chapter_title}
ANWEISUNG FÜR DIESES KAPITEL: {chapter_prompt}
Der Inhalt sollte detailliert, informativ und gut strukturiert sein.
Verwende bei Bedarf Unterüberschriften, Aufzählungen und Tabellen zur besseren Strukturierung.
Der Inhalt sollte direkt mit dem Kapiteltext beginnen, ohne den Kapiteltitel zu wiederholen.
"""
messages = [
{"role": "system", "content": "Du bist ein Experte für die Erstellung hochwertiger Dokumentationsinhalte."},
{"role": "user", "content": prompt}
]
chapter_content = await self.chat_service.call_api(messages)
return chapter_content.strip()
def _format_final_document(self, title: str, summary: str, toc: Dict[str, str], chapter_contents: Dict[str, str]) -> str:
"""
Formatiert das endgültige Dokument aus allen Teilen.
Args:
title: Titel des Dokuments
summary: Zusammenfassung
toc: Inhaltsverzeichnis (Dict mit Kapiteltiteln als Schlüssel)
chapter_contents: Kapitelinhalte (Dict mit Kapiteltiteln als Schlüssel und Inhalten als Werte)
Returns:
Formatiertes Dokument
"""
# Titel formatieren
doc = f"# {title}\n\n"
# Zusammenfassung hinzufügen
doc += f"## Zusammenfassung\n\n{summary}\n\n"
# Inhaltsverzeichnis hinzufügen
doc += "## Inhaltsverzeichnis\n\n"
for idx, chapter in enumerate(toc.keys(), 1):
# Extrahiere den reinen Kapitelnamen (entferne Nummerierung, falls vorhanden)
clean_chapter = chapter
if chapter.strip().startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) and '. ' in chapter:
clean_chapter = chapter.split('. ', 1)[1]
doc += f"{idx}. {clean_chapter}\n"
doc += "\n"
# Kapitelinhalte hinzufügen
for idx, (chapter, content) in enumerate(chapter_contents.items(), 1):
# Extrahiere den reinen Kapitelnamen (entferne Nummerierung, falls vorhanden)
clean_chapter = chapter
if chapter.strip().startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) and '. ' in chapter:
clean_chapter = chapter.split('. ', 1)[1]
doc += f"## {idx}. {clean_chapter}\n\n{content}\n\n"
# Metadaten hinzufügen
doc += "---\n\n"
doc += f"**Erstellt durch:** {self.name}\n"
return doc
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und erstellt Dokumentation in einem strukturierten Prozess.
Args:
message: Die zu verarbeitende Nachricht
context: Zusätzlicher Kontext
Returns:
Die generierte Dokumentation
"""
try:
# Chat-Service initialisieren, falls noch nicht geschehen
if self.chat_service is None:
self.chat_service = ChatService()
# Task aus der Nachricht extrahieren
task = message.get("content", "")
if context and "task" in context:
task = context["task"]
# Dokumenttyp erkennen
document_type = self._detect_document_type(task)
logger.info(f"Starte Dokumentationserstellung für Typ: {document_type}")
# Schritt 1: Titel generieren
title = await self.generate_title(task, document_type)
logger.info(f"Titel generiert: {title}")
# Schritt 2: Zusammenfassung generieren
summary = await self.generate_summary(task, document_type, title)
logger.info("Zusammenfassung generiert")
# Schritt 3: Inhaltsverzeichnis mit Prompts generieren
toc_with_prompts = await self.generate_toc_with_prompts(task, document_type, title, summary)
logger.info(f"Inhaltsverzeichnis mit {len(toc_with_prompts)} Kapiteln generiert")
# Schritt 4: Kapitelinhalte in einer Schleife generieren
chapter_contents = {}
for chapter_title, chapter_prompt in toc_with_prompts.items():
logger.info(f"Generiere Inhalt für Kapitel: {chapter_title}")
content = await self.generate_chapter_content(
chapter_title, chapter_prompt, task, document_type, title, summary
)
chapter_contents[chapter_title] = content
# Schritt 5: Dokument zusammenführen
final_document = self._format_final_document(title, summary, toc_with_prompts, chapter_contents)
logger.info(f"Dokument fertiggestellt mit {len(final_document)} Zeichen")
# Schritt 6: Antwort zurückgeben
documentation_response = {
"role": "assistant",
"content": f"{final_document}\n\n[STATUS: ERGEBNIS]",
"agent_type": self.type
}
# Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
content, status = self.extract_status(documentation_response["content"])
documentation_response["content"] = content
# Setze den Status im Kontext, falls vorhanden
if context is not None:
context["status"] = status
# Chat-Service schließen
await self.chat_service.close()
self.chat_service = None
documentation_response["result_format"] = self.result_format
return documentation_response
except Exception as e:
logger.error(f"Fehler bei der Dokumentationserstellung: {str(e)}", exc_info=True)
# Chat-Service schließen bei Fehler
if self.chat_service:
try:
await self.chat_service.close()
except:
pass
self.chat_service = None
# Fehlerantwort zurückgeben
return {
"role": "assistant",
"content": f"Bei der Erstellung der Dokumentation ist ein Fehler aufgetreten: {str(e)}",
"agent_type": self.type
}
# Singleton-Instanz
_documentation_agent = None
def get_documentation_agent():
"""Gibt eine Singleton-Instanz des Dokumentations-Agenten zurück"""
global _documentation_agent
if _documentation_agent is None:
_documentation_agent = DocumentationAgent()
return _documentation_agent
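A short sketch of driving the staged pipeline above end to end (title, summary, table of contents, chapters, final document); the request text is a made-up example.

import asyncio

async def build_report_example():
    agent = get_documentation_agent()
    message = {"role": "user", "content": "Erstelle einen Bericht über die Q1-Ergebnisse."}
    # process_message runs the whole chain and returns the assembled Markdown.
    response = await agent.process_message(message, context={})
    print(response["result_format"])  # "FormattedDocument"
    print(response["content"][:500])

# asyncio.run(build_report_example())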


@@ -1,168 +0,0 @@
"""
Filecreator-Agent für die Erstellung von Dateien mit Inhalten und deren Speicherung in der Datenbank (Fortsetzung).
"""
import logging
import base64
from typing import List, Dict, Any, Optional, Tuple
import uuid
from datetime import datetime
from modules.agentservice_base import BaseAgent
logger = logging.getLogger(__name__)
class FilecreatorAgent(BaseAgent):
"""Agent für die Erstellung und Speicherung von Dateien"""
# (Vorherige Implementierung hier)
def _extract_file_params(self, message_content: str) -> Dict[str, Any]:
"""
Extrahiert Dateiparameter aus dem Nachrichteninhalt.
Args:
message_content: Inhalt der Nachricht
Returns:
Dictionary mit Dateiparametern
"""
# Grundlegende Parameter
file_params = {
"name": "document.txt",
"content": "",
"type": "text/plain"
}
# Einfache Heuristik zur Extraktion der Parameter
lines = message_content.split('\n')
content_lines = []
is_content_section = False
for line in lines:
line = line.strip()
# Dateiname erkennen
if line.startswith("DATEINAME:") or line.startswith("FILENAME:"):
file_params["name"] = line.split(":", 1)[1].strip()
# Dateityp erkennen
elif line.startswith("TYP:") or line.startswith("TYPE:"):
file_type = line.split(":", 1)[1].strip().lower()
# MIME-Typ anhand der Angabe setzen
if file_type in ["text", "txt", "plain"]:
file_params["type"] = "text/plain"
if not file_params["name"].endswith(".txt"):
file_params["name"] += ".txt"
elif file_type in ["markdown", "md"]:
file_params["type"] = "text/markdown"
if not file_params["name"].endswith(".md"):
file_params["name"] += ".md"
elif file_type in ["csv"]:
file_params["type"] = "text/csv"
if not file_params["name"].endswith(".csv"):
file_params["name"] += ".csv"
elif file_type in ["json"]:
file_params["type"] = "application/json"
if not file_params["name"].endswith(".json"):
file_params["name"] += ".json"
elif file_type in ["html"]:
file_params["type"] = "text/html"
if not file_params["name"].endswith(".html"):
file_params["name"] += ".html"
# Inhalt sammeln
elif line == "INHALT:" or line == "CONTENT:":
is_content_section = True
continue
elif is_content_section:
content_lines.append(line)
# Wenn kein Inhalt gefunden wurde, versuche den gesamten Inhalt zu verwenden
if not content_lines and not is_content_section:
# Ignoriere die ersten und letzten Zeilen (können Anweisungen sein)
if len(lines) > 4:
content_lines = lines[2:-2]
else:
content_lines = lines
# Inhalt zusammensetzen
file_params["content"] = "\n".join(content_lines)
# Dateiformat aus dem Dateinamen ableiten, falls nicht explizit angegeben
if "type" not in file_params:
file_extension = file_params["name"].split(".")[-1].lower() if "." in file_params["name"] else ""
if file_extension == "md":
file_params["type"] = "text/markdown"
elif file_extension == "csv":
file_params["type"] = "text/csv"
elif file_extension == "json":
file_params["type"] = "application/json"
elif file_extension == "html":
file_params["type"] = "text/html"
else:
file_params["type"] = "text/plain"
return file_params
async def _create_and_save_file(self, file_params: Dict[str, Any], lucydom_interface) -> Tuple[str, str, str]:
"""
Erstellt und speichert eine Datei in der Datenbank.
Args:
file_params: Parameter für die Dateierstellung
lucydom_interface: Interface für Datenbankzugriffe
Returns:
Tuple mit (file_id, file_name, file_type)
"""
if not lucydom_interface:
raise ValueError("Kein LucyDOM-Interface verfügbar für die Dateispeicherung")
# Dateiparameter extrahieren
file_name = file_params.get("name", "document.txt")
file_content = file_params.get("content", "")
content_type = file_params.get("type", "text/plain")
# Dateityp aus dem Content-Type ableiten
file_type = "document" # Standard-Dateityp
if content_type.startswith("image/"):
file_type = "image"
# Binäre Dateidaten erstellen
file_data = file_content.encode('utf-8')
# Datei über LucyDOM-Interface speichern
try:
file_meta = lucydom_interface.save_uploaded_file(file_data, file_name)
if not file_meta or "id" not in file_meta:
raise ValueError("Fehler beim Speichern der Datei")
file_id = file_meta["id"]
# Dateityp aktualisieren, falls notwendig
update_data = {"type": file_type, "content_type": content_type}
lucydom_interface.update_file(file_id, update_data)
return file_id, file_name, file_type
except Exception as e:
logger.error(f"Fehler beim Speichern der Datei {file_name}: {str(e)}")
raise ValueError(f"Fehler beim Speichern der Datei: {str(e)}")
# Singleton-Instanz
_filecreator_agent = None
def get_filecreator_agent():
"""Gibt eine Singleton-Instanz des FileCreator-Agenten zurück"""
global _filecreator_agent
if _filecreator_agent is None:
_filecreator_agent = FilecreatorAgent()
return _filecreator_agent
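For reference, a message in the marker format that _extract_file_params above parses; the file name and content are invented, and the expected result in the comment simply follows the parsing rules of that method.

example_message = """DATEINAME: quartalsbericht
TYP: markdown
INHALT:
# Quartalsbericht
Umsatz und Kosten im Überblick.
"""

agent = get_filecreator_agent()
params = agent._extract_file_params(example_message)
# Roughly: {"name": "quartalsbericht.md", "type": "text/markdown",
#           "content": "# Quartalsbericht\nUmsatz und Kosten im Überblick."}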


@@ -1,175 +0,0 @@
"""
Sharepoint-Agent für die Interaktion mit Sharepoint-Ressourcen und Dokumenten.
"""
import logging
from typing import List, Dict, Any, Optional
from modules.agentservice_base import BaseAgent
logger = logging.getLogger(__name__)
class SharepointAgent(BaseAgent):
"""Agent für den Zugriff auf und die Arbeit mit SharePoint-Ressourcen"""
_instance = None
@classmethod
def get_instance(cls):
"""Gibt eine Singleton-Instanz zurück"""
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self):
"""Initialisiert den SharePoint-Agenten"""
super().__init__()
self.id = "sharepoint_agent"
self.name = "SharePoint-Agent"
self.type = "sharepoint"
self.description = "Zugriff auf und Arbeit mit SharePoint-Ressourcen"
self.capabilities = "Suche und Abruf von Dokumenten aus SharePoint, Dokumentenverwaltung, Metadaten-Extraktion und Integration von SharePoint-Inhalten"
self.instructions = """
Du bist der SharePoint-Agent, ein Spezialist für die Interaktion mit Microsoft SharePoint. Deine Aufgabe ist es:
1. SharePoint-Dokumente und -Ressourcen zu durchsuchen und abzurufen
2. Metadaten aus SharePoint-Dokumenten zu extrahieren und zu analysieren
3. Strukturierte Informationen aus SharePoint-Bibliotheken zu sammeln
4. Dokumente basierend auf Metadaten zu filtern und zu organisieren
5. Inhalte aus verschiedenen SharePoint-Quellen zu integrieren und zusammenzuführen
6. Informationen aus SharePoint-Listen und -Dokumentbibliotheken zu extrahieren
7. Zusammenfassungen und Analysen von SharePoint-Inhalten zu erstellen
Bei der Darstellung deiner Ergebnisse:
- Strukturiere die Informationen klar und übersichtlich
- Gib den Ursprung und die Metadaten der Dokumente an
- Zeige Beziehungen zwischen verschiedenen Dokumenten und Ressourcen auf
- Hebe wichtige Erkenntnisse und Muster hervor
- Biete Kontext und Relevanz für die gefundenen Informationen
"""
self.result_format = "DocumentList"
def get_prompt(self, message_context: Dict[str, Any]) -> str:
"""
Generiert einen angepassten Prompt für den SharePoint-Agenten.
Args:
message_context: Kontext der Nachricht
Returns:
Formatierter Prompt für den SharePoint-Agenten
"""
# Basis-Prompt vom BaseAgent holen
base_prompt = super().get_prompt(message_context)
# Zusätzliche Anweisungen für SharePoint-Interaktion
sharepoint_instructions = """
SHAREPOINT-INTERAKTIONS-RICHTLINIEN:
1. Präzisiere die Suchkriterien für SharePoint-Ressourcen
2. Identifiziere relevante Bibliotheken, Listen und Standorte
3. Definiere benötigte Metadaten und Inhalte
4. Berücksichtige Berechtigungsanforderungen
5. Priorisiere aktuelle und relevante Dokumente
6. Stelle eine strukturierte Darstellung der Ergebnisse sicher
Für eine gute SharePoint-Integration:
- Gib detaillierte Pfade und Standorte an
- Berücksichtige verschiedene Dokumenttypen und Formate
- Zeige Metadaten und Dokumenteigenschaften
- Biete Kontext zu den gefundenen Ressourcen
- Berücksichtige Versionsinformationen
"""
# Task aus dem Kontext extrahieren
task = message_context.get("task", "")
task_instructions = f"\nSHAREPOINT-AUFTRAG:\n{task}\n" if task else ""
# Vollständigen Prompt zusammenbauen
complete_prompt = f"{base_prompt}\n\n{sharepoint_instructions}\n{task_instructions}"
return complete_prompt.strip()
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und interagiert mit SharePoint.
Args:
message: Die zu verarbeitende Nachricht
context: Zusätzlicher Kontext
Returns:
Die generierte Antwort mit SharePoint-Inhalten
"""
# Hier würde die tatsächliche Interaktion mit SharePoint stattfinden
# In der finalen Implementierung würde ein SharePoint-Connector verwendet werden
# Als Beispiel geben wir eine Standardantwort zurück
sharepoint_response = {
"role": "assistant",
"content": f"""Ich habe als {self.name} die SharePoint-Ressourcen durchsucht und folgende Ergebnisse gefunden:
## SharePoint-Suchergebnisse
Basierend auf deiner Anfrage habe ich folgende relevante Dokumente identifiziert:
### Dokumente
1. **Projektplan_2025.docx** (Letzte Änderung: 15.03.2025)
- Standort: Projekte/Strategische Planung
- Autor: Maria Schmidt
- Schlüsselinhalt: Zeitplan für Q2-Q4 2025, Ressourcenplanung, Meilensteine
2. **Marktanalyse_Q1_2025.pptx** (Letzte Änderung: 22.02.2025)
- Standort: Marketing/Marktforschung
- Autor: Thomas Müller
- Schlüsselinhalt: Aktuelle Markttrends, Wettbewerbsanalyse, Chancen und Risiken
3. **Budgetplanung_2025.xlsx** (Letzte Änderung: 01.03.2025)
- Standort: Finanzen/Planung
- Autor: Sarah Weber
- Schlüsselinhalt: Detaillierte Budgetaufschlüsselung nach Abteilungen und Quartalen
### SharePoint-Listen
1. **Projektstatusliste**
- 12 Einträge mit relevanten Projektstatusinformationen
- Letzte Aktualisierung: 25.03.2025
## Zusammenfassung der Inhalte
Die gefundenen Dokumente zeigen übereinstimmend, dass:
- Der Fokus im Jahr 2025 auf der Expansion in neue Märkte liegt
- Das Budget für Forschung und Entwicklung um 15% erhöht wurde
- Drei neue Hauptprojekte im zweiten Quartal starten werden
## Empfehlungen
Basierend auf den gefundenen Informationen empfehle ich:
1. Die Projektpläne für Q2 mit besonderem Fokus auf die neuen Hauptprojekte zu prüfen
2. Die Ressourcenzuweisung entsprechend der Budgeterhöhung anzupassen
3. Die Marktanalyse als Grundlage für die Expansionsstrategie zu verwenden
Die Dokumente sind alle aktuell und wurden von den verantwortlichen Fachabteilungen erstellt.
[STATUS: ERGEBNIS]""",
"agent_type": self.type
}
# Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
content, status = self.extract_status(sharepoint_response["content"])
sharepoint_response["content"] = content
# Setze den Status im Kontext, falls vorhanden
if context is not None:
context["status"] = status
sharepoint_response["result_format"] = self.result_format
return sharepoint_response
# Singleton-Instanz
_sharepoint_agent = None
def get_sharepoint_agent():
"""Gibt eine Singleton-Instanz des SharePoint-Agenten zurück"""
global _sharepoint_agent
if _sharepoint_agent is None:
_sharepoint_agent = SharepointAgent()
return _sharepoint_agent
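Since the implementation above returns a canned answer in place of a real SharePoint connector, a minimal sketch of how it is driven today; the task text is illustrative.

import asyncio

async def sharepoint_stub_example():
    agent = get_sharepoint_agent()
    context = {"task": "Finde aktuelle Projektpläne und Budgets"}
    message = {"role": "user", "content": "Projektplanung 2025"}
    response = await agent.process_message(message, context)
    print(context["status"])          # "ERGEBNIS", extracted from the canned answer
    print(response["result_format"])  # "DocumentList"

# asyncio.run(sharepoint_stub_example())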


@@ -1,512 +0,0 @@
"""
WebCrawler-Agent für die Recherche und Beschaffung von Informationen aus dem Web.
"""
import json
import logging
import random
import time
from typing import List, Dict, Any, Optional
import urllib
from urllib.parse import quote_plus, unquote
from bs4 import BeautifulSoup
import requests
from modules.agentservice_base import BaseAgent
from connectors.connector_aichat_openai import ChatService
logger = logging.getLogger(__name__)
class WebcrawlerAgent(BaseAgent):
"""Agent für Web-Recherche und Informationsbeschaffung"""
_instance = None
chat_service = ChatService()
#INIT --> should go to config
max_url=3
max_key=3
max_result=3
timeout = 10
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
'Accept-Language': 'en-US,en;q=0.5',
'Referer': 'https://www.google.com/',
'DNT': '1',
'Connection': 'keep-alive',
'Upgrade-Insecure-Requests': '1',
}
max_urls = 10
max_content_length=100000
@classmethod
def get_instance(cls):
"""Gibt eine Singleton-Instanz zurück"""
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self):
"""Initialisiert den WebCrawler-Agenten"""
super().__init__()
self.id = "webcrawler_agent"
self.name = "Webscraper"
self.type = "scraper"
self.description = "Recherchiert Informationen im Web"
self.capabilities = "Informationsrecherche, Datenbeschaffung aus dem Web, Quellenbewertung und Zusammenführung von Online-Informationen"
self.instructions = ""
self.result_format = "SearchResults"
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
try:
# Führe die Web-Recherche durch und warte auf das Ergebnis mit await
web_query_result = await self.get_web_query(message)
# Antwort-Objekt erstellen
response = {
"role": "assistant",
"content": f"{web_query_result} [STATUS: ERGEBNIS]",
"agent_type": self.type
}
# Extrahiere den Status aus der Antwort und aktualisiere den Inhalt
content, status = self.extract_status(response["content"])
response["content"] = content
# Setze den Status im Kontext, falls vorhanden
if context is not None:
context["status"] = status
response["result_format"] = self.result_format
return response
except Exception as e:
logger.error(f"Fehler bei der Web-Recherche: {str(e)}", exc_info=True)
# Fehlerantwort zurückgeben
return {
"role": "assistant",
"content": f"Bei der Web-Recherche ist ein Fehler aufgetreten: {str(e)}",
"agent_type": self.type
}
async def get_web_query(self, message_context: Dict[str, Any]) -> str:
prompt = await self.get_prompt(message_context)
result_json = await self.run_web_query(prompt)
result_data = ""
summary_src = ""
logger.info(f"Web analysis prompt '{prompt}' delivers {len(result_json)} results.")
if isinstance(result_json, list):
for i, result in enumerate(result_json, 1):
web_answer_instructions = f"""
Fass das Resultat gemäss dem Auftrag zusammen in maximal rund 2000 Zeichen. Auftrag = '{prompt.replace("'","")}'
Fasse die wichtigsten Erkenntnisse zusammen und setze sie in Bezug zur ursprünglichen Anfrage. Die Einleitung kannst Du weglassen.
Achte darauf, nur relevante und qualitativ hochwertige Informationen zu extrahieren, welche einen Bezug zum Auftrag haben, und übersichtlich zu präsentieren. Vermittle ein ausgewogenes Bild der recherchierten Informationen.
Dies ist das Resultat:
{result['data']}
"""
# Zusätzliche Anweisungen für Web-Recherche
content_text = await self.chat_service.call_api(
messages=[
{
"role": "system",
"content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."
},
{
"role": "user",
"content": web_answer_instructions
}
]
)
result_data += f"\n\n[{i}] {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\nContent: {content_text}"
summary_src+=f"\n{content_text}"
else:
result_data = "no data received"
logger.info(f"Web analysis result sent {len(result_data)}B")
# Zusätzliche Zusammenfassung
summary=""
if len(summary_src)>1:
summary = await self.chat_service.call_api(
messages=[
{
"role": "system",
"content": "Du erstellst prägnante Zusammenfassungen von Rechercheergbnissen."
},
{
"role": "user",
"content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src}\n"
}
]
)
result = f"{summary}\n\n{result_data}"
return result
async def get_prompt(self, message_context: Dict[str, Any]) -> str:
task = message_context.get("content", "")
return task.strip()
async def run_web_query(self, prompt: str) -> List[Dict]:
if prompt=="":
return []
ptext=f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.
'url': A list of maximum {self.max_url} specific URLs extracted from the task string.
'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
"""
content_text = await self.chat_service.call_api(
messages=[
{
"role": "system",
"content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
},
{
"role": "user",
"content": ptext
} ]
)
# Remove markdown formatting if present
if content_text.startswith("```json"):
# Find the end of the JSON block
end_marker = "```"
end_index = content_text.rfind(end_marker)
if end_index != -1:
# Extract the JSON content without the markdown markers
content_text = content_text[7:end_index].strip()
# Now parse the JSON
try:
logger.info(f"Valid json received: {str(content_text)}")
pjson = json.loads(content_text)
# Now call scrape_json with the parsed dictionary
result_json = await self.scrape_json(pjson)
return result_json
except json.JSONDecodeError as e:
logger.error(f"Failed to parse JSON: {e}")
logger.error(f"Cleaned content: {content_text[:100]}...")
return []
async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
"""
Scrapes web content based on a research strategy JSON.
Args:
research_strategy: A dictionary containing:
- 'skey': List of search keywords
- 'url': List of direct URLs to scrape
Returns:
List of result dictionaries with title, url, snippet and page data
"""
logger.info("Starting JSON-based web scraping")
results = []
# Validate input structure
if not isinstance(research_strategy, dict):
logger.error("Invalid research_strategy format: not a dictionary")
return {"error": "Invalid research_strategy format: not a dictionary"}
keys = research_strategy.get("skey", [])
direct_urls = research_strategy.get("url", [])
if not isinstance(keys, list) or not isinstance(direct_urls, list):
logger.error("Invalid research_strategy format: keys, or url is not a list")
return {"error": "Invalid research_strategy format: keys, or url is not a list"}
# Process search keywords through search engine
for keyword in keys:
logger.info(f"Processing keyword: {keyword}")
found_results = self.search_web(keyword) # List with Dict: title,url,snippet,data
logger.info(f"... {len(found_results)} results found")
results.extend(found_results)
# Process direct URLs
logger.info(f"Processing {len(direct_urls)} direct URLs")
for url in direct_urls:
if any(r.get('url') == url for r in results):
logger.info(f"Skipping already scraped URL: {url}")
continue
soup=self.read_url(url)
# Extract title from the page if it exists
if isinstance(soup, BeautifulSoup):
title_tag = soup.find('title')
title = title_tag.text.strip() if title_tag else "No title"
# Alternative: You could also look for h1 tags if the title tag is missing
if title == "No title":
h1_tag = soup.find('h1')
if h1_tag:
title = h1_tag.text.strip()
else:
# Handle the case where soup is an error message string
title = "Error fetching page"
results.append(self.parse_result(soup,"No title",url))
logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
return results
def search_web(self, query: str) -> List[Dict]:
formatted_query = quote_plus(query)
url = f"https://html.duckduckgo.com/html/?q={formatted_query}"
search_results_soup = self.read_url(url)
if not search_results_soup or not search_results_soup.select('.result'):
logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
return []
# Extract search results
results = []
# Find all result containers
result_elements = search_results_soup.select('.result')
for result in result_elements:
# Extract title
title_element = result.select_one('.result__a')
title = title_element.text.strip() if title_element else 'No title'
# Extract URL (DuckDuckGo uses redirects, need to extract from href param)
url_element = title_element.get('href') if title_element else ''
extracted_url = 'No URL'
if url_element:
# Extract the actual URL from DuckDuckGo's redirect
if url_element.startswith('/d.js?q='):
start = url_element.find('?q=') + 3 # Skip '?q='
end = url_element.find('&', start) if '&' in url_element[start:] else None
extracted_url = unquote(url_element[start:end])
# Make sure the URL has the correct protocol prefix
if not extracted_url.startswith(('http://', 'https://')):
if not extracted_url.startswith('//'):
extracted_url = 'https://' + extracted_url
else:
extracted_url = 'https:' + extracted_url
else:
extracted_url = url_element
# Extract snippet directly from search results page
snippet_element = result.select_one('.result__snippet')
snippet = snippet_element.text.strip() if snippet_element else 'No description'
# Now fetch the actual page content for the data field
target_page_soup = self.read_url(extracted_url)
results.append({
'title': title,
'url': extracted_url,
'snippet': snippet,
'data': str(target_page_soup) if isinstance(target_page_soup, BeautifulSoup) else "Error fetching page"
})
# Limit the number of results if needed
if len(results) >= self.max_result:
break
return results
def read_url(self, url: str) -> BeautifulSoup:
"""
Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben.
Args:
url: Die zu lesende URL
Returns:
BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}
try:
import time
# Initialer Request
response = requests.get(url, headers=headers, timeout=10)
# Polling für Status 202
if response.status_code == 202:
# Bis zu vier weitere Versuche mit steigenden Intervallen
backoff_times = [0.5, 1.0, 2.0, 5.0] # 0.5s, 1s, 2s, 5s
for wait_time in backoff_times:
time.sleep(wait_time) # Warten mit steigender Zeit
response = requests.get(url, headers=headers, timeout=10)
# Wenn kein 202 mehr, dann abbrechen
if response.status_code != 202:
break
# Für andere Fehler-Status einen Fehler auslösen
response.raise_for_status()
# HTML parsen
return BeautifulSoup(response.text, 'html.parser')
except Exception as e:
# Leeres BeautifulSoup-Objekt erstellen
return BeautifulSoup("<html><body></body></html>", 'html.parser')
def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
# Extract snippet/description
snippet_element = data.select_one('.result__snippet')
snippet = snippet_element.text.strip() if snippet_element else 'No description'
result={
'title': title,
'url': url,
'snippet': snippet,
'data': data.prettify()
}
return result
def _old_scrape_url(self, url: str) -> str:
try:
logger.info(f"Requesting URL: {url}")
response = requests.get(url, headers=self.headers, timeout=self.timeout)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')
title = soup.title.string if soup.title else "No title"
for element in soup.select('script, style, meta, noscript, iframe, nav, footer, header, aside'):
element.extract()
main_content = ""
# Common content containers
content_selectors = [
'main', '#main', '.main',
'article', '.article',
'#content', '.content',
'.post', '#post',
'.entry-content', '.post-content',
'.page-content', '.article-content'
]
# Try each selector
for selector in content_selectors:
elements = soup.select(selector)
if elements:
main_content = elements[0].get_text(separator='\n', strip=True)
logger.info(f"Found content using selector: {selector}")
break
# If no main content found, use body text
if not main_content:
main_content = soup.body.get_text(separator='\n', strip=True)
logger.info("Using body text as no main content container found")
# Clean up the text
lines = []
for line in main_content.split('\n'):
line = line.strip()
if line and len(line) > 15: # Skip very short lines
lines.append(line)
main_content = '\n'.join(lines)
# Truncate if too long
if len(main_content) > self.max_content_length:
main_content = main_content[:self.max_content_length] + "...\n[Inhalt gekürzt]"
return main_content.strip()
except Exception as e:
logger.error(f"Fehler beim Scrapen von {url}: {str(e)}")
return f"[Fehler beim Scrapen von {url}: {str(e)}]"
def _old_extract_urls_from_search_results(self, html_content: str) -> List[str]:
"""
Extracts URLs from search engine results.
Args:
html_content: HTML content of the search results page
Returns:
List of extracted URLs
"""
soup = BeautifulSoup(html_content, 'html.parser')
urls = []
# Different search engines have different HTML structures
# Google links
for a_tag in soup.select('a[href^="/url?"]'):
href = a_tag.get('href', '')
if '/url?q=' in href:
url = href.split('/url?q=')[1].split('&')[0]
url = urllib.parse.unquote(url)
if url.startswith('http') and url not in urls:
urls.append(url)
# Bing links
for a_tag in soup.select('a[href^="http"]'):
url = a_tag.get('href', '')
excluded_domains = getattr(self, 'excluded_domains', [])
if (url.startswith('http') and
not any(domain in url for domain in excluded_domains) and
url not in urls):
urls.append(url)
# Yahoo links
for a_tag in soup.select('a.d-ib'):
url = a_tag.get('href', '')
if url.startswith('http') and url not in urls:
urls.append(url)
# If no URLs found, try a more generic approach
if not urls:
for a_tag in soup.find_all('a', href=True):
url = a_tag['href']
excluded_domains = getattr(self, 'excluded_domains', [])
if (url.startswith('http') and
not any(domain in url for domain in excluded_domains) and
url not in urls):
urls.append(url)
# Limit the number of results
return urls[:self.max_urls]
# Singleton-Instanz
_webcrawler_agent = None
def get_webcrawler_agent():
"""Gibt eine Singleton-Instanz des WebCrawler-Agenten zurück"""
global _webcrawler_agent
if _webcrawler_agent is None:
_webcrawler_agent = WebcrawlerAgent()
return _webcrawler_agent
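The research strategy that run_web_query requests from the model and that scrape_json consumes has the two-key shape sketched below; the URL and search sentence are placeholders.

import asyncio

strategy = {
    "url": ["https://example.com/some-press-release"],
    "skey": ["aktuelle Marktanteile Elektroautos 2025"],
}

async def crawl_example():
    agent = get_webcrawler_agent()
    # scrape_json searches each key sentence via DuckDuckGo and fetches the
    # direct URLs, returning a list of {title, url, snippet, data} dicts.
    results = await agent.scrape_json(strategy)
    for r in results:
        print(r["title"], r["url"])

# asyncio.run(crawl_example())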


@@ -1,124 +0,0 @@
"""
Erweiterte Basisklasse für Agenten im Agentservice.
Dieser Modul stellt eine Basis-Agent-Klasse mit Rückgabeformat-Attribut für spezialisierte Agenten bereit.
"""
import logging
from typing import List, Dict, Any, Optional, Tuple
logger = logging.getLogger(__name__)
class BaseAgent:
"""Basisklasse für alle Agenten im System"""
def __init__(self):
"""Initialisiert den Basis-Agenten"""
self.id = "base_agent"
self.name = "Base Agent"
self.type = "base"
self.description = "Basisagent als Vorlage für spezialisierte Agenten"
self.capabilities = "Grundlegende Agentenoperationen"
self.instructions = """
Als Basis-Agent kannst du grundlegende Aufgaben erledigen.
Diese Anweisungen sollten von spezialisierten Agenten überschrieben werden.
"""
# Neues Attribut für das Rückgabeformat
self.result_format = "Text" # Standard: Textformat
def get_agent_info(self) -> Dict[str, Any]:
"""
Gibt Informationen über den Agenten zurück.
Returns:
Dict mit Agenten-Informationen
"""
return {
"id": self.id,
"name": self.name,
"type": self.type,
"description": self.description,
"capabilities": self.capabilities,
"instructions": self.instructions,
"result_format": self.result_format, # Rückgabeformat hinzugefügt
"used": False, # Wird zur Laufzeit aktualisiert
"last_result_status": None # Wird zur Laufzeit aktualisiert
}
def get_prompt(self, message_context: Dict[str, Any]) -> str:
"""
Generiert einen an den Agenten angepassten Prompt basierend auf Kontext.
Args:
message_context: Kontext der Nachricht
Returns:
Formatierter Prompt für den Agenten
"""
# Basis-Prompt, der von spezialisierten Agenten überschrieben werden kann
base_prompt = f"""
Du bist {self.name}, ein {self.type} Agent.
{self.description}
Deine Fähigkeiten: {self.capabilities}
{self.instructions}
Rückgabeformat: {self.result_format}
Formatiere deine Antwort klar und strukturiert. Beantworte alle Aspekte der Anfrage.
Deklariere am Ende deiner Antwort den Status deines Ergebnisses:
[STATUS: ERGEBNIS] - Wenn du ein vollständiges, konkretes Ergebnis geliefert hast
[STATUS: TEILWEISE] - Wenn du ein teilweises Ergebnis geliefert hast
[STATUS: PLAN] - Wenn du nur einen Plan vorgeschlagen hast
"""
return base_prompt.strip()
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und generiert eine Antwort.
Sollte von spezialisierten Agenten überschrieben werden.
Args:
message: Die zu verarbeitende Nachricht
context: Zusätzlicher Kontext (optional)
Returns:
Die generierte Antwort
"""
# Basis-Implementierung, die einfach eine Standardantwort zurückgibt
return {
"role": "assistant",
"content": f"Ich bin {self.name} und habe deine Anfrage erhalten. Allerdings bin ich nur eine Basisimplementierung ohne spezifische Funktionalität. [STATUS: PLAN]",
"agent_type": self.type,
"result_format": self.result_format # Rückgabeformat in der Antwort
}
def extract_status(self, content: str) -> Tuple[str, str]:
"""
Extrahiert den Status aus dem Inhalt der Antwort.
Args:
content: Inhalt der Antwort
Returns:
Tuple mit (bereinigter Text, Status)
"""
import re
# Standard-Status, falls keine Deklaration gefunden wird
status = "UNBEKANNT"
# Suche nach Status-Deklaration
status_pattern = r'\[STATUS:\s*(ERGEBNIS|TEILWEISE|PLAN)\]'
match = re.search(status_pattern, content, re.IGNORECASE)
if match:
# Extrahiere den Status
status = match.group(1).upper()
# Entferne die Status-Deklaration aus dem Text
content = re.sub(status_pattern, '', content, flags=re.IGNORECASE).strip()
return content, status
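# Beispielskizze (Annahme: direkter Aufruf auf dem BaseAgent; spezialisierte Agenten
# erben die Methode unverändert). Zeigt, wie extract_status() Text und Status trennt.
if __name__ == "__main__":
    agent = BaseAgent()
    antwort = "Die Kennzahlen wurden vollständig berechnet. [STATUS: ERGEBNIS]"
    text, status = agent.extract_status(antwort)
    print(status)  # ERGEBNIS
    print(text)    # Die Kennzahlen wurden vollständig berechnet.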

View file

@@ -1,556 +0,0 @@
# code_executor.py
import os
import sys
import uuid
import subprocess
import tempfile
import re
from typing import Dict, List, Optional, Tuple, Any
import importlib.util
import logging
# Logging einrichten
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
class CodeExecutor:
"""
Führt generierten Code in einer isolierten virtuellen Umgebung aus,
während Zugriff auf spezifische App-Module gewährt wird und
automatisch erforderliche Pakete installiert werden.
"""
def __init__(self,
app_modules: List[str] = None,
venv_path: Optional[str] = None,
timeout: int = 30,
max_memory_mb: int = 512,
allowed_packages: List[str] = None,
blocked_packages: List[str] = None):
"""
Initialisiert den CodeExecutor.
Args:
app_modules: Liste von Modulnamen, die dem generierten Code zur Verfügung stehen sollen
venv_path: Pfad zur virtuellen Umgebung. Falls None, wird eine temporäre erstellt
timeout: Maximale Ausführungszeit in Sekunden
max_memory_mb: Maximaler Arbeitsspeicher in MB
allowed_packages: Liste erlaubter Pakete (wenn None, werden alle erlaubt, außer blockierte)
blocked_packages: Liste blockierter Pakete (z.B. gefährliche oder ressourcenintensive)
"""
self.app_modules = app_modules or []
self.venv_path = venv_path
self.timeout = timeout
self.max_memory_mb = max_memory_mb
self.temp_dir = None
self.allowed_packages = allowed_packages
self.blocked_packages = blocked_packages or ["cryptography", "flask", "django", "tornado", "requests"]
def _create_venv(self) -> str:
"""Erstellt eine virtuelle Umgebung und gibt den Pfad zurück."""
if self.venv_path and os.path.exists(self.venv_path):
return self.venv_path
# Temporäres Verzeichnis für die virtuelle Umgebung erstellen
self.temp_dir = tempfile.mkdtemp(prefix="ai_code_exec_")
venv_path = os.path.join(self.temp_dir, "venv")
try:
# Virtuelle Umgebung erstellen
logger.info(f"Erstelle virtuelle Umgebung in {venv_path}")
subprocess.run([sys.executable, "-m", "venv", venv_path],
check=True,
capture_output=True)
return venv_path
except subprocess.CalledProcessError as e:
logger.error(f"Fehler beim Erstellen der virtuellen Umgebung: {e}")
raise RuntimeError(f"Konnte venv nicht erstellen: {e}")
def _get_pip_executable(self, venv_path: str) -> str:
"""Ermittelt den Pfad zum pip-Executable in der virtuellen Umgebung."""
if os.name == 'nt': # Windows
return os.path.join(venv_path, "Scripts", "pip.exe")
else: # Unix/Linux
return os.path.join(venv_path, "bin", "pip")
def _get_python_executable(self, venv_path: str) -> str:
"""Ermittelt den Pfad zum Python-Executable in der virtuellen Umgebung."""
if os.name == 'nt': # Windows
return os.path.join(venv_path, "Scripts", "python.exe")
else: # Unix/Linux
return os.path.join(venv_path, "bin", "python")
def _install_packages(self, packages: List[str], venv_path: str) -> Tuple[bool, str]:
"""
Installiert Pakete in der virtuellen Umgebung.
Args:
packages: Liste der zu installierenden Pakete
venv_path: Pfad zur virtuellen Umgebung
Returns:
Tuple aus (Erfolg, Fehlermeldung)
"""
if not packages:
return True, ""
# Überprüfen, ob Pakete erlaubt sind
blocked = []
for package in packages:
# Paketname ohne Version extrahieren
pkg_name = re.split('[=<>]', package)[0].strip()
if self.blocked_packages and pkg_name.lower() in [p.lower() for p in self.blocked_packages]:
blocked.append(pkg_name)
if self.allowed_packages and pkg_name.lower() not in [p.lower() for p in self.allowed_packages]:
blocked.append(pkg_name)
if blocked:
return False, f"Die folgenden Pakete sind nicht erlaubt: {', '.join(blocked)}"
# Pakete installieren
pip_executable = self._get_pip_executable(venv_path)
logger.info(f"Installiere Pakete in virtueller Umgebung: {', '.join(packages)}")
try:
# pip aktualisieren
subprocess.run(
[pip_executable, "install", "--upgrade", "pip"],
check=True,
capture_output=True,
timeout=60
)
# Pakete installieren
process = subprocess.run(
[pip_executable, "install"] + packages,
check=True,
capture_output=True,
text=True,
timeout=120 # 2 Minuten Timeout für Paketinstallation
)
return True, process.stdout
except subprocess.CalledProcessError as e:
error_msg = f"Fehler bei der Paketinstallation: {e.stderr}"
logger.error(error_msg)
return False, error_msg
except subprocess.TimeoutExpired:
return False, "Zeitüberschreitung bei der Paketinstallation."
except Exception as e:
return False, f"Unerwarteter Fehler bei der Paketinstallation: {str(e)}"
def _extract_required_packages(self, code: str) -> List[str]:
"""
Extrahiert benötigte Pakete aus dem Code durch Analyse von Import-Statements
und Pip-Installationsanweisungen.
Args:
code: Der Python-Code
Returns:
Liste der erkannten Paketnamen
"""
packages = set()
# Paketkommentare erkennen (# pip install package)
pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code)
for comment in pip_comments:
for pkg in comment.split():
if pkg and not pkg.startswith('-'):
packages.add(pkg.strip())
# Import-Statements analysieren
# Nur den Top-Level-Modulnamen erfassen (auch bei "import paket as alias" oder "import a, b")
import_lines = re.findall(r'^(?:import|from)\s+([^\s.,]+)', code, re.MULTILINE)
# Standardmodule, die nicht installiert werden müssen
std_modules = {
'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json',
'collections', 'itertools', 'functools', 'pathlib', 'shutil',
'tempfile', 'uuid', 'subprocess', 'threading', 'logging',
'traceback', 'io', 'copy'
}
# Module der App, die nicht installiert werden müssen
app_modules_prefixes = set(m.split('.')[0] for m in self.app_modules)
for module in import_lines:
if module not in std_modules and module not in app_modules_prefixes:
packages.add(module)
return list(packages)
def _create_module_loader(self) -> str:
"""
Erstellt ein Hilfsskript, das App-Module in die venv importiert.
Gibt den Pfad zum Hilfsskript zurück.
"""
if not self.app_modules:
return ""
# Temporäre Datei für den Module-Loader erstellen
module_loader_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
"module_loader.py")
# Pfad zu den App-Modulen bestimmen
app_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
# Modul-Loader-Code generieren
loader_code = f"""
import sys
import importlib.util
import os
# App-Pfad zum Suchpfad hinzufügen
sys.path.insert(0, "{app_path}")
# Module importieren
modules = {{}}
"""
# Code zum Importieren der Module hinzufügen
for module_name in self.app_modules:
loader_code += f"""
try:
modules["{module_name}"] = __import__("{module_name}", fromlist=["*"])
print(f"Modul '{module_name}' erfolgreich importiert")
except ImportError as e:
print(f"Fehler beim Importieren von '{module_name}': {{e}}")
"""
# Loader-Datei schreiben
with open(module_loader_path, "w") as f:
f.write(loader_code)
return module_loader_path
def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Führt den generierten Code in einer isolierten Umgebung aus.
Args:
code: Der auszuführende Python-Code
input_data: Eingabedaten für den Code (werden als JSON serialisiert)
Returns:
Dict mit Ausführungsergebnissen, Ausgabe und Fehlern
"""
# Virtuelle Umgebung erstellen oder bestehende verwenden
venv_path = self._create_venv()
# Erforderliche Pakete aus dem Code extrahieren
required_packages = self._extract_required_packages(code)
# Pakete installieren, falls erforderlich
install_success = True
install_log = ""
if required_packages:
install_success, install_log = self._install_packages(required_packages, venv_path)
if not install_success:
return {
"success": False,
"output": "",
"error": f"Fehler bei der Installation der erforderlichen Pakete: {install_log}",
"result": None,
"installed_packages": required_packages
}
# Temporäre Datei für den Code erstellen
code_id = str(uuid.uuid4())[:8]
code_file_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
f"ai_code_{code_id}.py")
# Module-Loader erstellen
module_loader_path = self._create_module_loader()
# Eingabedaten als JSON speichern, wenn vorhanden
input_path = ""
if input_data:
import json
input_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
f"input_{code_id}.json")
with open(input_path, "w") as f:
json.dump(input_data, f)
# Outputpfad für Ergebnisse
output_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
f"output_{code_id}.json")
# Wrapper für den Code erstellen, damit die App-Module verfügbar sind
wrapped_code = f"""
import sys
import json
import traceback
import os
# Ergebnisstruktur
result = {{
"success": False,
"output": "",
"error": "",
"result": None,
"installed_packages": {required_packages}
}}
try:
# Module laden, falls erforderlich
if "{module_loader_path}":
module_loader = __import__("module_loader")
globals().update({{k: v for k, v in module_loader.modules.items()}})
# Eingabedaten laden, falls vorhanden
input_data = None
if "{input_path}":
with open("{input_path}", "r") as f:
input_data = json.load(f)
# Ausgabeumleitung
from io import StringIO
original_stdout = sys.stdout
original_stderr = sys.stderr
captured_stdout = StringIO()
captured_stderr = StringIO()
sys.stdout = captured_stdout
sys.stderr = captured_stderr
# Benutzercode ausführen
try:
# Den Code in einem lokalen Namespace ausführen
local_vars = {{"input_data": input_data}}
exec('''{code}''', globals(), local_vars)
# Ergebnis speichern, falls eine Variable 'result' definiert wurde
if "result" in local_vars:
result["result"] = local_vars["result"]
result["success"] = True
except Exception as e:
result["error"] = str(e)
result["error"] += "\\n" + traceback.format_exc()
finally:
# Ausgabe erfassen
result["output"] = captured_stdout.getvalue()
result["error"] += captured_stderr.getvalue()
# Ausgabeumleitung zurücksetzen
sys.stdout = original_stdout
sys.stderr = original_stderr
except Exception as outer_e:
result["error"] = f"Fehler beim Ausführen des Setups: {{outer_e}}\\n{{traceback.format_exc()}}"
# Ergebnis speichern
with open("{output_path}", "w") as f:
json.dump(result, f, default=str)
"""
# Code in temporäre Datei schreiben
with open(code_file_path, "w") as f:
f.write(wrapped_code)
# Python-Interpreter aus der virtuellen Umgebung bestimmen
python_executable = self._get_python_executable(venv_path)
# Code ausführen
logger.info(f"Führe Code in virtueller Umgebung aus: {python_executable}")
try:
# Prozess mit Ressourcenbeschränkungen ausführen
cmd = [python_executable, code_file_path]
# Umgebungsvariablen setzen, um Speicherlimit zu erzwingen
env = os.environ.copy()
if self.max_memory_mb:
if os.name == 'posix': # Unix/Linux
# Auf Unix-Systemen können wir ulimit verwenden
cmd = ["bash", "-c", f"ulimit -v {self.max_memory_mb * 1024} && {python_executable} {code_file_path}"]
elif os.name == 'nt': # Windows
# Auf Windows können wir keine harten Speichergrenzen setzen, aber Job Objects verwenden
# Hier müsste eine komplexere Lösung implementiert werden
pass
# Prozess starten und mit Timeout ausführen
process = subprocess.run(
cmd,
timeout=self.timeout,
env=env,
capture_output=True,
text=True
)
# Ergebnis aus der Ausgabedatei lesen
if os.path.exists(output_path):
with open(output_path, "r") as f:
import json
execution_result = json.load(f)
else:
execution_result = {
"success": False,
"output": process.stdout,
"error": f"Keine Ergebnisdatei gefunden. Stderr: {process.stderr}",
"result": None,
"installed_packages": required_packages
}
except subprocess.TimeoutExpired:
execution_result = {
"success": False,
"output": "",
"error": f"Zeitüberschreitung bei der Ausführung (Timeout nach {self.timeout} Sekunden)",
"result": None,
"installed_packages": required_packages
}
except Exception as e:
execution_result = {
"success": False,
"output": "",
"error": f"Fehler bei der Ausführung: {str(e)}",
"result": None,
"installed_packages": required_packages
}
# Informationen zur Paketinstallation hinzufügen
if install_log:
execution_result["package_install_log"] = install_log
# Temporäre Dateien aufräumen
self._cleanup_temp_files([code_file_path, input_path, output_path])
return execution_result
def _cleanup_temp_files(self, file_paths: List[str]):
"""Räumt temporäre Dateien auf."""
for path in file_paths:
if path and os.path.exists(path):
try:
os.remove(path)
except Exception as e:
logger.warning(f"Konnte temporäre Datei nicht löschen {path}: {e}")
def cleanup(self):
"""Räumt alle temporären Ressourcen auf."""
if self.temp_dir and os.path.exists(self.temp_dir):
import shutil
try:
shutil.rmtree(self.temp_dir)
logger.info(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
except Exception as e:
logger.warning(f"Konnte temporäres Verzeichnis nicht löschen {self.temp_dir}: {e}")
def __del__(self):
"""Aufräumen beim Garbage Collection."""
self.cleanup()
# Beispiel zur Verwendung des erweiterten CodeExecutor in einem AI Chat
# from code_executor import CodeExecutor
def execute_ai_generated_code(prompt_result: str, input_data=None):
"""
Führt den von einer KI generierten Code aus und installiert automatisch benötigte Pakete
Args:
prompt_result: Der von der KI generierte Python-Code
input_data: Optionale Eingabedaten für den Code
Returns:
Ergebnis der Code-Ausführung
"""
# Verfügbare App-Module definieren
available_modules = [
"utils.sharepoint_crud",
# Weitere Module hier hinzufügen
]
# Liste erlaubter Pakete (optional)
allowed_packages = None # None bedeutet alle erlaubt, außer blockierte
# Liste blockierter Pakete (Sicherheitsrisiken oder ressourcenintensive Pakete)
blocked_packages = [
"cryptography", "flask", "django", "tornado", # Sicherheit
"tensorflow", "pytorch", "scikit-learn", # Ressourcenintensiv
]
# CodeExecutor initialisieren
executor = CodeExecutor(
app_modules=available_modules,
timeout=120, # 2 Minuten Timeout
max_memory_mb=1024, # 1GB Speicherlimit
allowed_packages=allowed_packages,
blocked_packages=blocked_packages
)
try:
# Code ausführen
result = executor.execute_code(prompt_result, input_data)
if result["success"]:
print("Code erfolgreich ausgeführt!")
print(f"Ausgabe: {result['output']}")
# Zeige installierte Pakete an
if "installed_packages" in result and result["installed_packages"]:
print(f"Installierte Pakete: {', '.join(result['installed_packages'])}")
return result["result"]
else:
print(f"Fehler bei der Ausführung: {result['error']}")
return None
finally:
# Aufräumen
executor.cleanup()
# Beispiel für die Verwendung
if __name__ == "__main__":
# Angenommen, dies ist der von der KI generierte Code mit Paketabhängigkeiten
ai_generated_code = """
# pip install pandas matplotlib
import pandas as pd
import matplotlib.pyplot as plt
import utils.sharepoint_crud as sp
# Daten aus input_data verwenden
file_path = input_data.get('file_path')
site_url = input_data.get('site_url')
# Beispieldaten erstellen
data = pd.DataFrame({
'Monat': ['Jan', 'Feb', 'Mär', 'Apr', 'Mai'],
'Umsatz': [1200, 1400, 1300, 1500, 1800]
})
# Plot erstellen
plt.figure(figsize=(10, 6))
plt.bar(data['Monat'], data['Umsatz'])
plt.title('Umsatz nach Monat')
plt.savefig('umsatz_plot.png')
print('Diagramm erstellt und gespeichert')
# SharePoint-Datei hochladen
result = sp.upload_file(file_path, site_url)
print(f"Datei wurde hochgeladen: {result}")
# Ergebnis zurückgeben
result = {
'data': data.to_dict(),
'plot_saved': True,
'upload_result': result
}
"""
# Daten für den Code bereitstellen
data = {
"file_path": "/path/to/document.docx",
"site_url": "https://example.sharepoint.com/sites/mysite"
}
# Code ausführen
execute_ai_generated_code(ai_generated_code, data)
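# Beispielskizze zur Paket-Erkennung (Annahme: eigenständige Vereinfachung des oben in
# CodeExecutor._extract_required_packages verwendeten Regex-Ansatzes, nur zur Veranschaulichung;
# Standardmodule wie "os" würden dort anschließend über std_modules herausgefiltert).
def _demo_paket_erkennung(code: str) -> set:
    import re
    pakete = set()
    # "# pip install paket"-Kommentare einsammeln
    for kommentar in re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code):
        pakete.update(p for p in kommentar.split() if p and not p.startswith('-'))
    # Top-Level-Importe einsammeln
    pakete.update(re.findall(r'^(?:import|from)\s+([^\s.,]+)', code, re.MULTILINE))
    return pakete

# print(_demo_paket_erkennung("# pip install pandas\nimport pandas as pd\nimport os\n"))
# -> {'pandas', 'os'}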

View file

@@ -1,475 +0,0 @@
"""
Hilfsfunktion für die intelligente Extraktion von Dateninhalten (Fortsetzung).
"""
from datetime import datetime
import logging
import json
from typing import List, Dict, Any, Optional, Tuple
import asyncio
import copy
# Import erweiterte Dateiverarbeitung
from gateway.gwserver.modules.agentservice_filemanager import extract_text_from_file_content
logger = logging.getLogger(__name__)
async def data_extraction(
prompt: str,
files: List[Dict[str, Any]],
messages: List[Dict[str, Any]],
ai_service,
lucydom_interface = None,
workflow_id: str = None,
add_log_func = None
) -> Dict[str, Any]:
"""
Führt einen AI Call durch, um zu bestimmen, welche Inhalte aus welchen Dateiobjekten extrahiert werden sollen,
und führt dann die notwendigen Extraktionen durch.
Args:
prompt: Spezifizierung, welche Daten extrahiert werden sollen
files: Liste aller verfügbaren Dateien mit Metadaten
messages: Liste aller Nachrichten im Workflow
ai_service: Service für KI-Anfragen
lucydom_interface: Interface für Datenbankzugriffe (optional)
workflow_id: Optionale ID des Workflows für Logging
add_log_func: Optionale Funktion für das Hinzufügen von Logs
Returns:
Strukturiertes Text-Objekt mit extrahierten Daten und Kontext-Informationen
"""
try:
# 1. AI Call zur Bestimmung der notwendigen Extraktionen
extraction_plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func)
# 2. Extraktionen durchführen
extracted_data = await _execute_extractions(
extraction_plan,
files,
messages,
lucydom_interface,
ai_service,
workflow_id,
add_log_func
)
# 3. Extrahierte Daten strukturieren
structured_result = _structure_extracted_data(extracted_data, files, prompt)
return structured_result
except Exception as e:
logger.error(f"Fehler bei der Datenextraktion: {str(e)}", exc_info=True)
# Fehler-Log hinzufügen
if add_log_func and workflow_id:
add_log_func(workflow_id, f"Fehler bei der Datenextraktion: {str(e)}", "error")
# Fehler-Ergebnis zurückgeben
return {
"error": str(e),
"status": "error",
"files_processed": len(files),
"message": f"Die Datenextraktion konnte nicht durchgeführt werden: {str(e)}"
}
async def _create_extraction_plan(
prompt: str,
files: List[Dict[str, Any]],
messages: List[Dict[str, Any]],
ai_service,
workflow_id: str = None,
add_log_func = None
) -> List[Dict[str, Any]]:
"""
Erstellt einen Extraktionsplan mit AI-Unterstützung.
Args:
prompt: Spezifizierung, welche Daten extrahiert werden sollen
files: Liste aller verfügbaren Dateien mit Metadaten
messages: Liste aller Nachrichten im Workflow
ai_service: Service für KI-Anfragen
workflow_id: Optionale ID des Workflows für Logging
add_log_func: Optionale Funktion für das Hinzufügen von Logs
Returns:
Extraktionsplan (Liste von Extraktionsanweisungen pro Datei)
"""
# Erstelle Kontext-Informationen für den AI Call
file_infos = []
for file in files:
# Basis-Metadaten
file_info = {
"id": file.get("id", ""),
"name": file.get("name", ""),
"type": file.get("type", ""),
"content_type": file.get("content_type", ""),
"size": file.get("size", "")
}
# Extraktionsstatus prüfen (falls vorhanden)
doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages)
if doc_contents:
# Prüfen, ob mindestens ein Content mit is_extracted=True existiert
already_extracted = any(
content.get("is_extracted", False) for content in doc_contents
)
file_info["already_extracted"] = already_extracted
# Eine kurze Vorschau des Inhalts hinzufügen (falls verfügbar)
for content in doc_contents:
if content.get("type") == "text" and content.get("text"):
preview_text = content.get("text", "")[:200] + "..." if len(content.get("text", "")) > 200 else content.get("text", "")
file_info["content_preview"] = preview_text
break
else:
file_info["already_extracted"] = False
file_infos.append(file_info)
# AI-Prompt erstellen
extraction_prompt = f"""
Du bist ein Datenextraktionsexperte, der mithilfe von KI-Analyse entscheidet, welche Dateien
und Inhalte für eine bestimmte Aufgabe extrahiert werden müssen.
AUFGABE:
{prompt}
VERFÜGBARE DATEIEN:
{json.dumps(file_infos, indent=2)}
Für jede Datei, die für die Aufgabe relevant ist, erstelle eine Extraktionsanweisung mit den folgenden Informationen:
1. file_id: Die ID der zu extrahierenden Datei
2. extract_needed: Boolean, ob eine Extraktion erforderlich ist (True, wenn die Datei noch nicht extrahiert wurde und für die Aufgabe benötigt wird)
3. extraction_prompt: Ein spezifischer Prompt für die Extraktion der Datei (besonders wichtig für Bilder und nicht-textbasierte Dateien)
4. importance: Priorität/Wichtigkeit für die Aufgabe (1-5, wobei 5 am wichtigsten ist)
Format:
[
{{
"file_id": "1234",
"extract_needed": true,
"extraction_prompt": "Extrahiere die Tabellendaten mit Fokus auf die Umsatzzahlen",
"importance": 5
}},
...
]
Gib nur das JSON-Array zurück, ohne weitere Erklärungen.
"""
# Log hinzufügen
if add_log_func and workflow_id:
add_log_func(workflow_id, "Extraktionsplan wird erstellt...", "info")
try:
# AI-Call durchführen
extraction_plan_response = await ai_service.call_api([{"role": "user", "content": extraction_prompt}])
# JSON aus der Antwort extrahieren
import re
json_match = re.search(r'\[.*\]', extraction_plan_response, re.DOTALL)
if json_match:
extraction_plan = json.loads(json_match.group(0))
# Log hinzufügen
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Extraktionsplan erstellt für {len(extraction_plan)} Dateien",
"info"
)
return extraction_plan
else:
# Fallback bei Parsing-Problemen
if add_log_func and workflow_id:
add_log_func(
workflow_id,
"Parsing-Fehler beim Extraktionsplan, erstelle Standard-Plan",
"warning"
)
# Standard-Plan: Alle nicht extrahierten Dateien extrahieren
default_plan = []
for file in files:
doc_contents = _extract_document_contents_from_messages(file.get("id", ""), messages)
already_extracted = any(
content.get("is_extracted", False) for content in doc_contents
) if doc_contents else False
default_plan.append({
"file_id": file.get("id", ""),
"extract_needed": not already_extracted,
"extraction_prompt": f"Extrahiere alle relevanten Informationen aus {file.get('name', '')}",
"importance": 3
})
return default_plan
except Exception as e:
logger.error(f"Fehler bei der Erstellung des Extraktionsplans: {str(e)}", exc_info=True)
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Fehler bei der Erstellung des Extraktionsplans: {str(e)}",
"error"
)
# Leerer Plan bei Fehlern
return []
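# Beispielskizze (Annahme: die KI-Antwort bettet das JSON-Array in Freitext ein, wie oben
# per re.search(r'\[.*\]', ..., re.DOTALL) behandelt; die Werte sind frei erfunden).
if __name__ == "__main__":
    import re
    antwort = 'Hier der Plan:\n[{"file_id": "1234", "extract_needed": true, "importance": 5}]\nFertig.'
    match = re.search(r'\[.*\]', antwort, re.DOTALL)
    plan = json.loads(match.group(0)) if match else []
    print(plan[0]["file_id"])  # 1234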
async def _execute_extractions(
extraction_plan: List[Dict[str, Any]],
files: List[Dict[str, Any]],
messages: List[Dict[str, Any]],
lucydom_interface,
ai_service,
workflow_id: str = None,
add_log_func = None
) -> List[Dict[str, Any]]:
"""
Führt die geplanten Extraktionen durch.
Args:
extraction_plan: Liste von Extraktionsanweisungen
files: Liste aller verfügbaren Dateien
messages: Liste aller Nachrichten im Workflow (für bereits extrahierte Inhalte)
lucydom_interface: Interface für Datenbankzugriffe
ai_service: Service für KI-Anfragen
workflow_id: Optionale ID des Workflows für Logging
add_log_func: Optionale Funktion für das Hinzufügen von Logs
Returns:
Liste mit extrahierten Daten pro Datei
"""
extracted_data = []
# Nach Wichtigkeit sortieren
sorted_plan = sorted(extraction_plan, key=lambda x: x.get("importance", 0), reverse=True)
for extraction_item in sorted_plan:
file_id = extraction_item.get("file_id")
extract_needed = extraction_item.get("extract_needed", False)
extraction_prompt = extraction_item.get("extraction_prompt", "")
# Dateimetadaten finden
file_metadata = next((f for f in files if f.get("id") == file_id), None)
if not file_metadata:
logger.warning(f"Datei mit ID {file_id} nicht gefunden")
continue
file_name = file_metadata.get("name", "")
file_type = file_metadata.get("type", "")
content_type = file_metadata.get("content_type", "")
# Log hinzufügen
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Verarbeite Datei: {file_name} (Extraktion notwendig: {extract_needed})",
"info"
)
# Extraktion nur durchführen, wenn notwendig
if extract_needed:
# Dateiinhalt über LucyDOM-Interface abrufen
if lucydom_interface:
try:
file_content = await lucydom_interface.read_file_content(file_id)
if not file_content:
if add_log_func and workflow_id:
add_log_func(workflow_id, f"Datei {file_name} nicht gefunden", "warning")
continue
# Extraktion basierend auf Dateityp durchführen
if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
# Bildanalyse mit AI-Service
if ai_service and hasattr(ai_service, "analyze_image"):
try:
image_analysis = await ai_service.analyze_image(
image_data=file_content,
prompt=extraction_prompt,
mime_type=content_type
)
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": image_analysis,
"is_extracted": True,
"extraction_method": "image_analysis"
})
if add_log_func and workflow_id:
add_log_func(workflow_id, f"Bild {file_name} erfolgreich analysiert", "info")
except Exception as e:
logger.error(f"Fehler bei der Bildanalyse {file_name}: {str(e)}")
if add_log_func and workflow_id:
add_log_func(workflow_id, f"Fehler bei der Bildanalyse {file_name}: {str(e)}", "error")
else:
# Fallback, wenn keine Bildanalyse verfügbar
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": f"Bild: {file_name} (Analyse nicht verfügbar)",
"is_extracted": False,
"extraction_method": "none"
})
else:
# Text-basierte Extraktion für alle anderen Dateitypen
try:
content, is_extracted = extract_text_from_file_content(
file_content, file_name, content_type
)
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": content,
"is_extracted": is_extracted,
"extraction_method": "text_extraction"
})
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Datei {file_name} extrahiert (Status: {is_extracted})",
"info"
)
except Exception as e:
logger.error(f"Fehler bei der Textextraktion {file_name}: {str(e)}")
if add_log_func and workflow_id:
add_log_func(workflow_id, f"Fehler bei der Textextraktion {file_name}: {str(e)}", "error")
except Exception as e:
logger.error(f"Fehler beim Lesen der Datei {file_name}: {str(e)}")
if add_log_func and workflow_id:
add_log_func(workflow_id, f"Fehler beim Lesen der Datei {file_name}: {str(e)}", "error")
else:
logger.warning(f"Kein LucyDOM-Interface verfügbar für Datei {file_name}")
if add_log_func and workflow_id:
add_log_func(workflow_id, f"Kein LucyDOM-Interface verfügbar für Datei {file_name}", "warning")
else:
# Keine Extraktion notwendig, vorhandene Inhalte verwenden
doc_contents = _extract_document_contents_from_messages(file_id, messages)
if doc_contents:
# Ersten Textinhalt verwenden
for content in doc_contents:
if content.get("type") == "text":
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": content.get("text", ""),
"is_extracted": content.get("is_extracted", False),
"extraction_method": "existing_content"
})
break
else:
# Keine vorhandenen Inhalte gefunden
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": f"Keine Inhalte verfügbar für {file_name}",
"is_extracted": False,
"extraction_method": "none"
})
return extracted_data
def _structure_extracted_data(
extracted_data: List[Dict[str, Any]],
files: List[Dict[str, Any]],
prompt: str
) -> Dict[str, Any]:
"""
Strukturiert die extrahierten Daten in ein formatiertes Ergebnis.
Args:
extracted_data: Liste der extrahierten Daten pro Datei
files: Liste aller verfügbaren Dateien
prompt: Ursprünglicher Extraktionsprompt
Returns:
Strukturiertes Ergebnisobjekt
"""
# Basis-Struktur erstellen
result = {
"prompt": prompt,
"files_processed": len(extracted_data),
"total_files": len(files),
"extraction_timestamp": datetime.now().isoformat(),
"status": "success",
"extracted_content": []
}
# Extrahierte Inhalte hinzufügen
for data_item in extracted_data:
# Datei Metadaten anreichern
file_id = data_item.get("file_id", "")
file_metadata = next((f for f in files if f.get("id") == file_id), {})
content_item = {
"file_id": file_id,
"name": data_item.get("name", file_metadata.get("name", "")),
"type": data_item.get("type", file_metadata.get("type", "")),
"content_type": file_metadata.get("content_type", ""),
"size": file_metadata.get("size", ""),
"is_extracted": data_item.get("is_extracted", False),
"extraction_method": data_item.get("extraction_method", ""),
"content": data_item.get("content", "")
}
result["extracted_content"].append(content_item)
return result
def _extract_document_contents_from_messages(file_id: str, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Extrahiert Document-Contents für eine bestimmte Datei aus den Workflow-Nachrichten.
Args:
file_id: ID der Datei
messages: Liste aller Nachrichten im Workflow
Returns:
Liste der Document-Contents für die angegebene Datei
"""
contents = []
for message in messages:
# Dokumente in der Nachricht durchsuchen
for document in message.get("documents", []):
source = document.get("source", {})
# Prüfen, ob die Datei-ID übereinstimmt
if source.get("id") == file_id:
# Contents der Datei hinzufügen
doc_contents = document.get("contents", [])
if doc_contents:
contents.extend(doc_contents)
return contents
def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):
"""Hilfsfunktion zum Loggen mit unterschiedlichen Log-Funktionen"""
# Log über die Logger-Instanz
if log_type == "error":
logger.error(message)
elif log_type == "warning":
logger.warning(message)
else:
logger.info(message)
# Log über die bereitgestellte Log-Funktion (falls vorhanden)
if add_log_func and workflow_id:
add_log_func(workflow_id, message, log_type, agent_id, agent_name)
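# Beispielskizze (Annahme: minimale, frei erfundene Eingaben; zeigt die von
# _structure_extracted_data erzeugte Ergebnisstruktur mit angereicherten Metadaten).
if __name__ == "__main__":
    beispiel_files = [{"id": "f1", "name": "umsatz.csv", "type": "document",
                       "content_type": "text/csv", "size": 1024}]
    beispiel_daten = [{"file_id": "f1", "name": "umsatz.csv", "type": "document",
                       "content": "Monat,Umsatz\nJan,1200", "is_extracted": True,
                       "extraction_method": "text_extraction"}]
    ergebnis = _structure_extracted_data(beispiel_daten, beispiel_files, "Extrahiere die Umsatzzahlen")
    print(ergebnis["files_processed"], ergebnis["status"])        # 1 success
    print(ergebnis["extracted_content"][0]["extraction_method"])  # text_extraction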

View file

@@ -1,638 +0,0 @@
"""
Zentrales Filehandling-Modul für den Agentservice.
Enthält alle Funktionen für das Verarbeiten von Dateien.
Angepasst, um mit LucyDOMInterface als zentrale Datei-Autorität zu arbeiten.
"""
import os
import logging
import base64
import json
import uuid
from datetime import datetime
from typing import Dict, Any, List, Optional, Tuple, Union, BinaryIO
from io import BytesIO # Import BytesIO at the top level
# Bibliotheken für Dateiverarbeitung
try:
import pandas as pd
except ImportError:
pd = None
logger = logging.getLogger(__name__)
# Custom exception für das File-Handling
class FileProcessingError(Exception):
"""Basisklasse für Fehler bei der Dateiverarbeitung im AgentService."""
pass
class FileExtractionError(FileProcessingError):
"""Fehler bei der Textextraktion aus Dateien."""
pass
class FileAnalysisError(FileProcessingError):
"""Fehler bei der Analyse von Dateien."""
pass
def encode_to_base64(content: bytes, mime_type: str = None) -> str:
"""
Kodiert Binärdaten als Base64-String.
Args:
content: Die zu kodierenden Binärdaten
mime_type: Optionaler MIME-Typ für das Encoding
Returns:
Base64-kodierter String
"""
base64_data = base64.b64encode(content).decode('utf-8')
return base64_data
def prepare_file_contexts(files: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Bereitet die Dateikontexte basierend auf Metadaten vor.
Akzeptiert keine Pfade mehr, sondern nur Metadaten aus der Datenbank.
Args:
files: Liste von Dateien mit Metadaten (Dict mit id, name, type, content_type)
Returns:
Liste von Dateikontexten für die Verarbeitung
"""
file_contexts = []
logger.info(f"Preparing file contexts for {len(files)} files")
for file in files:
file_id = file.get("id")
file_name = file.get("name")
file_type = file.get("type")
# Create a comprehensive context with all available metadata
context = {
"id": file_id,
"name": file_name,
"type": file_type,
"size": file.get("size", "Unbekannt"),
"content_type": file.get("content_type"),
"path": file.get("path"),
"upload_date": file.get("upload_date"),
"hash": file.get("hash"),
"mandate_id": file.get("mandate_id"),
"user_id": file.get("user_id")
}
# Log for debugging
logger.info(f"Created file context: {file_name} (ID: {file_id}, Type: {file_type})")
file_contexts.append(context)
return file_contexts
def is_text_extractable(file_name: str, content_type: str = None) -> bool:
"""
Prüft, ob aus der Datei Text extrahiert werden kann.
Args:
file_name: Name der Datei für die Erkennung des Formats
content_type: Optional MIME-Typ der Datei
Returns:
True wenn Text extrahiert werden kann, sonst False
"""
# Einfache Textdateien
if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py', '.csv')):
return True
# Excel-Dateien
elif file_name.endswith(('.xlsx', '.xls')):
return pd is not None # Nur extrahierbar, wenn pandas installiert ist
# PDF-Dateien - Textextraktion ist möglich
elif file_name.endswith('.pdf'):
try:
# Prüfen ob PyPDF2 oder PyMuPDF installiert sind
try:
import PyPDF2
return True
except ImportError:
try:
import fitz # PyMuPDF
return True
except ImportError:
return False
except:
return False
# Bildformate - nicht als Text extrahierbar
elif file_name.endswith(('.jpg', '.jpeg', '.png', '.gif', '.bmp', '.webp', '.svg')):
return False
# Video-Formate - nicht als Text extrahierbar
elif file_name.endswith(('.mp4', '.avi', '.mov', '.mkv', '.flv', '.wmv')):
return False
# Audio-Formate - nicht als Text extrahierbar
elif file_name.endswith(('.mp3', '.wav', '.ogg', '.flac', '.aac')):
return False
# Content-Type prüfen, falls Dateiendung nicht eindeutig ist
if content_type:
if content_type.startswith(('text/', 'application/json', 'application/xml')):
return True
elif content_type == 'application/pdf':
return True
elif content_type.startswith(('image/', 'video/', 'audio/')):
return False
# Im Zweifelsfall versuchen zu extrahieren
return True
def extract_text_from_file_content(file_content: bytes, file_name: str, content_type: str = None) -> Tuple[str, bool]:
"""
Extrahiert Text aus verschiedenen Dateiformaten basierend auf dem Binärinhalt.
Args:
file_content: Binärinhalt der Datei
file_name: Name der Datei für die Erkennung des Formats
content_type: Optional MIME-Typ der Datei
Returns:
Tuple mit (extrahierter Text, is_extracted Flag)
"""
# Prüfen, ob Text extrahierbar ist
if not is_text_extractable(file_name, content_type):
return f"[Datei: {file_name} - Textextraktion nicht unterstützt]", False
try:
# Einfache Textdateien
if file_name.endswith(('.txt', '.md', '.json', '.xml', '.html', '.htm', '.css', '.js', '.py')):
try:
return file_content.decode('utf-8'), True
except UnicodeDecodeError:
try:
return file_content.decode('latin1'), True
except:
return file_content.decode('cp1252', errors='replace'), True
# Excel-Dateien
elif file_name.endswith(('.xlsx', '.xls')):
if pd is not None:
# Temporäre Datei im Speicher erstellen
file_obj = BytesIO(file_content)
df = pd.read_excel(file_obj)
result = f"Excel file with {len(df)} rows and {len(df.columns)} columns.\n"
result += f"Columns: {', '.join(df.columns.tolist())}\n\n"
result += df.to_string(index=False)
return result, True
else:
return f"[Excel-Datei: {file_name} - pandas nicht installiert]", False
# CSV-Dateien
elif file_name.endswith('.csv'):
if pd is not None:
try:
# Temporäre Datei im Speicher erstellen
file_obj = BytesIO(file_content)
df = pd.read_csv(file_obj, encoding='utf-8')
except UnicodeDecodeError:
file_obj = BytesIO(file_content)
try:
df = pd.read_csv(file_obj, encoding='latin1')
except:
file_obj = BytesIO(file_content)
df = pd.read_csv(file_obj, encoding='cp1252')
result = f"CSV file with {len(df)} rows and {len(df.columns)} columns.\n"
result += f"Columns: {', '.join(df.columns.tolist())}\n\n"
result += df.to_string(index=False)
return result, True
else:
return f"[CSV-Datei: {file_name} - pandas nicht installiert]", False
# PDF-Dateien
elif file_name.endswith('.pdf'):
try:
try:
from PyPDF2 import PdfReader
reader = PdfReader(BytesIO(file_content))
text = ""
for page in reader.pages:
text += page.extract_text() + "\n\n"
return text, True
except ImportError:
try:
import fitz # PyMuPDF
doc = fitz.open(stream=file_content, filetype="pdf")
text = ""
for page in doc:
text += page.get_text() + "\n\n"
return text, True
except ImportError:
return f"[PDF: {file_name} - Keine PDF-Bibliothek installiert]", False
except Exception as e:
raise FileExtractionError(f"Fehler beim Lesen der PDF-Datei {file_name}: {str(e)}")
# Sonstige Dateien
else:
return f"[Datei: {file_name} - Textextraktion nicht unterstützt]", False
except Exception as e:
logger.error(f"Fehler beim Extrahieren von Text aus {file_name}: {str(e)}")
return f"[Fehler bei der Textextraktion: {str(e)}]", False
async def extract_and_analyze_pdf_images(
pdf_content: bytes,
prompt: str,
ai_service
) -> List[Dict[str, Any]]:
"""
Extrahiert Bilder aus einer PDF-Datei und analysiert sie.
Arbeitet mit Binärdaten statt Dateipfaden.
Args:
pdf_content: Binärdaten der PDF-Datei
prompt: Prompt für die Bildanalyse
ai_service: AI-Service für die Bildanalyse
Returns:
Liste mit Analyseergebnissen für jedes Bild
"""
image_responses = []
temp_files = [] # Liste der temporären Dateien zur Bereinigung
try:
# PDF mit PyMuPDF öffnen
import fitz # PyMuPDF
# BytesIO is already imported at the top level
import tempfile
# PDF im Speicher öffnen
doc = fitz.open(stream=pdf_content, filetype="pdf")
logger.info(f"PDF geöffnet mit {len(doc)} Seiten")
for page_num, page in enumerate(doc, 1):
# Alle Bilder auf der Seite finden
image_list = page.get_images(full=True)
if image_list:
logger.info(f"Seite {page_num}: {len(image_list)} Bilder gefunden")
for img_index, img in enumerate(image_list):
try:
# Bild-Referenz
xref = img[0]
# Bild und Metadaten extrahieren
base_image = doc.extract_image(xref)
image_bytes = base_image["image"] # Tatsächliche Bilddaten
image_ext = base_image["ext"] # Dateiendung (jpg, png, etc.)
# Erstelle temporäre Datei
fd, temp_img_path = tempfile.mkstemp(suffix=f".{image_ext}")
temp_files.append(temp_img_path) # Zur Bereinigungsliste hinzufügen
with os.fdopen(fd, 'wb') as img_file:
img_file.write(image_bytes)
logger.debug(f"Bild temporär gespeichert: {temp_img_path}")
# Analysiere mit AI-Service
try:
analysis_result = await ai_service.analyze_image(
image_data=image_bytes, # Direktes Übergeben der Bilddaten
prompt=prompt,
mime_type=f"image/{image_ext}"
)
logger.debug(f"Bildanalyse für Bild {img_index} auf Seite {page_num} abgeschlossen")
except Exception as analyze_error:
logger.error(f"Fehler bei der Bildanalyse: {str(analyze_error)}")
analysis_result = f"[Fehler bei der Bildanalyse: {str(analyze_error)}]"
# Ergebnis speichern
try:
# Versuche zuerst, die Größe aus base_image zu bekommen
if 'width' in base_image and 'height' in base_image:
image_size = f"{base_image['width']}x{base_image['height']}"
else:
# Alternative: Öffne das temporäre Bild, um die Größe zu bestimmen
from PIL import Image
with Image.open(temp_img_path) as img:
width, height = img.size
image_size = f"{width}x{height}"
except Exception as e:
logger.warning(f"Konnte Bildgröße nicht ermitteln: {str(e)}")
image_size = "unbekannt"
image_responses.append({
"page": page_num,
"image_index": img_index,
"format": image_ext,
"image_size": image_size,
"response": analysis_result
})
except Exception as e:
logger.warning(f"Fehler bei der Extraktion von Bild {img_index} auf Seite {page_num}: {str(e)}")
continue
logger.info(f"Extrahiert und analysiert: {len(image_responses)} Bilder aus PDF")
except ImportError:
logger.error("PyMuPDF (fitz) ist nicht installiert. Installiere es mit 'pip install pymupdf'")
raise FileExtractionError("PyMuPDF (fitz) ist nicht installiert")
except Exception as e:
logger.error(f"Fehler beim Extrahieren von PDF-Bildern: {str(e)}")
raise FileExtractionError(f"Fehler beim Extrahieren von PDF-Bildern: {str(e)}")
finally:
# Bereinige alle temporären Dateien
for temp_file in temp_files:
try:
if os.path.exists(temp_file):
os.remove(temp_file)
except Exception as e:
logger.warning(f"Konnte temporäre Datei nicht entfernen: {temp_file} - {str(e)}")
return image_responses
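# Beispielskizze (Annahmen: PyMuPDF ("fitz") ist installiert, "beispiel.pdf" ist ein frei
# gewählter lokaler Pfad). Zeigt denselben Extraktionsweg wie oben, ohne KI-Analyse:
# get_images() pro Seite, extract_image() pro Bild-Referenz.
def _demo_zaehle_pdf_bilder(pfad: str) -> int:
    import fitz  # PyMuPDF
    anzahl = 0
    with fitz.open(pfad) as doc:
        for seite in doc:
            for bild in seite.get_images(full=True):
                base_image = doc.extract_image(bild[0])  # enthält u.a. "image" (Bytes) und "ext"
                if base_image.get("image"):
                    anzahl += 1
    return anzahl

# _demo_zaehle_pdf_bilder("beispiel.pdf")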
def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Fügt eine Datei zu einer Nachricht hinzu mit Kennzeichnung, ob Text extrahiert wurde.
Args:
message: Die zu erweiternde Nachricht
file_data: Dateimetadaten und Inhalt
Returns:
Die aktualisierte Nachricht mit der Datei
"""
# Detailliertes Logging für Debugging
logger.info(f"Adding file to message: {file_data.get('name', 'unnamed_file')} (ID: {file_data.get('id', 'unknown')})")
# Initialize documents array if needed
if "documents" not in message:
message["documents"] = []
logger.debug("Initialized empty documents array in message")
# Create a unique ID for the document if not provided
import uuid
doc_id = file_data.get("id", f"file_{uuid.uuid4()}")
# Extract file size if available
file_size = file_data.get("size")
if isinstance(file_size, str) and file_size.isdigit():
file_size = int(file_size)
elif file_size is None and file_data.get("content"):
# Estimate size from content if not provided
file_size = len(file_data.get("content", ""))
# Bestimmen, ob der Inhalt bereits extrahiert wurde
content = file_data.get("content", "No content available")
file_name = file_data.get("name", "unnamed_file")
content_type = file_data.get("content_type")
# Prüfen, ob der Inhalt als extrahiert markiert werden sollte
is_extracted = file_data.get("is_extracted", False)
if not is_extracted and isinstance(content, str) and content.strip() and file_name:
# Wenn nicht explizit markiert, aber Inhalt vorhanden ist, prüfen wir den Dateityp
is_extracted = is_text_extractable(file_name, content_type)
# Create standard document structure that matches the data model
document = {
"id": doc_id,
"source": {
"type": "file",
"id": file_data.get("id", doc_id),
"name": file_name,
"content_type": content_type,
"size": file_size,
"upload_date": file_data.get("upload_date", datetime.now().isoformat())
},
"contents": [
{
"type": "text",
"text": content,
"is_extracted": is_extracted # Flag für den Extraktionsstatus hinzufügen
}
]
}
# Log document structure for debugging
logger.debug(f"Created document structure: id={doc_id}, name={file_name}, is_extracted={is_extracted}")
# Check if file is already in the message to avoid duplicates
file_already_added = any(
doc.get("source", {}).get("id") == file_data.get("id")
for doc in message.get("documents", [])
)
if not file_already_added:
message["documents"].append(document)
logger.info(f"File {file_name} successfully added to message (total: {len(message.get('documents', []))} files)")
else:
logger.info(f"File {file_name} already exists in message, skipping")
return message
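# Beispielskizze (Annahme: einfache In-Memory-Nachricht mit frei erfundenen Werten;
# zeigt die oben erzeugte Dokumentstruktur inklusive is_extracted-Flag).
if __name__ == "__main__":
    nachricht = {"role": "user", "content": "Bitte analysieren"}
    datei = {"id": "f1", "name": "bericht.txt", "content_type": "text/plain",
             "content": "Quartalsbericht Q1", "size": 18}
    nachricht = add_file_to_message(nachricht, datei)
    doc = nachricht["documents"][0]
    print(doc["source"]["name"], doc["contents"][0]["is_extracted"])  # bericht.txt True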
def extract_files_from_message(message: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Extrahiert Dateiinformationen aus einer Nachricht.
Funktion für Workflow-Manager und interne Verwendung.
Args:
message: Die Nachricht, aus der Dateien extrahiert werden sollen
Returns:
Liste der extrahierten Dateiinformationen
"""
files = []
if "documents" not in message:
logger.debug("No documents found in message")
return files
# Log for debugging
logger.debug(f"Extracting files from message with {len(message.get('documents', []))} documents")
for doc in message.get("documents", []):
doc_source = doc.get("source", {})
# Nur Dateien extrahieren
if doc_source.get("type") == "file":
file_info = {
"id": doc_source.get("id", f"file_{uuid.uuid4()}"),
"name": doc_source.get("name", "unnamed_file"),
"content_type": doc_source.get("content_type"),
"size": doc_source.get("size")
}
# Inhalt extrahieren, falls vorhanden
doc_contents = doc.get("contents", [])
for content in doc_contents:
if content.get("type") == "text":
file_info["content"] = content.get("text", "")
break
logger.debug(f"Extracted file: {file_info.get('name')} (ID: {file_info.get('id')})")
files.append(file_info)
else:
logger.debug(f"Skipping non-file document of type: {doc_source.get('type')}")
logger.info(f"Extracted {len(files)} files from message")
return files
async def read_file_contents(
file_contexts: List[Dict[str, Any]],
lucydom_interface,
workflow_id: str = None,
add_log_func = None,
ai_service = None # AI service parameter for image analysis
) -> Dict[str, Dict[str, Any]]:
"""
Liest den Inhalt aller Dateien und führt bei Bildern und Dokumenten Analysen durch.
Verwendet LucyDOM-Interface statt direkter Dateizugriffe.
Gibt jetzt ein Dictionary mit Dateiinhalten und Extraktionsstatus zurück.
Args:
file_contexts: Liste der Dateikontexte mit Metadaten
lucydom_interface: LucyDOM-Interface für Dateizugriffe
workflow_id: Optionale ID des Workflows für Logging
add_log_func: Optionale Funktion für das Hinzufügen von Logs
ai_service: Optionaler AI-Service für die Bildanalyse
Returns:
Dictionary mit Dateiinhalten und Metadaten (file_id -> {content, is_extracted, ...})
"""
file_contents = {}
# Add debug logging
logger.info(f"Reading contents of {len(file_contexts)} files for workflow {workflow_id}")
for file in file_contexts:
file_id = file["id"]
file_name = file["name"]
file_type = file.get("type", "unknown")
try:
# Dateiinhalt über LucyDOM-Interface abrufen
file_data = await lucydom_interface.read_file_content(file_id)
if not file_data:
_log(add_log_func, workflow_id, f"Datei {file_name} nicht gefunden", "warning")
file_contents[file_id] = {
"content": f"File content not available (File not found)",
"is_extracted": False,
"name": file_name,
"type": file_type,
"content_type": file.get("content_type")
}
continue
logger.info(f"Successfully read file: {file_name} (ID: {file_id}, Type: {file_type})")
# Bildverarbeitung - immer KI-Analyse verwenden, wenn verfügbar
if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
if ai_service and hasattr(ai_service, "analyze_image"):
try:
image_analysis = await ai_service.analyze_image(
image_data=file_data,
prompt="Describe this image in detail",
mime_type=file.get("content_type")
)
logger.debug(f"Image analysis successfully generated for {file_name}")
file_contents[file_id] = {
"content": f"Image Analysis:\n{image_analysis}",
"is_extracted": False, # Bildanalyse gilt nicht als Text-Extraktion
"name": file_name,
"type": file_type,
"content_type": file.get("content_type")
}
_log(add_log_func, workflow_id, f"Image {file_name} analyzed successfully", "info")
except Exception as e:
logger.error(f"Error analyzing image {file_name}: {str(e)}")
_log(add_log_func, workflow_id, f"Error analyzing image {file_name}: {str(e)}", "error")
file_contents[file_id] = {
"content": f"Image file: {file_name} (Analysis failed: {str(e)})",
"is_extracted": False,
"name": file_name,
"type": file_type,
"content_type": file.get("content_type")
}
else:
file_contents[file_id] = {
"content": f"Image file: {file_name} (AI analysis not available)",
"is_extracted": False,
"name": file_name,
"type": file_type,
"content_type": file.get("content_type")
}
# Dokument- und Textdateien
elif file_type == "document" or not file_type:
# Verwende die zentrale Textextraktionsfunktion mit Dateiinhalt
content, is_extracted = extract_text_from_file_content(
file_data, file_name, file.get("content_type")
)
file_contents[file_id] = {
"content": content,
"is_extracted": is_extracted,
"name": file_name,
"type": file_type,
"content_type": file.get("content_type")
}
_log(add_log_func, workflow_id,
f"File {file_name} read successfully (extracted: {is_extracted})", "info")
# Andere Dateitypen - nur Metadaten speichern
else:
file_contents[file_id] = {
"content": f"File: {file_name} (Type: {file_type}, content not available)",
"is_extracted": False,
"name": file_name,
"type": file_type,
"content_type": file.get("content_type")
}
_log(add_log_func, workflow_id, f"Unsupported file type: {file_type} for {file_name}", "warning")
except Exception as e:
logger.error(f"Error reading file {file_name}: {str(e)}")
_log(add_log_func, workflow_id, f"Error reading file {file_name}: {str(e)}", "error")
file_contents[file_id] = {
"content": f"File content not available (Error: {str(e)})",
"is_extracted": False,
"name": file_name,
"type": file_type,
"content_type": file.get("content_type")
}
return file_contents
def _log(add_log_func, workflow_id, message, log_type, agent_id=None, agent_name=None):
"""Hilfsfunktion zum Loggen mit unterschiedlichen Log-Funktionen"""
# Log über die Logger-Instanz
if log_type == "error":
logger.error(message)
elif log_type == "warning":
logger.warning(message)
else:
logger.info(message)
# Log über die bereitgestellte Log-Funktion (falls vorhanden)
if add_log_func and workflow_id:
add_log_func(workflow_id, message, log_type, agent_id, agent_name)

View file

@@ -1,146 +0,0 @@
"""
Aktualisierte Registry für alle verfügbaren Agenten im System.
Enthält jetzt auch den FileCreator-Agenten.
"""
import logging
import importlib
from typing import Dict, Any, List, Optional
# Import direkt bekannter Agent-Module
# Andere Module werden dynamisch importiert
from modules.agentservice_base import BaseAgent
logger = logging.getLogger(__name__)
class AgentRegistry:
"""Registry für alle verfügbaren Agenten im System"""
_instance = None
@classmethod
def get_instance(cls):
"""Gibt eine Singleton-Instanz der Agent-Registry zurück"""
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self):
"""Initialisiert die Agent-Registry"""
if AgentRegistry._instance is not None:
raise RuntimeError("Singleton-Instanz existiert bereits - nutze get_instance()")
self.agents = {}
self._load_agents()
def _load_agents(self):
"""Lädt alle verfügbaren Agenten"""
# Liste aller zu ladenden Agenten-Module
agent_modules = [
"agentservice_agent_coder",
"agentservice_agent_analyst",
"agentservice_agent_webcrawler",
"agentservice_agent_sharepoint",
"agentservice_agent_documentation",
"agentservice_agent_filecreator" # Neuer FileCreator-Agent hinzugefügt
]
for module_name in agent_modules:
try:
# Importiere das Modul
try:
module = importlib.import_module(f"modules.{module_name}")
except ImportError:
module = importlib.import_module(module_name)
# Suche nach der Agent-Klasse oder einer get_*_agent-Funktion
agent_type = module_name.split('_')[-1]
class_name = f"{agent_type.capitalize()}Agent"
getter_name = f"get_{agent_type}_agent"
agent = None
# Versuche, den Agenten über die get_*_agent-Funktion zu holen
if hasattr(module, getter_name):
getter_func = getattr(module, getter_name)
agent = getter_func()
logger.info(f"Agent '{agent.name}' (Typ: {agent.type}) via {getter_name}() geladen")
# Alternativ versuche, den Agenten direkt zu instanziieren
elif hasattr(module, class_name):
agent_class = getattr(module, class_name)
agent = agent_class()
logger.info(f"Agent '{agent.name}' (Typ: {agent.type}) direkt instanziiert")
if agent:
# Registriere den Agenten
self.register_agent(agent)
else:
logger.warning(f"Keine Agent-Klasse oder getter-Funktion in Modul {module_name} gefunden")
except ImportError as e:
logger.warning(f"Modul {module_name} konnte nicht importiert werden: {e}")
except Exception as e:
logger.error(f"Fehler beim Laden des Agenten aus Modul {module_name}: {e}")
def register_agent(self, agent: BaseAgent):
"""Registriert einen Agenten in der Registry."""
agent_type = agent.type
self.agents[agent_type] = agent
# Zusätzlich nach ID registrieren
self.agents[agent.id] = agent
logger.debug(f"Agent '{agent.name}' (Typ: {agent_type}) wurde registriert")
def get_agent(self, agent_identifier: str) -> Optional[BaseAgent]:
"""
Gibt eine Instanz eines Agenten nach ID oder Typ zurück.
Args:
agent_identifier: ID oder Typ des gewünschten Agenten
Returns:
Agent-Instanz oder None, wenn nicht gefunden
"""
# Versuche, direkt nach Typ zu finden
if agent_identifier in self.agents:
return self.agents[agent_identifier]
# Wenn nicht gefunden, versuche verschiedene Varianten des Namens
variants = [
agent_identifier,
agent_identifier.replace('_agent', ''),
f"{agent_identifier}_agent"
]
for variant in variants:
if variant in self.agents:
return self.agents[variant]
logger.warning(f"Agent mit Identifier '{agent_identifier}' nicht gefunden")
return None
def get_all_agents(self) -> Dict[str, BaseAgent]:
"""Gibt alle registrierten Agenten zurück."""
return self.agents
def get_agent_infos(self) -> List[Dict[str, Any]]:
"""Gibt Informationen zu allen registrierten Agenten zurück."""
agent_infos = []
# Nur einmal pro Agent-Instanz (da wir sowohl nach Typ als auch nach ID registrieren)
seen_agents = set()
for agent in self.agents.values():
if agent not in seen_agents:
agent_infos.append(agent.get_agent_info())
seen_agents.add(agent)
return agent_infos
def initialize_agents_for_workflow(self) -> Dict[str, Dict[str, Any]]:
"""Initialisiert Agenten für einen Workflow."""
initialized_agents = {}
seen_agents = set()
for agent in self.agents.values():
if agent not in seen_agents:
agent_info = agent.get_agent_info()
agent_id = agent_info["id"]
initialized_agents[agent_id] = agent_info
seen_agents.add(agent)
return initialized_agents
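# Beispielskizze (Annahme: die Agenten-Module sind importierbar; zeigt die Auflösung
# über ID, Typ oder Namensvarianten wie "analyst" -> "analyst_agent").
if __name__ == "__main__":
    registry = AgentRegistry.get_instance()
    agent = registry.get_agent("analyst")  # findet ggf. auch über die Variante "analyst_agent"
    if agent:
        print(agent.name, agent.result_format)
    print(len(registry.get_agent_infos()), "Agenten registriert")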

File diff suppressed because it is too large

View file

@@ -1,469 +0,0 @@
import os
import logging
from typing import Dict, Any, List, Optional, Union
import importlib
from passlib.context import CryptContext
from connectors.connector_db_json import DatabaseConnector
logger = logging.getLogger(__name__)
# Password-Hashing
pwd_context = CryptContext(schemes=["argon2"], deprecated="auto")
class GatewayInterface:
"""
Interface zum Gateway-System.
Verwaltet Benutzer und Mandanten.
"""
def __init__(self, mandate_id: int = None, user_id: int = None):
"""
Initialisiert das Gateway-Interface mit optionalem Mandanten- und Benutzerkontext.
Args:
mandate_id: ID des aktuellen Mandanten (optional)
user_id: ID des aktuellen Benutzers (optional)
"""
# Bei der Initialisierung kann der Kontext leer sein
self.mandate_id = mandate_id
self.user_id = user_id
# Datenverzeichnis
self.data_folder = "_database_gateway"
os.makedirs(self.data_folder, exist_ok=True)
logger.info("db for data_gateway attached")
# Datenmodell-Modul importieren
try:
self.model_module = importlib.import_module("modules.gateway_model")
logger.info("gateway_model erfolgreich importiert")
except ImportError as e:
logger.error(f"Fehler beim Importieren von gateway_model: {e}")
raise
# Konnektor erstellen
logger.info(f"API getting connector {mandate_id} {user_id}")
self.db = DatabaseConnector(
db_folder=self.data_folder,
mandate_id=self.mandate_id if self.mandate_id is not None else 0,
user_id=self.user_id if self.user_id is not None else 0
)
# Datenbank initialisieren, falls nötig
self._initialize_database()
def _initialize_database(self):
"""
Initialisiert die Datenbank mit minimalen Objekten,
falls sie noch nicht existiert.
"""
# Prüfe, ob Mandanten existieren
# Erstelle den Root-Mandanten, falls nötig
existing_mandate_id = self.get_initial_id("mandates")
mandates = self.db.get_recordset("mandates")
if existing_mandate_id is None or not mandates:
logger.info("Erstelle Root-Mandant")
root_mandate = {
"name": "Root",
"language": "de"
}
created_mandate = self.db.record_create("mandates", root_mandate)
logger.info(f"Root-Mandant wurde erstellt mit ID {created_mandate['id']}")
# Aktualisiere den Mandanten-Kontext
self.mandate_id = created_mandate['id']
self.user_id = created_mandate['user_id']
# Konnektor mit korrektem Kontext neu erstellen
self.db = DatabaseConnector(
db_folder=self.data_folder,
mandate_id=self.mandate_id,
user_id=self.user_id
)
# Prüfe, ob Benutzer existieren
# Erstelle den Admin-Benutzer, falls nötig
existing_user_id = self.get_initial_id("users")
users = self.db.get_recordset("users")
if existing_user_id is None or not users:
logger.info("Erstelle Admin-Benutzer")
admin_user = {
"mandate_id": self.mandate_id,
"username": "admin",
"email": "admin@example.com",
"full_name": "Administrator",
"disabled": False,
"language": "de",
"privilege": "sysadmin", # SysAdmin-Berechtigung
"hashed_password": self._get_password_hash("admin") # In der Produktion ein sicheres Passwort verwenden!
}
created_user = self.db.record_create("users", admin_user)
logger.info(f"Admin-Benutzer wurde erstellt mit ID {created_user['id']}")
# Aktualisiere den Benutzer-Kontext
self.user_id = created_user['id']
# Konnektor mit korrektem Kontext neu erstellen
self.db = DatabaseConnector(
db_folder=self.data_folder,
mandate_id=self.mandate_id,
user_id=self.user_id
)
def get_initial_id(self, table: str) -> Optional[int]:
"""
Gibt die initiale ID für eine Tabelle zurück.
Args:
table: Name der Tabelle
Returns:
Die initiale ID oder None, wenn nicht vorhanden
"""
return self.db.get_initial_id(table)
def _get_password_hash(self, password: str) -> str:
"""Erstellt einen Hash für ein Passwort"""
return pwd_context.hash(password)
def _verify_password(self, plain_password: str, hashed_password: str) -> bool:
"""Überprüft, ob das Passwort zum Hash passt"""
return pwd_context.verify(plain_password, hashed_password)
def _get_current_timestamp(self) -> str:
"""Gibt den aktuellen Zeitstempel im ISO-Format zurück"""
from datetime import datetime
return datetime.now().isoformat()
# Mandanten-Methoden
def get_all_mandates(self) -> List[Dict[str, Any]]:
"""Gibt alle Mandanten zurück"""
return self.db.get_recordset("mandates")
def get_mandate(self, mandate_id: int) -> Optional[Dict[str, Any]]:
"""Gibt einen Mandanten anhand seiner ID zurück"""
mandates = self.db.get_recordset("mandates", record_filter={"id": mandate_id})
if mandates:
return mandates[0]
return None
def create_mandate(self, name: str, language: str = "de") -> Dict[str, Any]:
"""Erstellt einen neuen Mandanten"""
mandate_data = {
"name": name,
"language": language
}
return self.db.record_create("mandates", mandate_data)
def update_mandate(self, mandate_id: int, mandate_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Aktualisiert einen bestehenden Mandanten
Args:
mandate_id: Die ID des zu aktualisierenden Mandanten
mandate_data: Die zu aktualisierenden Mandantendaten
Returns:
Dict[str, Any]: Die aktualisierten Mandantendaten
Raises:
ValueError: Wenn der Mandant nicht gefunden wurde
"""
# Prüfe, ob der Mandant existiert
mandate = self.get_mandate(mandate_id)
if not mandate:
raise ValueError(f"Mandant mit ID {mandate_id} nicht gefunden")
# Aktualisiere den Mandanten
updated_mandate = self.db.record_modify("mandates", mandate_id, mandate_data)
return updated_mandate
def delete_mandate(self, mandate_id: int) -> bool:
"""
Löscht einen Mandanten und alle damit verbundenen Benutzer und Daten
Args:
mandate_id: Die ID des zu löschenden Mandanten
Returns:
bool: True, wenn der Mandant erfolgreich gelöscht wurde, sonst False
"""
# Prüfe, ob der Mandant existiert
mandate = self.get_mandate(mandate_id)
if not mandate:
return False
# Prüfe, ob es der initiale Mandant ist
initial_mandate_id = self.get_initial_id("mandates")
if initial_mandate_id is not None and mandate_id == initial_mandate_id:
logger.warning(f"Versuch, den Root-Mandanten zu löschen, wurde verhindert")
return False
# Finde alle Benutzer des Mandanten
users = self.get_users_by_mandate(mandate_id)
# Lösche alle Benutzer des Mandanten und ihre zugehörigen Daten
for user in users:
self.delete_user(user["id"])
# Lösche den Mandanten
success = self.db.record_delete("mandates", mandate_id)
if success:
logger.info(f"Mandant mit ID {mandate_id} wurde erfolgreich gelöscht")
else:
logger.error(f"Fehler beim Löschen des Mandanten mit ID {mandate_id}")
return success
# Benutzer-Methoden
def get_all_users(self) -> List[Dict[str, Any]]:
"""Gibt alle Benutzer zurück"""
users = self.db.get_recordset("users")
# Entferne die Passwort-Hashes aus der Rückgabe
for user in users:
if "hashed_password" in user:
del user["hashed_password"]
return users
def get_users_by_mandate(self, mandate_id: int) -> List[Dict[str, Any]]:
"""
Gibt alle Benutzer eines bestimmten Mandanten zurück
Args:
mandate_id: Die ID des Mandanten
Returns:
List[Dict[str, Any]]: Liste der Benutzer des Mandanten
"""
users = self.db.get_recordset("users", record_filter={"mandate_id": mandate_id})
# Entferne die Passwort-Hashes aus der Rückgabe
for user in users:
if "hashed_password" in user:
del user["hashed_password"]
return users
def get_user_by_username(self, username: str) -> Optional[Dict[str, Any]]:
"""Gibt einen Benutzer anhand seines Benutzernamens zurück"""
users = self.db.get_recordset("users")
for user in users:
if user.get("username") == username:
return user
return None
def get_user(self, user_id: int) -> Optional[Dict[str, Any]]:
"""Gibt einen Benutzer anhand seiner ID zurück"""
users = self.db.get_recordset("users", record_filter={"id": user_id})
if users:
user = users[0]
# Entferne das Passwort-Hash aus der Rückgabe für die API
if "hashed_password" in user:
user_copy = user.copy()
del user_copy["hashed_password"]
return user_copy
return user
return None
def create_user(self, username: str, password: str, email: str = None,
full_name: str = None, language: str = "de", mandate_id: int = None,
disabled: bool = False, privilege: str = "user") -> Dict[str, Any]:
"""
Erstellt einen neuen Benutzer
Args:
username: Der Benutzername
password: Das Passwort
email: Die E-Mail-Adresse (optional)
full_name: Der vollständige Name (optional)
language: Die bevorzugte Sprache (Standard: "de")
mandate_id: Die ID des Mandanten (optional)
disabled: Ob der Benutzer deaktiviert ist (Standard: False)
privilege: Die Berechtigungsstufe (Standard: "user")
Returns:
Dict[str, Any]: Die erstellten Benutzerdaten
Raises:
ValueError: Wenn der Benutzername bereits existiert
"""
# Prüfe, ob der Benutzername bereits existiert
existing_user = self.get_user_by_username(username)
if existing_user:
raise ValueError(f"Benutzer '{username}' existiert bereits")
# Verwende den übergebenen mandate_id oder den aktuellen Kontext
user_mandate_id = mandate_id if mandate_id is not None else self.mandate_id
user_data = {
"mandate_id": user_mandate_id,
"username": username,
"email": email,
"full_name": full_name,
"disabled": disabled,
"language": language,
"privilege": privilege,
"hashed_password": self._get_password_hash(password)
}
created_user = self.db.record_create("users", user_data)
# Entferne das Passwort-Hash aus der Rückgabe
if "hashed_password" in created_user:
del created_user["hashed_password"]
return created_user
def authenticate_user(self, username: str, password: str) -> Optional[Dict[str, Any]]:
"""
Authentifiziert einen Benutzer anhand von Benutzername und Passwort
Args:
username: Der Benutzername
password: Das Passwort
Returns:
Optional[Dict[str, Any]]: Die Benutzerdaten oder None, wenn die Authentifizierung fehlschlägt
"""
user = self.get_user_by_username(username)
if not user:
return None
if not self._verify_password(password, user.get("hashed_password", "")):
return None
# Prüfe, ob der Benutzer deaktiviert ist
if user.get("disabled", False):
return None
# Erstelle eine Kopie ohne Passwort-Hash
authenticated_user = {**user}
if "hashed_password" in authenticated_user:
del authenticated_user["hashed_password"]
return authenticated_user
def update_user(self, user_id: int, user_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Aktualisiert einen Benutzer
Args:
user_id: Die ID des zu aktualisierenden Benutzers
user_data: Die zu aktualisierenden Benutzerdaten
Returns:
Dict[str, Any]: Die aktualisierten Benutzerdaten
Raises:
ValueError: Wenn der Benutzer nicht gefunden wurde
"""
# Hole den aktuellen Benutzer mit Hash-Passwort (direkt aus der DB)
users = self.db.get_recordset("users", record_filter={"id": user_id})
if not users:
raise ValueError(f"Benutzer mit ID {user_id} nicht gefunden")
user = users[0]
# Wenn das Passwort geändert werden soll, hashe es
if "password" in user_data:
user_data["hashed_password"] = self._get_password_hash(user_data["password"])
del user_data["password"]
# Aktualisiere den Benutzer
updated_user = self.db.record_modify("users", user_id, user_data)
# Entferne das Passwort-Hash aus der Rückgabe
if "hashed_password" in updated_user:
del updated_user["hashed_password"]
return updated_user
def disable_user(self, user_id: int) -> Dict[str, Any]:
"""Deaktiviert einen Benutzer"""
return self.update_user(user_id, {"disabled": True})
def enable_user(self, user_id: int) -> Dict[str, Any]:
"""Aktiviert einen Benutzer"""
return self.update_user(user_id, {"disabled": False})
def _delete_user_referenced_data(self, user_id: int) -> None:
"""
Löscht alle Daten, die mit einem Benutzer verbunden sind
Args:
user_id: Die ID des Benutzers
"""
# Hier werden alle Tabellen durchsucht und alle Einträge gelöscht,
# die auf diesen Benutzer verweisen
# Attribute des Benutzers löschen
try:
attributes = self.db.get_recordset("attributes", record_filter={"user_id": user_id})
for attribute in attributes:
self.db.record_delete("attributes", attribute["id"])
except Exception as e:
logger.error(f"Fehler beim Löschen der Attribute für Benutzer {user_id}: {e}")
# Weitere Tabellen, die auf den Benutzer verweisen könnten
# (Je nach Datenbankstruktur der Anwendung)
logger.info(f"Alle referenzierten Daten für Benutzer {user_id} wurden gelöscht")
def delete_user(self, user_id: int) -> bool:
"""
Löscht einen Benutzer und alle damit verbundenen Daten
Args:
user_id: Die ID des zu löschenden Benutzers
Returns:
bool: True, wenn der Benutzer erfolgreich gelöscht wurde, sonst False
"""
# Prüfe, ob der Benutzer existiert
users = self.db.get_recordset("users", record_filter={"id": user_id})
if not users:
return False
# Prüfe, ob es der initiale Benutzer ist
initial_user_id = self.get_initial_id("users")
if initial_user_id is not None and user_id == initial_user_id:
logger.warning("Versuch, den Root-Admin zu löschen, wurde verhindert")
return False
# Lösche alle mit dem Benutzer verbundenen Daten
self._delete_user_referenced_data(user_id)
# Lösche den Benutzer
success = self.db.record_delete("users", user_id)
if success:
logger.info(f"Benutzer mit ID {user_id} wurde erfolgreich gelöscht")
else:
logger.error(f"Fehler beim Löschen des Benutzers mit ID {user_id}")
return success
# Singleton-Factory für GatewayInterface-Instanzen pro Kontext
_gateway_interfaces = {}
def get_gateway_interface(mandate_id: int = None, user_id: int = None) -> GatewayInterface:
"""
Gibt eine GatewayInterface-Instanz für den angegebenen Kontext zurück.
Wiederverwendet bestehende Instanzen.
"""
context_key = f"{mandate_id}_{user_id}"
if context_key not in _gateway_interfaces:
_gateway_interfaces[context_key] = GatewayInterface(mandate_id, user_id)
return _gateway_interfaces[context_key]
# Init
get_gateway_interface()
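# --- Illustrative usage sketch (not part of this commit) ---
# Shows how the gateway could be driven end to end: create a mandate, add a
# user to it, then authenticate. All literal values are placeholders.
if __name__ == "__main__":
    gw = get_gateway_interface()  # reuses the root-context instance created above
    mandate = gw.create_mandate("Example Corp", language="de")
    user = gw.create_user(
        username="jdoe",
        password="change-me",  # use a proper secret in production
        email="jdoe@example.com",
        full_name="Jane Doe",
        mandate_id=mandate["id"],
        privilege="user",
    )
    authenticated = gw.authenticate_user("jdoe", "change-me")
    print(f"created user {user['id']}, authenticated: {authenticated is not None}")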

View file

@@ -1,94 +0,0 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from datetime import datetime
class Label(BaseModel):
"""Label für ein Attribut oder eine Klasse mit Unterstützung für mehrere Sprachen"""
default: str
translations: Dict[str, str] = {}
def get_label(self, language: str = None):
"""Gibt das Label in der angegebenen Sprache zurück, oder den Standardwert wenn nicht verfügbar"""
if language and language in self.translations:
return self.translations[language]
return self.default
class Mandate(BaseModel):
"""Datenmodell für einen Mandanten"""
id: int = Field(description="Eindeutige ID des Mandanten")
name: str = Field(description="Name des Mandanten")
language: str = Field(description="Standardsprache des Mandanten")
label: Label = Field(
default=Label(default="Mandant", translations={"en": "Mandate", "fr": "Mandat"}),
description="Label für die Klasse"
)
# Labels für Attribute
field_labels: Dict[str, Label] = {
"id": Label(default="ID", translations={}),
"name": Label(default="Name des Mandanten", translations={"en": "Mandate name", "fr": "Nom du mandat"}),
"language": Label(default="Sprache", translations={"en": "Language", "fr": "Langue"})
}
class User(BaseModel):
"""Datenmodell für einen Benutzer"""
id: int = Field(description="Eindeutige ID des Benutzers")
mandate_id: int = Field(description="ID des zugehörigen Mandanten")
username: str = Field(description="Benutzername für die Anmeldung")
email: Optional[str] = Field(None, description="E-Mail-Adresse des Benutzers")
full_name: Optional[str] = Field(None, description="Vollständiger Name des Benutzers")
language: str = Field(description="Bevorzugte Sprache des Benutzers")
disabled: Optional[bool] = Field(False, description="Gibt an, ob der Benutzer deaktiviert ist")
privilege: str = Field(description="Berechtigungsstufe") #sysadmin,admin,user
label: Label = Field(
default=Label(default="Benutzer", translations={"en": "User", "fr": "Utilisateur"}),
description="Label für die Klasse"
)
# Labels für Attribute
field_labels: Dict[str, Label] = {
"id": Label(default="ID", translations={}),
"mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
"username": Label(default="Benutzername", translations={"en": "Username", "fr": "Nom d'utilisateur"}),
"email": Label(default="E-Mail", translations={"en": "Email", "fr": "E-mail"}),
"full_name": Label(default="Vollständiger Name", translations={"en": "Full name", "fr": "Nom complet"}),
"language": Label(default="Sprache", translations={"en": "Language", "fr": "Langue"}),
"disabled": Label(default="Deaktiviert", translations={"en": "Disabled", "fr": "Désactivé"}),
"privilege": Label(default="Berechtigungsstufe", translations={"en": "Access level", "fr": "Niveau d'accès"}),
}
class UserInDB(User):
"""Erweiterte Benutzerklasse mit Passwort-Hash"""
hashed_password: str = Field(description="Hash des Benutzerpassworts")
label: Label = Field(
default=Label(default="Benutzer Zugriff", translations={"en": "User Access", "fr": "Accès de l'utilisateur"}),
description="Label für die Klasse"
)
# Zusätzliches Label für das Passwort-Feld
field_labels: Dict[str, Label] = {
"hashed_password": Label(default="Passwort-Hash", translations={"en": "Password hash", "fr": "Hachage de mot de passe"})
}
class Token(BaseModel):
"""Datenmodell für ein Authentifizierungstoken"""
access_token: str = Field(description="Das ausgestellte Zugriffstoken")
token_type: str = Field(description="Typ des Tokens (meist 'bearer')")
label: Label = Field(
default=Label(default="Token", translations={"en": "Token", "fr": "Jeton"}),
description="Label für die Klasse"
)
# Labels für Attribute
field_labels: Dict[str, Label] = {
"access_token": Label(default="Zugriffstoken", translations={"en": "Access token", "fr": "Jeton d'accès"}),
"token_type": Label(default="Token-Typ", translations={"en": "Token type", "fr": "Type de jeton"})
}
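# --- Illustrative sketch (not part of this commit) ---
# Demonstrates the language fallback in Label.get_label() and a minimal Token
# instance; the values are placeholders.
if __name__ == "__main__":
    label = Label(default="Benutzer", translations={"en": "User", "fr": "Utilisateur"})
    print(label.get_label("en"))  # "User"
    print(label.get_label("it"))  # no Italian translation -> falls back to "Benutzer"

    token = Token(access_token="abc123", token_type="bearer")
    print(token.token_type)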

File diff suppressed because it is too large

View file

@@ -1,149 +0,0 @@
from pydantic import BaseModel, Field
from typing import List, Dict, Any, Optional
from datetime import datetime
class Label(BaseModel):
"""Label für ein Attribut oder eine Klasse mit Unterstützung für mehrere Sprachen"""
default: str
translations: Dict[str, str] = {}
def get_label(self, language: str = None):
"""Gibt das Label in der angegebenen Sprache zurück, oder den Standardwert wenn nicht verfügbar"""
if language and language in self.translations:
return self.translations[language]
return self.default
class FileItem(BaseModel):
"""Datenmodell für ein Datenobjekt"""
id: int = Field(description="Eindeutige ID des Datenobjekts")
mandate_id: int = Field(description="ID des zugehörigen Mandanten")
user_id: int = Field(description="ID des Erstellers")
name: str = Field(description="Name des Datenobjekts")
type: str = Field(description="Typ des Datenobjekts ('document', 'image', etc.)")
size: Optional[str] = Field(None, description="Größe des Datenobjekts")
upload_date: Optional[str] = Field(None, description="Datum des Hochladens")
content_type: Optional[str] = Field(None, description="Content-Type des Datenobjekts")
path: Optional[str] = Field(None, description="Pfad zum Datenobjekt")
label: Label = Field(
default=Label(default="Datenobjekt", translations={"en": "Data Object", "fr": "Objet de données"}),
description="Label für die Klasse"
)
# Labels für Attribute
field_labels: Dict[str, Label] = {
"id": Label(default="ID", translations={}),
"mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
"user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
"name": Label(default="Name", translations={"en": "Name", "fr": "Nom"}),
"type": Label(default="Typ", translations={"en": "Type", "fr": "Type"}),
"size": Label(default="Größe", translations={"en": "Size", "fr": "Taille"}),
"upload_date": Label(default="Upload-Datum", translations={"en": "Upload date", "fr": "Date de téléchargement"}),
"content_type": Label(default="Content-Type", translations={"en": "Content type", "fr": "Type de contenu"}),
"path": Label(default="Pfad", translations={"en": "Path", "fr": "Chemin"})
}
class Prompt(BaseModel):
"""Datenmodell für einen Prompt"""
id: int = Field(description="Eindeutige ID des Prompts")
mandate_id: int = Field(description="ID des zugehörigen Mandanten")
user_id: int = Field(description="ID des Erstellers")
content: str = Field(description="Inhalt des Prompts")
name: str = Field(description="Anzeigename des Prompts")
label: Label = Field(
default=Label(default="Prompt", translations={"en": "Prompt", "fr": "Invite"}),
description="Label für die Klasse"
)
# Labels für Attribute
field_labels: Dict[str, Label] = {
"id": Label(default="ID", translations={}),
"mandate_id": Label(default="Mandanten-ID", translations={"en": "Mandate ID", "fr": "ID de mandat"}),
"user_id": Label(default="Benutzer-ID", translations={"en": "User ID", "fr": "ID d'utilisateur"}),
"content": Label(default="Inhalt", translations={"en": "Content", "fr": "Contenu"}),
"name": Label(default="Name", translations={"en": "Label", "fr": "Nom"}),
}
# Neue Workflow-Modellklassen
class DocumentSource(BaseModel):
"""Quelle eines Dokuments im Workflow"""
type: str = Field(description="Typ der Quelle ('prompt', 'file', 'clipboard')")
path: Optional[str] = Field(None, description="Speicherpfad (für Dateien)")
name: str = Field(description="Anzeigename der Datei")
size: Optional[int] = Field(None, description="Größe in Bytes")
lines: Optional[int] = Field(None, description="Zeilenanzahl (für Textdateien)")
content_type: Optional[str] = Field(None, description="MIME-Typ")
upload_date: Optional[str] = Field(None, description="Uploaddatum")
class DocumentContent(BaseModel):
"""Inhalt eines Dokuments im Workflow"""
label: Optional[str] = Field(None, description="Optionale Bezeichnung")
type: str = Field(description="Typ des Inhalts ('text', 'image', 'chart', etc.)")
text: Optional[str] = Field(None, description="Textinhalt")
is_extracted: Optional[bool] = Field(False, description="Flag, ob aus Originaldatei extrahiert")
class Document(BaseModel):
"""Dokument im Workflow (inkl. Prompt und referenzierte Dateien)"""
id: str = Field(description="Eindeutige ID des Dokuments")
source: DocumentSource = Field(description="Quellmetadaten")
contents: List[DocumentContent] = Field(description="Dokumentinhalte")
class DataStats(BaseModel):
"""Statistiken für Performance und Datennutzung"""
processing_time: Optional[float] = Field(None, description="Verarbeitungszeit in Sekunden")
token_count: Optional[int] = Field(None, description="Token-Anzahl (für KI-Modelle)")
bytes_sent: Optional[int] = Field(None, description="Gesendete Bytes")
bytes_received: Optional[int] = Field(None, description="Empfangene Bytes")
class Message(BaseModel):
"""Nachrichtenobjekt im Workflow"""
id: str = Field(description="Eindeutige ID der Nachricht")
workflow_id: str = Field(description="Referenz zum übergeordneten Workflow")
parent_message_id: Optional[str] = Field(None, description="Referenz zur beantworteten Nachricht")
started_at: str = Field(description="Zeitstempel für Nachrichtenerstellung")
finished_at: Optional[str] = Field(None, description="Zeitstempel für Nachrichtenabschluss")
sequence_no: int = Field(description="Sequenznummer für Sortierung")
status: str = Field(description="Status der Nachricht ('pending', 'processing', 'completed', 'failed')")
role: str = Field(description="Rolle des Absenders ('system', 'user', 'assistant')")
data_stats: Optional[DataStats] = Field(None, description="Statistiken")
documents: Optional[List[Document]] = Field(None, description="Dokumente in dieser Nachricht")
content: Optional[str] = Field(None, description="Textinhalt der Nachricht")
agent_type: Optional[str] = Field(None, description="Typ des verwendeten Agenten")
class Workflow(BaseModel):
"""Workflow-Objekt für Multi-Agent-System"""
id: str = Field(description="Eindeutige ID des Workflows")
name: Optional[str] = Field(None, description="Name des Workflows")
mandate_id: int = Field(description="ID des Mandanten")
user_id: int = Field(description="ID des Benutzers")
status: str = Field(description="Status des Workflows ('running', 'failed', 'stopped')")
started_at: str = Field(description="Startzeitpunkt")
last_activity: str = Field(description="Zeitpunkt der letzten Aktivität")
current_round: int = Field(description="Aktuelle Runde")
waiting_for_user: bool = Field(False, description="Flag, ob auf Benutzereingabe gewartet wird")
data_stats: Optional[Dict[str, Any]] = Field(None, description="Gesamt-Statistiken")
messages: List[Message] = Field(default=[], description="Nachrichtenverlauf")
logs: List[Dict[str, Any]] = Field(default=[], description="Protokolleinträge")
# Anfragemodelle für die API
class WorkflowCreateRequest(BaseModel):
"""Anfrage zur Erstellung eines neuen Workflows"""
name: Optional[str] = Field(None, description="Name des Workflows")
prompt: str = Field(description="Zu verwendender Prompt")
files: List[int] = Field(default=[], description="Liste von Datei-IDs")
class UserInputRequest(BaseModel):
"""Anfrage für Benutzereingabe an einen laufenden Workflow"""
message: str = Field(description="Nachricht des Benutzers")
additional_files: List[int] = Field(default=[], description="Liste zusätzlicher Datei-IDs")
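# --- Illustrative sketch (not part of this commit) ---
# Builds a workflow creation request and a matching first message; IDs and
# timestamps are placeholder values chosen for the example.
if __name__ == "__main__":
    request = WorkflowCreateRequest(
        name="Quarterly report",
        prompt="Summarise the attached figures",
        files=[1, 2],
    )
    first_message = Message(
        id="msg-1",
        workflow_id="wf-1",
        started_at=datetime.now().isoformat(),
        sequence_no=1,
        status="pending",
        role="user",
        content=request.prompt,
    )
    print(first_message.status, len(request.files))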

View file

@@ -31,7 +31,8 @@ import modules.lucydom_interface as lucydom_model
# Konfiguration des Loggers
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
#format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
handlers=[logging.StreamHandler()]
)
logger = logging.getLogger(__name__)

File diff suppressed because it is too large

File diff suppressed because it is too large

View file

@@ -1,7 +1,7 @@
"""
Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten.
Verwendet einen strukturierten mehrstufigen Prozess zur Erstellung hochwertiger Dokumentation.
Angepasst für das refaktorisierte Core-Modul.
Verwendet einen adaptiven Prozess zur Erstellung hochwertiger Dokumentation basierend auf der Komplexität des Auftrags.
Angepasst für das refaktorisierte Core-Modul und AgentCommunicationProtocol.
"""
import logging
@@ -15,428 +15,439 @@ import uuid
from modules.agentservice_base import BaseAgent
from connectors.connector_aichat_openai import ChatService
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
from modules.agentservice_filemanager import FileManager # Import the file manager
logger = logging.getLogger(__name__)
class DocumentationAgent(BaseAgent):
"""Agent für die Erstellung von Dokumentation und strukturierten Inhalten"""
"""Agent for creating documentation and structured content"""
def __init__(self):
"""Initialisiert den Dokumentations-Agenten"""
"""Initialize the documentation agent"""
super().__init__()
self.id = "documentation"
self.name = "Dokumentation"
self.id = "documentation_agent"
self.name = "Documentation Specialist"
self.type = "documentation"
self.description = "Erstellt Dokumentation und strukturierte Inhalte"
self.description = "Creates documentation and structured content"
self.capabilities = "report_generation,documentation,content_structuring,technical_writing,knowledge_organization"
self.instructions = """
Du bist der Dokumentations-Agent. Deine Aufgabe:
1. Komplexe Informationen in klare, strukturierte Dokumente umsetzen
2. Verschiedene Dokumentformate erstellen
3. Informationen aus verschiedenen Quellen strukturieren
4. Technische Konzepte verständlich erklären
5. Konsistente Formatierung sicherstellen
"""
self.result_format = "FormattedDocument"
# Chat-Service initialisieren
self.chat_service = None
# Utility-Klassen initialisieren
self.message_utils = MessageUtils()
# Initialize AI service
self.ai_service = None
# Initialize document handler
self.document_handler = None
# Document capabilities
self.supports_documents = True
self.document_capabilities = ["read", "reference", "create"]
self.required_context = ["document_purpose", "target_audience"]
# Initialize protocol
self.protocol = AgentCommunicationProtocol()
# Initialize utilities
self.message_utils = MessageUtils()
# Track the latest generated document
self.last_document = {}
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
return {
"id": self.id,
"type": self.type,
"name": self.name,
"description": self.description,
"capabilities": self.capabilities,
"result_format": self.result_format
}
def get_base_prompt(self, document_type: str = "") -> str:
"""
Generiert einen Basis-Prompt für den Dokumentations-Agenten.
Args:
document_type: Typ des zu erstellenden Dokuments
Returns:
Basis-Prompt für den Dokumentations-Agenten
"""
# Basis-Prompt
prompt = f"""
Du bist {self.name}, ein {self.type} Agent.
{self.description}
Fähigkeiten: {self.capabilities}
{self.instructions}
"""
# Dokumenttyp-spezifische Anweisungen hinzufügen
if document_type:
prompt += self._get_document_type_instructions(document_type)
return prompt.strip()
def _get_document_type_instructions(self, document_type: str) -> str:
"""
Gibt spezifische Anweisungen für einen bestimmten Dokumenttyp zurück.
Args:
document_type: Typ des Dokuments
Returns:
Spezifische Anweisungen für den Dokumenttyp
"""
document_type = document_type.lower()
if "handbuch" in document_type or "anleitung" in document_type or "guide" in document_type:
return "\n\nHANDBUCH: Beginne mit Zweckbeschreibung, strukturiere in logische Schritte, verwende direkte Anweisungen."
elif "bericht" in document_type or "report" in document_type:
return "\n\nBERICHT: Beginne mit Executive Summary, strukturiere in thematische Abschnitte, halte professionellen Ton."
elif "prozess" in document_type or "process" in document_type:
return "\n\nPROZESS: Beschreibe Zweck, Ziele, Beteiligte, sequenzielle Schritte, Inputs/Outputs und Verantwortlichkeiten."
elif "präsentation" in document_type or "presentation" in document_type:
return "\n\nPRÄSENTATION: Klare Hauptpunkte, visuelle Elemente, Einleitung-Hauptteil-Schluss Struktur."
else:
return "\n\nDOKUMENT: Erstelle ein gut strukturiertes Dokument mit klarer Gliederung und präziser Sprache."
def _detect_document_type(self, message: str) -> str:
"""
Erkennt den Dokumenttyp aus der Nachricht.
Args:
message: Nachricht des Benutzers
Returns:
Erkannter Dokumenttyp
"""
message = message.lower()
if "handbuch" in message or "anleitung" in message or "guide" in message:
return "handbuch"
elif "bericht" in message or "report" in message:
return "bericht"
elif "prozess" in message or "process" in message or "ablauf" in message:
return "prozess"
elif "präsentation" in message or "presentation" in message or "folien" in message:
return "präsentation"
else:
return "dokument"
async def generate_title(self, task: str, document_type: str) -> str:
"""
Generiert einen Titel für das Dokument.
Args:
task: Die Aufgabe/Anfrage
document_type: Typ des Dokuments
Returns:
Generierter Titel
"""
prompt = f"""
Erstelle einen prägnanten, professionellen Titel für folgendes {document_type.capitalize()}:
AUFTRAG: {task}
Gib NUR den Titel zurück, ohne weitere Erklärungen oder Formatierungen.
"""
messages = [
{"role": "system", "content": "Du bist ein Experte für die Erstellung von Dokumenttiteln."},
{"role": "user", "content": prompt}
]
title = await self.chat_service.call_api(messages)
# Bereinige den Titel von Anführungszeichen und Überschriften-Symbolen
title = title.strip('"\'#*- \n\t')
return title
async def generate_summary(self, task: str, document_type: str, title: str) -> str:
"""
Generiert eine Zusammenfassung für das Dokument.
Args:
task: Die Aufgabe/Anfrage
document_type: Typ des Dokuments
title: Titel des Dokuments
Returns:
Generierte Zusammenfassung
"""
prompt = f"""
Erstelle eine prägnante Zusammenfassung für folgendes Dokument:
TITEL: {title}
TYP: {document_type.capitalize()}
AUFTRAG: {task}
Die Zusammenfassung soll einen Überblick über den Zweck und die Hauptinhalte des Dokuments geben.
Sie sollte etwa 3-5 Sätze umfassen und als eigenständiger Abschnitt funktionieren.
"""
messages = [
{"role": "system", "content": "Du bist ein Experte für die Erstellung prägnanter Dokumentzusammenfassungen."},
{"role": "user", "content": prompt}
]
summary = await self.chat_service.call_api(messages)
return summary.strip()
async def generate_toc_with_prompts(self, task: str, document_type: str, title: str, summary: str) -> Dict[str, str]:
"""
Generiert ein Inhaltsverzeichnis mit Prompts für die einzelnen Kapitel.
Args:
task: Die Aufgabe/Anfrage
document_type: Typ des Dokuments
title: Titel des Dokuments
summary: Zusammenfassung des Dokuments
Returns:
Dict mit Kapiteltiteln als Schlüssel und Prompts als Werte
"""
prompt = f"""
Erstelle ein strukturiertes Inhaltsverzeichnis für folgendes Dokument:
TITEL: {title}
TYP: {document_type.capitalize()}
AUFTRAG: {task}
ZUSAMMENFASSUNG: {summary}
Für jedes Kapitel gib auch einen kurzen Prompt an, der beschreibt, was in diesem Kapitel behandelt werden soll.
Formatiere deine Antwort als JSON-Objekt mit folgendem Format:
{{
"Kapitel 1: Titel": "Prompt für Kapitel 1",
"Kapitel 2: Titel": "Prompt für Kapitel 2",
...
}}
Beschränke dich auf so wenige Kapitel wie nötig, um das Thema dennoch umfassend zu behandeln. Schreibe in Prosa; verwende Listen nur dort, wo sie angebracht sind.
"""
messages = [
{"role": "system", "content": "Du bist ein Experte für die Strukturierung von Dokumenten und die Erstellung von Inhaltsverzeichnissen."},
{"role": "user", "content": prompt}
]
toc_response = await self.chat_service.call_api(messages)
# JSON aus der Antwort extrahieren
import json
import re
# Markdown-Code-Blöcke entfernen, falls vorhanden
toc_response = re.sub(r'```json\s*|\s*```', '', toc_response)
try:
toc_with_prompts = json.loads(toc_response)
return toc_with_prompts
except json.JSONDecodeError as e:
logger.error(f"Fehler beim Parsen des Inhaltsverzeichnisses: {str(e)}")
logger.error(f"Rohe Antwort: {toc_response}")
# Notfall-Fallback
return {
"1. Einleitung": "Einführung in das Thema und Überblick",
"2. Hauptteil": "Hauptinhalte des Dokuments",
"3. Schlussfolgerung": "Zusammenfassung und nächste Schritte"
info = super().get_agent_info()
info.update({
"metadata": {
"document_types": ["manual", "report", "process", "presentation", "document"],
"formats": ["markdown", "text"]
}
})
return info
async def generate_chapter_content(self, chapter_title: str, chapter_prompt: str,
task: str, document_type: str, title: str, summary: str) -> str:
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Generiert den Inhalt für ein bestimmtes Kapitel.
Process a message and create documentation.
Args:
chapter_title: Titel des Kapitels
chapter_prompt: Prompt für das Kapitel
task: Die Aufgabe/Anfrage
document_type: Typ des Dokuments
title: Titel des Dokuments
summary: Zusammenfassung des Dokuments
message: Input message
context: Optional context
Returns:
Generierter Kapitelinhalt
Response with documentation
"""
prompt = f"""
Erstelle detaillierten Inhalt für folgendes Kapitel eines {document_type}s:
# Extract workflow_id from context or message
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
DOKUMENT-TITEL: {title}
AUFGABE: {task}
KAPITEL: {chapter_title}
ANWEISUNG FÜR DIESES KAPITEL: {chapter_prompt}
Der Inhalt sollte detailliert, informativ und gut strukturiert sein.
Verwende bei Bedarf Unterüberschriften, Aufzählungen und Tabellen zur besseren Strukturierung.
Der Inhalt sollte direkt mit dem Kapiteltext beginnen, ohne den Kapiteltitel zu wiederholen.
"""
messages = [
{"role": "system", "content": "Du bist ein Experte für die Erstellung hochwertiger Dokumentationsinhalte."},
{"role": "user", "content": prompt}
]
chapter_content = await self.chat_service.call_api(messages)
return chapter_content.strip()
def _format_final_document(self, title: str, summary: str, toc: Dict[str, str], chapter_contents: Dict[str, str]) -> str:
"""
Formatiert das endgültige Dokument aus allen Teilen.
Args:
title: Titel des Dokuments
summary: Zusammenfassung
toc: Inhaltsverzeichnis (Dict mit Kapiteltiteln als Schlüssel)
chapter_contents: Kapitelinhalte (Dict mit Kapiteltiteln als Schlüssel und Inhalten als Werte)
Returns:
Formatiertes Dokument
"""
# Titel formatieren
doc = f"# {title}\n\n"
# Zusammenfassung hinzufügen
doc += f"## Zusammenfassung\n\n{summary}\n\n"
# Inhaltsverzeichnis hinzufügen
doc += "## Inhaltsverzeichnis\n\n"
for idx, chapter in enumerate(toc.keys(), 1):
# Extrahiere den reinen Kapitelnamen (entferne Nummerierung, falls vorhanden)
clean_chapter = chapter
if chapter.strip().startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) and '. ' in chapter:
clean_chapter = chapter.split('. ', 1)[1]
doc += f"{idx}. {clean_chapter}\n"
doc += "\n"
# Kapitelinhalte hinzufügen
for idx, (chapter, content) in enumerate(chapter_contents.items(), 1):
# Extrahiere den reinen Kapitelnamen (entferne Nummerierung, falls vorhanden)
clean_chapter = chapter
if chapter.strip().startswith(('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')) and '. ' in chapter:
clean_chapter = chapter.split('. ', 1)[1]
doc += f"## {idx}. {clean_chapter}\n\n{content}\n\n"
# Metadaten hinzufügen
doc += "---\n\n"
doc += f"**Erstellt durch:** {self.name}\n"
return doc
async def process_message(self, message: Dict[str, Any],
workflow: Dict[str, Any],
context: Dict[str, Any] = None,
log_func=None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und erstellt Dokumentation in einem strukturierten Prozess.
Args:
message: Die zu verarbeitende Nachricht
workflow: Der aktuelle Workflow
context: Zusätzlicher Kontext
log_func: Funktion für Workflow-Logging
Returns:
Die generierte Dokumentation
"""
# Initialize logging
workflow_id = workflow.get("id", "unknown")
# Get or create logging_utils
log_func = context.get("log_func") if context else None
logging_utils = LoggingUtils(workflow_id, log_func)
logging_utils.info(f"DocumentationAgent startet Dokumentationserstellung", "agents")
# Create response message
response = self.message_utils.create_message(workflow_id, role="assistant")
response["agent_type"] = self.type
response["agent_name"] = self.name
response["parent_message_id"] = message.get("id")
# Create response structure
response = {
"role": "assistant",
"content": "",
"agent_id": self.id,
"agent_type": self.type,
"agent_name": self.name,
"result_format": self.result_format,
"workflow_id": workflow_id,
"documents": []
}
try:
# Chat-Service initialisieren, falls noch nicht geschehen
if self.chat_service is None:
self.chat_service = ChatService()
# Task aus der Nachricht extrahieren
task = message.get("content", "")
if context and "task" in context:
task = context["task"]
# Dokumenttyp erkennen
document_type = self._detect_document_type(task)
logging_utils.info(f"Dokumenttyp erkannt: {document_type}", "agents")
# Schritt 1: Titel generieren
title = await self.generate_title(task, document_type)
logging_utils.info(f"Titel generiert: {title}", "agents")
# Schritt 2: Zusammenfassung generieren
summary = await self.generate_summary(task, document_type, title)
logging_utils.info("Zusammenfassung generiert", "agents")
# Schritt 3: Inhaltsverzeichnis mit Prompts generieren
toc_with_prompts = await self.generate_toc_with_prompts(task, document_type, title, summary)
logging_utils.info(f"Inhaltsverzeichnis mit {len(toc_with_prompts)} Kapiteln generiert", "agents")
# Schritt 4: Kapitelinhalte in einer Schleife generieren
chapter_contents = {}
for chapter_title, chapter_prompt in toc_with_prompts.items():
logging_utils.info(f"Generiere Inhalt für Kapitel: {chapter_title}", "agents")
content = await self.generate_chapter_content(
chapter_title, chapter_prompt, task, document_type, title, summary
# Create status update using protocol
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Starting document creation",
sender_id=self.id,
status="in_progress",
progress=0.0,
context_id=workflow_id
)
chapter_contents[chapter_title] = content
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Schritt 5: Dokument zusammenführen
final_document = self._format_final_document(title, summary, toc_with_prompts, chapter_contents)
logging_utils.info(f"Dokument fertiggestellt mit {len(final_document)} Zeichen", "agents")
# Extract task from message
task = message.get("content", "")
# Set the content in the response
response["content"] = final_document
# Detect document type
document_type = self._detect_document_type(task)
logging_utils.info(f"Creating {document_type} documentation", "execution")
# Finalize the message
self.message_utils.finalize_message(response)
response["result_format"] = self.result_format
# Process any attached documents
document_context = ""
if message.get("documents"):
logging_utils.info("Processing reference documents", "execution")
document_context = await self._process_documents(message)
# Update progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Reference documents processed",
sender_id=self.id,
status="in_progress",
progress=0.3,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Chat-Service schließen
await self.chat_service.close()
self.chat_service = None
# Enhanced prompt with document context
enhanced_prompt = f"{task}\n\n{document_context}"
# Assess complexity of the task
is_complex = await self._assess_complexity(enhanced_prompt)
# Generate title
title = await self._generate_title(enhanced_prompt, document_type)
logging_utils.info(f"Document title: {title}", "execution")
# Update progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Generating {document_type}: {title}",
sender_id=self.id,
status="in_progress",
progress=0.5,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Generate content based on complexity
if is_complex:
# For complex documents, use the AI service with enhanced prompt
content = await self._generate_complex_document(enhanced_prompt, document_type, title)
logging_utils.info("Complex document generated", "execution")
else:
# For simple documents, use direct generation
content = await self._generate_simple_document(enhanced_prompt, document_type, title)
logging_utils.info("Simple document generated", "execution")
# Final progress update
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Document creation completed",
sender_id=self.id,
status="completed",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Create a document artifact if document handler is available
if self.document_handler:
doc_id = f"doc_{uuid.uuid4()}"
document = {
"id": doc_id,
"source": {
"type": "generated",
"id": doc_id,
"name": title,
"content_type": "text/markdown",
"size": len(content)
},
"contents": [
{
"type": "text",
"text": content,
"is_extracted": True
}
]
}
# Add document to response
response["documents"].append(document)
# Store the latest document
self.last_document = document
# Update response content to reference the document
response["content"] = f"I've created a document titled '{title}' that contains the requested information. The document is attached to this message."
# If protocol message is required, send it
if context and context.get("require_protocol_message"):
result_message = self.send_document_result(
document_title=title,
document_content=content,
sender_id=self.id,
receiver_id=context.get("receiver_id", "workflow"),
context_id=workflow_id
)
# Just log the message creation
logging_utils.info(f"Created protocol result message: {result_message.id}", "execution")
else:
# If no document handler, just put content in response
response["content"] = content
return response
except Exception as e:
error_msg = f"Fehler bei der Dokumentationserstellung: {str(e)}"
error_msg = f"Error in documentation agent: {str(e)}"
logging_utils.error(error_msg, "error")
# Chat-Service schließen bei Fehler
if self.chat_service:
try:
await self.chat_service.close()
except:
pass
self.chat_service = None
# Create error response using protocol
error_message = self.protocol.create_error_message(
error_description=error_msg,
sender_id=self.id,
error_type="documentation",
error_details={"traceback": traceback.format_exc()},
context_id=workflow_id
)
# Create error response
response["content"] = f"## Fehler bei der Dokumentationserstellung\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
self.message_utils.finalize_message(response)
# Set error in response
response["content"] = f"## Error creating documentation\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
response["status"] = "error"
return response
async def _assess_complexity(self, task: str) -> bool:
"""
Assess task complexity to determine document structure.
Args:
task: The task description
Returns:
True if complex document needed, False otherwise
"""
if not self.ai_service:
# Default to complex if no AI service
return True
prompt = f"""
Analyze this task and determine if it requires a complex or simple document structure:
{task}
Respond with only "COMPLEX" or "SIMPLE".
"""
try:
response = await self.ai_service.call_api([
{"role": "system", "content": "You determine document complexity requirements."},
{"role": "user", "content": prompt}
])
return "COMPLEX" in response.upper()
except Exception:
# Default to complex on error
return True
async def _generate_title(self, task: str, document_type: str) -> str:
"""
Generate a title for the document.
Args:
task: The task description
document_type: Type of document
Returns:
Generated title
"""
if not self.ai_service:
# Default title if no AI service
return f"{document_type.capitalize()} Document"
prompt = f"""
Create a concise, professional title for this {document_type}:
{task}
Respond with ONLY the title, nothing else.
"""
try:
title = await self.ai_service.call_api([
{"role": "system", "content": "You create document titles."},
{"role": "user", "content": prompt}
])
# Clean up the title
return title.strip('"\'#*- \n\t')
except Exception:
# Default title on error
return f"{document_type.capitalize()} Document"
async def _generate_complex_document(self, task: str, document_type: str, title: str) -> str:
"""
Generate a complex document with structure.
Args:
task: The task description
document_type: Type of document
title: Document title
Returns:
Generated document content
"""
if not self.ai_service:
return f"# {title}\n\nUnable to generate complex document: AI service not available."
prompt = f"""
Create a comprehensive, well-structured {document_type} titled "{title}" based on:
{task}
The document should include:
1. A clear introduction with purpose and scope
2. Logically organized sections with headings
3. Detailed content with examples and evidence
4. A conclusion with key takeaways
5. Appropriate formatting using Markdown
Format the document in Markdown with proper headings, lists, and emphasis.
"""
try:
content = await self.ai_service.call_api([
{"role": "system", "content": "You create comprehensive, well-structured documentation."},
{"role": "user", "content": prompt}
])
# Ensure title is at the top
if not content.strip().startswith("# "):
content = f"# {title}\n\n{content}"
return content
except Exception as e:
return f"# {title}\n\nError generating document: {str(e)}"
async def _generate_simple_document(self, task: str, document_type: str, title: str) -> str:
"""
Generate a simple document without complex structure.
Args:
task: The task description
document_type: Type of document
title: Document title
Returns:
Generated document content
"""
if not self.ai_service:
return f"# {title}\n\nUnable to generate document: AI service not available."
prompt = f"""
Create a concise, focused {document_type} titled "{title}" based on:
{task}
The document should be clear, precise, and to the point without complex chapter structure.
Format using Markdown with appropriate headings and formatting.
"""
try:
content = await self.ai_service.call_api([
{"role": "system", "content": "You create concise, focused documentation."},
{"role": "user", "content": prompt}
])
# Ensure title is at the top
if not content.strip().startswith("# "):
content = f"# {title}\n\n{content}"
return content
except Exception as e:
return f"# {title}\n\nError generating document: {str(e)}"
def _detect_document_type(self, message: str) -> str:
"""
Detect document type from the message.
Args:
message: User message
Returns:
Detected document type
"""
message = message.lower()
if any(term in message for term in ["manual", "guide", "instruction", "tutorial"]):
return "manual"
elif any(term in message for term in ["report", "analysis", "assessment", "review"]):
return "report"
elif any(term in message for term in ["process", "workflow", "procedure", "steps"]):
return "process"
elif any(term in message for term in ["presentation", "slides", "deck"]):
return "presentation"
else:
return "document"
def send_document_result(self, document_title: str, document_content: str,
sender_id: str, receiver_id: str, context_id: str = None) -> AgentMessage:
"""Send a document result using the protocol"""
metadata = {
"document_type": self._detect_document_type(document_content),
"title": document_title,
"created_at": datetime.now().isoformat()
}
return self.protocol.create_result_message(
result_content=document_content,
sender_id=sender_id,
receiver_id=receiver_id,
task_id=f"doc_{uuid.uuid4()}",
output_data=metadata,
result_format=self.result_format,
context_id=context_id
)
def send_error_message(self, error_description: str, sender_id: str, receiver_id: str = None,
context_id: str = None) -> AgentMessage:
"""Send an error message using the protocol"""
return self.protocol.create_error_message(
error_description=error_description,
sender_id=sender_id,
receiver_id=receiver_id,
error_type="documentation_error",
error_details={"timestamp": datetime.now().isoformat()},
context_id=context_id
)
# Singleton-Instanz
# Singleton instance
_documentation_agent = None
def get_documentation_agent():
"""Gibt eine Singleton-Instanz des Dokumentations-Agenten zurück"""
"""Returns a singleton instance of the documentation agent"""
global _documentation_agent
if _documentation_agent is None:
_documentation_agent = DocumentationAgent()
return _documentation_agent
return _documentation_agent
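# --- Illustrative sketch (not part of this commit) ---
# Drives the refactored agent directly via process_message(message, context).
# Without an attached AI service the built-in fallbacks are used, so the
# printed content is the placeholder document; the IDs are made up.
if __name__ == "__main__":
    import asyncio

    async def demo():
        agent = get_documentation_agent()
        message = {"content": "Create a short report on API usage", "workflow_id": "wf-demo"}
        response = await agent.process_message(message, context={"workflow_id": "wf-demo"})
        print(response["result_format"])
        print(response.get("content", "")[:80])

    asyncio.run(demo())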

View file

@@ -1,209 +0,0 @@
"""
Sharepoint-Agent für die Interaktion mit Sharepoint-Ressourcen und Dokumenten.
Angepasst für das refaktorisierte Core-Modul.
"""
import logging
import traceback
from typing import List, Dict, Any, Optional, Union
from datetime import datetime
import uuid
from modules.agentservice_base import BaseAgent
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
logger = logging.getLogger(__name__)
class SharepointAgent(BaseAgent):
"""Agent für den Zugriff auf und die Arbeit mit SharePoint-Ressourcen"""
def __init__(self):
"""Initialisiert den SharePoint-Agenten"""
super().__init__()
self.id = "sharepoint"
self.name = "SharePoint-Agent"
self.type = "sharepoint"
self.description = "Zugriff auf und Arbeit mit SharePoint-Ressourcen"
self.capabilities = "document_search,metadata_extraction,content_integration,sharepoint_interaction"
self.instructions = """
Du bist der SharePoint-Agent, ein Spezialist für die Interaktion mit Microsoft SharePoint. Deine Aufgabe ist es:
1. SharePoint-Dokumente und -Ressourcen zu durchsuchen und abzurufen
2. Metadaten aus SharePoint-Dokumenten zu extrahieren und zu analysieren
3. Strukturierte Informationen aus SharePoint-Bibliotheken zu sammeln
4. Dokumente basierend auf Metadaten zu filtern und zu organisieren
5. Inhalte aus verschiedenen SharePoint-Quellen zu integrieren und zusammenzuführen
6. Informationen aus SharePoint-Listen und -Dokumentbibliotheken zu extrahieren
7. Zusammenfassungen und Analysen von SharePoint-Inhalten zu erstellen
Bei der Darstellung deiner Ergebnisse:
- Strukturiere die Informationen klar und übersichtlich
- Gib den Ursprung und die Metadaten der Dokumente an
- Zeige Beziehungen zwischen verschiedenen Dokumenten und Ressourcen auf
- Hebe wichtige Erkenntnisse und Muster hervor
- Biete Kontext und Relevanz für die gefundenen Informationen
"""
self.result_format = "DocumentList"
# Utility-Klassen initialisieren
self.message_utils = MessageUtils()
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
return {
"id": self.id,
"type": self.type,
"name": self.name,
"description": self.description,
"capabilities": self.capabilities,
"result_format": self.result_format
}
def get_prompt(self, message_context: Dict[str, Any]) -> str:
"""
Generiert einen angepassten Prompt für den SharePoint-Agenten.
Args:
message_context: Kontext der Nachricht
Returns:
Formatierter Prompt für den SharePoint-Agenten
"""
# Basis-Prompt vom BaseAgent holen
base_prompt = super().get_prompt(message_context)
# Zusätzliche Anweisungen für SharePoint-Interaktion
sharepoint_instructions = """
SHAREPOINT-INTERAKTIONS-RICHTLINIEN:
1. Präzisiere die Suchkriterien für SharePoint-Ressourcen
2. Identifiziere relevante Bibliotheken, Listen und Standorte
3. Definiere benötigte Metadaten und Inhalte
4. Berücksichtige Berechtigungsanforderungen
5. Priorisiere aktuelle und relevante Dokumente
6. Stelle eine strukturierte Darstellung der Ergebnisse sicher
Für eine gute SharePoint-Integration:
- Gib detaillierte Pfade und Standorte an
- Berücksichtige verschiedene Dokumenttypen und Formate
- Zeige Metadaten und Dokumenteigenschaften
- Biete Kontext zu den gefundenen Ressourcen
- Berücksichtige Versionsinformationen
"""
# Task aus dem Kontext extrahieren
task = message_context.get("task", "")
task_instructions = f"\nSHAREPOINT-AUFTRAG:\n{task}\n" if task else ""
# Vollständigen Prompt zusammenbauen
complete_prompt = f"{base_prompt}\n\n{sharepoint_instructions}\n{task_instructions}"
return complete_prompt.strip()
async def process_message(self, message: Dict[str, Any],
workflow: Dict[str, Any],
context: Dict[str, Any] = None,
log_func=None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und interagiert mit SharePoint.
Args:
message: Die zu verarbeitende Nachricht
workflow: Der aktuelle Workflow
context: Zusätzlicher Kontext
log_func: Funktion für Workflow-Logging
Returns:
Die generierte Antwort mit SharePoint-Inhalten
"""
# Initialize logging
workflow_id = workflow.get("id", "unknown")
logging_utils = LoggingUtils(workflow_id, log_func)
logging_utils.info(f"SharePointAgent startet SharePoint-Interaktion", "agents")
# Create response message
response = self.message_utils.create_message(workflow_id, role="assistant")
response["agent_type"] = self.type
response["agent_name"] = self.name
response["parent_message_id"] = message.get("id")
try:
# Hier würde die tatsächliche Interaktion mit SharePoint stattfinden
# In der finalen Implementierung würde ein SharePoint-Connector verwendet werden
# Als Beispiel generieren wir eine Standardantwort
logging_utils.info("SharePoint-Suche wird simuliert", "agents")
sharepoint_content = f"""Ich habe als {self.name} die SharePoint-Ressourcen durchsucht und folgende Ergebnisse gefunden:
## SharePoint-Suchergebnisse
Basierend auf deiner Anfrage habe ich folgende relevante Dokumente identifiziert:
### Dokumente
1. **Projektplan_2025.docx** (Letzte Änderung: 15.03.2025)
- Standort: Projekte/Strategische Planung
- Autor: Maria Schmidt
- Schlüsselinhalt: Zeitplan für Q2-Q4 2025, Ressourcenplanung, Meilensteine
2. **Marktanalyse_Q1_2025.pptx** (Letzte Änderung: 22.02.2025)
- Standort: Marketing/Marktforschung
- Autor: Thomas Müller
- Schlüsselinhalt: Aktuelle Markttrends, Wettbewerbsanalyse, Chancen und Risiken
3. **Budgetplanung_2025.xlsx** (Letzte Änderung: 01.03.2025)
- Standort: Finanzen/Planung
- Autor: Sarah Weber
- Schlüsselinhalt: Detaillierte Budgetaufschlüsselung nach Abteilungen und Quartalen
### SharePoint-Listen
1. **Projektstatusliste**
- 12 Einträge mit relevanten Projektstatusinformationen
- Letzte Aktualisierung: 25.03.2025
## Zusammenfassung der Inhalte
Die gefundenen Dokumente zeigen übereinstimmend, dass:
- Der Fokus im Jahr 2025 auf der Expansion in neue Märkte liegt
- Das Budget für Forschung und Entwicklung um 15% erhöht wurde
- Drei neue Hauptprojekte im zweiten Quartal starten werden
## Empfehlungen
Basierend auf den gefundenen Informationen empfehle ich:
1. Die Projektpläne für Q2 mit besonderem Fokus auf die neuen Hauptprojekte zu prüfen
2. Die Ressourcenzuweisung entsprechend der Budgeterhöhung anzupassen
3. Die Marktanalyse als Grundlage für die Expansionsstrategie zu verwenden
Die Dokumente sind alle aktuell und wurden von den verantwortlichen Fachabteilungen erstellt."""
logging_utils.info("SharePoint-Ergebnisse zusammengestellt", "agents")
# Set the content in the response
response["content"] = sharepoint_content
# Finalize the message
self.message_utils.finalize_message(response)
response["result_format"] = self.result_format
return response
except Exception as e:
error_msg = f"Fehler bei der SharePoint-Interaktion: {str(e)}"
logging_utils.error(error_msg, "error")
# Create error response
response["content"] = f"## Fehler bei der SharePoint-Interaktion\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
self.message_utils.finalize_message(response)
return response
# Singleton-Instanz
_sharepoint_agent = None
def get_sharepoint_agent():
"""Gibt eine Singleton-Instanz des SharePoint-Agenten zurück"""
global _sharepoint_agent
if _sharepoint_agent is None:
_sharepoint_agent = SharepointAgent()
return _sharepoint_agent
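# --- Illustrative sketch (not part of this commit) ---
# Calls the (pre-refactor) SharePoint agent, which still returns simulated
# results. The workflow dict only needs an "id" here; any further message and
# workflow fields are assumptions.
if __name__ == "__main__":
    import asyncio

    async def demo():
        agent = get_sharepoint_agent()
        message = {"id": "msg-1", "content": "Find the 2025 budget documents"}
        workflow = {"id": "wf-demo"}
        response = await agent.process_message(message, workflow)
        print(response["agent_name"], len(response.get("content", "")))

    asyncio.run(demo())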

View file

@@ -19,11 +19,13 @@ import requests
from modules.agentservice_base import BaseAgent
from connectors.connector_aichat_openai import ChatService
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
from modules.agentservice_protocol import AgentMessage, AgentCommunicationProtocol
logger = logging.getLogger(__name__)
class WebcrawlerAgent(BaseAgent):
"""Agent für Web-Recherche und Informationsbeschaffung"""
def __init__(self):
@@ -36,6 +38,14 @@ class WebcrawlerAgent(BaseAgent):
self.capabilities = "web_search,information_retrieval,data_collection,source_verification,content_integration"
self.result_format = "SearchResults"
# Add enhanced document capabilities
self.supports_documents = True
self.document_capabilities = ["read", "create"]
self.required_context = ["workflow_id"]
# Initialize protocol
self.protocol = AgentCommunicationProtocol()
# Chat-Service initialisieren
self.chat_service = ChatService()
@@ -61,74 +71,130 @@ class WebcrawlerAgent(BaseAgent):
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
return {
"id": self.id,
"type": self.type,
"name": self.name,
"description": self.description,
"capabilities": self.capabilities,
"result_format": self.result_format,
info = super().get_agent_info()
info.update({
"metadata": {
"max_url": self.max_url,
"max_result": self.max_result,
"timeout": self.timeout
}
}
async def process_message(self, message: Dict[str, Any],
workflow: Dict[str, Any],
context: Dict[str, Any] = None,
log_func=None) -> Dict[str, Any]:
})
return info
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und führt eine Web-Recherche durch.
Args:
message: Die zu verarbeitende Nachricht
workflow: Der aktuelle Workflow
context: Zusätzlicher Kontext
log_func: Funktion für Workflow-Logging
Returns:
Die generierte Antwort mit der Web-Recherche
"""
# Initialize logging
workflow_id = workflow.get("id", "unknown")
logging_utils = LoggingUtils(workflow_id, log_func)
logging_utils.info(f"WebcrawlerAgent startet Web-Recherche", "agents")
# Extract workflow_id from context or message
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
# Create response message
response = self.message_utils.create_message(workflow_id, role="assistant")
response["agent_type"] = self.type
response["agent_name"] = self.name
response["parent_message_id"] = message.get("id")
# Get or create logging_utils
log_func = context.get("log_func") if context else None
logging_utils = LoggingUtils(workflow_id, log_func)
# Send status update using protocol
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Starte Web-Recherche",
sender_id=self.id,
status="in_progress",
progress=0.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Create response structure
response = {
"role": "assistant",
"content": "",
"agent_id": self.id,
"agent_type": self.type,
"agent_name": self.name,
"result_format": self.result_format,
"workflow_id": workflow_id
}
try:
# Get the query from the message
prompt = await self.get_prompt(message)
logging_utils.info(f"Web-Recherche für: {prompt[:50]}...", "agents")
# Führe die Web-Recherche durch und warte auf das Ergebnis mit await
# Update progress using protocol
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Recherchiere: {prompt[:30]}...",
sender_id=self.id,
status="in_progress",
progress=0.3,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Führe die Web-Recherche durch
web_query_result = await self.get_web_query(message)
logging_utils.info("Web-Recherche abgeschlossen", "agents")
# Final status update
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Web-Recherche abgeschlossen",
sender_id=self.id,
status="completed",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Set the content in the response
response["content"] = web_query_result
# Finalize the message
self.message_utils.finalize_message(response)
response["result_format"] = self.result_format
return response
except Exception as e:
error_msg = f"Fehler bei der Web-Recherche: {str(e)}"
logging_utils.error(error_msg, "error")
# Create error response using protocol
error_message = self.protocol.create_error_message(
error_description=error_msg,
sender_id=self.id,
error_type="web_search",
error_details={"traceback": traceback.format_exc()},
context_id=workflow_id
)
response["content"] = f"## Fehler bei der Web-Recherche\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
self.message_utils.finalize_message(response)
return response
def send_document_request(self, document_description: str, sender_id: str, receiver_id: str, filters: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
"""Send a document request using the protocol"""
return self.protocol.create_document_request_message(
document_description=document_description,
sender_id=sender_id,
receiver_id=receiver_id,
filters=filters,
context_id=context_id
)
def send_result_message(self, result_content: str, sender_id: str, receiver_id: str, task_id: str,
output_data: Dict[str, Any] = None, context_id: str = None) -> AgentMessage:
"""Send a result message using the protocol"""
return self.protocol.create_result_message(
result_content=result_content,
sender_id=sender_id,
receiver_id=receiver_id,
task_id=task_id,
output_data=output_data,
result_format="SearchResults",
context_id=context_id
)
async def get_prompt(self, message_context: Dict[str, Any]) -> str:
task = message_context.get("content", "")
@ -142,17 +208,29 @@ class WebcrawlerAgent(BaseAgent):
logger.info(f"Web analysis prompt '{prompt}' delivers {len(result_json)} results.")
if isinstance(result_json, list):
total_tokens = 0
for i, result in enumerate(result_json, 1):
# Limit content size for each result
result_data_limited = self.limit_text_for_api(result['data'], max_tokens=15000) # Allow ~15000 tokens per result
web_answer_instructions = f"""
Fass das Resultat gemäss dem Auftrag zusammen in maximal rund 2000 Zeichen. Auftrag = '{prompt.replace("'","")}'
Fasse die wichtigsten Erkenntnisse zusammen und setze sie in Bezug zur ursprünglichen Anfrage. Die Einleitung kannst Du weglassen.
Achte darauf, nur relevante und qualitativ hochwertige Informationen zu extrahieren, welche einen Bezug zum Auftrag haben, und übersichtlich zu präsentieren. Vermittle ein ausgewogenes Bild der recherchierten Informationen.
Dies ist das Resultat:
{result_data_limited}
"""
# Count tokens in the instructions to ensure we don't exceed API limits
instruction_tokens = self.count_tokens(web_answer_instructions)
if total_tokens + instruction_tokens > 60000:
logger.warning(f"Skipping result {i} to avoid exceeding token limit")
break
total_tokens += instruction_tokens
# Zusätzliche Anweisungen für Web-Recherche
content_text = await self.chat_service.call_api(
messages=[
@ -166,16 +244,25 @@ class WebcrawlerAgent(BaseAgent):
}
]
)
# Create a summary but ensure we stay within token limits
content_summary = content_text[:2000]  # Limit to ~2000 characters
result_data += f"\n\n[{i}] {result['title']}\nURL: {result['url']}\nSnippet: {result['snippet']}\nContent: {content_summary}"
summary_src += f"\n{content_summary}"
# Update token count
total_tokens += self.count_tokens(content_summary) + 100 # Add buffer for formatting
else:
result_data = "no data received"
logger.info(f"Web analysis result sent {len(result_data)}B")
# Zusätzliche Zusammenfassung
summary=""
if len(summary_src)>1:
summary = ""
if len(summary_src) > 1:
# Limit summary source to ensure we don't exceed API limits
summary_src_limited = self.limit_text_for_api(summary_src, max_tokens=10000)
summary = await self.chat_service.call_api(
messages=[
{
@ -184,7 +271,7 @@ class WebcrawlerAgent(BaseAgent):
},
{
"role": "user",
"content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src}\n"
"content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src_limited}\n"
}
]
)
@ -193,231 +280,323 @@ class WebcrawlerAgent(BaseAgent):
result = f"## Web-Recherche Ergebnisse\n\n### Zusammenfassung\n{summary}\n\n### Detaillierte Ergebnisse{result_data}"
return result
async def run_web_query(self, prompt: str) -> List[Dict]:
if prompt == "":
return []
ptext = f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.
'url': A list of maximum {self.max_url} specific URLs extracted from the task string.
'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
"""
content_text = await self.chat_service.call_api(
messages=[
{
"role": "system",
"content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
},
{
"role": "user",
"content": ptext
}
]
)
# Remove markdown formatting if present
if content_text.startswith("```json"):
# Find the end of the JSON block
end_marker = "```"
end_index = content_text.rfind(end_marker)
if end_index != -1:
# Extract the JSON content without the markdown markers
content_text = content_text[7:end_index].strip()
# Now parse the JSON
try:
logger.info(f"Valid json received: {str(content_text)}")
pjson = json.loads(content_text)
# Now call scrape_json with the parsed dictionary
result_json = await self.scrape_json(pjson)
return result_json
except json.JSONDecodeError as e:
logger.error(f"Failed to parse JSON: {e}")
logger.error(f"Cleaned content: {content_text[:100]}...")
return []
async def scrape_json(self, research_strategy: Dict[str, List]) -> List[Dict]:
"""
Scrapes web content based on a research strategy JSON.
Args:
research_strategy: A dictionary containing:
- 'skey': List of search keywords
- 'url': List of direct URLs to scrape
Returns:
List of result dictionaries with title, url, snippet and scraped content
"""
logger.info("Starting JSON-based web scraping")
results = []
# Validate input structure
if not isinstance(research_strategy, dict):
logger.error("Invalid research_strategy format: not a dictionary")
return {"error": "Invalid research_strategy format: not a dictionary"}
keys = research_strategy.get("skey", [])
direct_urls = research_strategy.get("url", [])
if not isinstance(keys, list) or not isinstance(direct_urls, list):
logger.error("Invalid research_strategy format: keys, or url is not a list")
return {"error": "Invalid research_strategy format: keys, or url is not a list"}
# Process search keywords through search engine
for keyword in keys:
logger.info(f"Processing keyword: {keyword}")
found_results = self.search_web(keyword)  # List with Dict: title,url,snippet,data
logger.info(f"... {len(found_results)} results found")
results.extend(found_results)
# Process direct URLs
logger.info(f"Processing {len(direct_urls)} direct URLs")
for url in direct_urls:
# Skip URLs that were already collected via the keyword search
if any(r.get('url') == url for r in results):
logger.info(f"Skipping already scraped URL: {url}")
continue
soup = self.read_url(url)
# Extract title from the page if it exists
if isinstance(soup, BeautifulSoup):
title_tag = soup.find('title')
title = title_tag.text.strip() if title_tag else "No title"
# Alternative: look for an h1 tag if the title tag is missing
if title == "No title":
h1_tag = soup.find('h1')
if h1_tag:
title = h1_tag.text.strip()
else:
# Handle the case where soup is an error message string
title = "Error fetching page"
results.append(self.parse_result(soup, title, url))
logger.info(f"JSON scraping completed. Scraped {len(results)} URLs in total")
return results
def extract_main_content(self, soup: BeautifulSoup, max_chars: int = 30000) -> str:
"""
Extract the main content from an HTML page while limiting character count.
Args:
soup: BeautifulSoup object containing the page content
max_chars: Maximum number of characters to extract
Returns:
Extracted main content as string
"""
if not isinstance(soup, BeautifulSoup):
return str(soup)[:max_chars]
# Try to find main content elements in order of priority
main_content = None
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
content = soup.select_one(selector)
if content:
main_content = content
break
# If no main content found, use the body
if not main_content:
main_content = soup.find('body') or soup
# Remove script, style, nav, footer elements that don't contribute to main content
for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
element.extract()
# Extract text content
text_content = main_content.get_text(separator=' ', strip=True)
# Limit to max_chars
return text_content[:max_chars]
def tokenize_for_counting(self, text: str) -> List[str]:
"""
Simple token counter for estimating token usage.
This is an approximation since the exact tokenization depends on the model.
Args:
text: Input text
Returns:
List of tokens
"""
# Simple tokenization by splitting on whitespace and punctuation
import re
return re.findall(r'\w+|[^\w\s]', text)
def count_tokens(self, text: str) -> int:
"""
Count the approximate number of tokens in a text.
Args:
text: Input text
Returns:
Estimated token count
"""
tokens = self.tokenize_for_counting(text)
return len(tokens)
def limit_text_for_api(self, text: str, max_tokens: int = 60000) -> str:
"""
Limit the text to a maximum number of tokens.
Args:
text: Input text
max_tokens: Maximum number of tokens allowed
Returns:
Limited text
"""
if not text:
return ""
tokens = self.tokenize_for_counting(text)
# If text is already under the limit, return as is
if len(tokens) <= max_tokens:
return text
# Otherwise, truncate text to max_tokens
return " ".join(tokens[:max_tokens]) + "... [content truncated due to length]"
def search_web(self, query: str) -> List[Dict]:
formatted_query = quote_plus(query)
url = f"https://html.duckduckgo.com/html/?q={formatted_query}"
search_results_soup = self.read_url(url)
if not search_results_soup or search_results_soup.select('.result') is None or len(search_results_soup.select('.result')) == 0:
logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
return []
# Extract search results
results = []
# Find all result containers
result_elements = search_results_soup.select('.result')
for result in result_elements:
# Extract title
title_element = result.select_one('.result__a')
title = title_element.text.strip() if title_element else 'No title'
# Extract URL (DuckDuckGo uses redirects, need to extract from href param)
url_element = title_element.get('href') if title_element else ''
extracted_url = 'No URL'
if url_element:
# Extract the actual URL from DuckDuckGo's redirect
if url_element.startswith('/d.js?q='):
start = url_element.find('?q=') + 3  # Skip '?q='
end = url_element.find('&', start) if '&' in url_element[start:] else None
extracted_url = unquote(url_element[start:end])
# Make sure the URL has the correct protocol prefix
if not extracted_url.startswith(('http://', 'https://')):
if not extracted_url.startswith('//'):
extracted_url = 'https://' + extracted_url
else:
extracted_url = 'https:' + extracted_url
else:
extracted_url = url_element
# Extract snippet directly from search results page
snippet_element = result.select_one('.result__snippet')
snippet = snippet_element.text.strip() if snippet_element else 'No description'
# Now fetch the actual page content for the data field
target_page_soup = self.read_url(extracted_url)
# Use the new content extraction method to limit content size
content = self.extract_main_content(target_page_soup, max_chars=30000)
results.append({
'title': title,
'url': extracted_url,
'snippet': snippet,
'data': content
})
# Limit the number of results if needed
if len(results) >= self.max_result:
break
return results
def read_url(self, url: str) -> BeautifulSoup:
"""
Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben.
Args:
url: Die zu lesende URL
Returns:
BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}
try:
import time
# Initialer Request
response = requests.get(url, headers=headers, timeout=10)
# Polling für Status 202
if response.status_code == 202:
# Maximal vier Versuche mit steigenden Intervallen
backoff_times = [0.5, 1.0, 2.0, 5.0]  # 0.5s, 1s, 2s, 5s
for wait_time in backoff_times:
time.sleep(wait_time)  # Warten mit steigender Zeit
response = requests.get(url, headers=headers, timeout=10)
# Wenn kein 202 mehr, dann abbrechen
if response.status_code != 202:
break
# Für andere Fehler-Status einen Fehler auslösen
response.raise_for_status()
# HTML parsen
return BeautifulSoup(response.text, 'html.parser')
except Exception as e:
# Leeres BeautifulSoup-Objekt erstellen
return BeautifulSoup("<html><body></body></html>", 'html.parser')
def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
# Extract snippet/description
snippet_element = data.select_one('.result__snippet')
snippet = snippet_element.text.strip() if snippet_element else 'No description'
result = {
'title': title,
'url': url,
'snippet': snippet,
'data': data.prettify()
}
return result
# Singleton-Instanz
_webcrawler_agent = None
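For reference, the research strategy that `run_web_query` requests from the model is a two-key JSON object that `scrape_json` then consumes. A minimal sketch of driving `scrape_json` directly with such a structure follows; the example URLs and keywords are placeholders, and it assumes the `WebcrawlerAgent` constructor runs without extra configuration.

```python
import asyncio

# Hypothetical strategy object; in production it is produced by the LLM call in run_web_query.
strategy = {
    "url": ["https://example.org/report"],          # direct pages to scrape
    "skey": ["current market size smart sensors"],  # search phrases for DuckDuckGo
}

async def demo():
    agent = WebcrawlerAgent()
    results = await agent.scrape_json(strategy)
    for r in results:
        print(r["title"], r["url"], len(r["data"]))

asyncio.run(demo())
```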

View file

@ -1,36 +1,61 @@
"""
Erweiterte Basisklasse für Agenten im Agentservice.
Dieser Modul stellt eine Basis-Agent-Klasse mit Rückgabeformat-Attribut für spezialisierte Agenten bereit.
Enhanced base agent class for the Agentservice.
Provides improved communication and document handling capabilities.
"""
import logging
from typing import List, Dict, Any, Optional, Tuple
import json
from typing import Dict, Any, List, Optional, Tuple, Union
import asyncio
from datetime import datetime
import uuid
logger = logging.getLogger(__name__)
class BaseAgent:
"""Basisklasse für alle Agenten im System"""
"""
Enhanced base agent class with improved communication capabilities.
All specialized agents should inherit from this class.
"""
def __init__(self):
"""Initialisiert den Basis-Agenten"""
"""Initialize the enhanced agent."""
self.id = "base_agent"
self.name = "Base Agent"
self.type = "base"
self.description = "Basisagent als Vorlage für spezialisierte Agenten"
self.capabilities = "Grundlegende Agentenoperationen"
self.instructions = """
Als Basis-Agent kannst du grundlegende Aufgaben erledigen.
Diese Anweisungen sollten von spezialisierten Agenten überschrieben werden.
self.description = "Base agent for the Agentservice"
self.capabilities = "Basic agent operations"
self.result_format = "Text"
# New properties for document handling
self.supports_documents = True
self.document_capabilities = ["read", "reference"]
self.required_context = []
# System dependencies
self.ai_service = None
self.document_handler = None
self.lucydom_interface = None
def set_dependencies(self, ai_service=None, document_handler=None, lucydom_interface=None):
"""
# Neues Attribut für das Rückgabeformat
self.result_format = "Text" # Standard: Textformat
Set system dependencies.
Args:
ai_service: AI service for text generation
document_handler: Document handler for document operations
lucydom_interface: LucyDOM interface for database access
"""
self.ai_service = ai_service
self.document_handler = document_handler
self.lucydom_interface = lucydom_interface
def get_agent_info(self) -> Dict[str, Any]:
"""
Gibt Informationen über den Agenten zurück.
Get detailed information about the agent.
Returns:
Dict mit Agenten-Informationen
Dictionary with agent information
"""
return {
"id": self.id,
@ -38,87 +63,198 @@ class BaseAgent:
"type": self.type,
"description": self.description,
"capabilities": self.capabilities,
"instructions": self.instructions,
"result_format": self.result_format, # Rückgabeformat hinzugefügt
"used": False, # Wird zur Laufzeit aktualisiert
"last_result_status": None # Wird zur Laufzeit aktualisiert
"result_format": self.result_format,
"supports_documents": self.supports_documents,
"document_capabilities": self.document_capabilities,
"required_context": self.required_context
}
def get_prompt(self, message_context: Dict[str, Any]) -> str:
def get_capabilities(self) -> List[str]:
"""
Generiert einen an den Agenten angepassten Prompt basierend auf Kontext.
Get a list of agent capabilities.
Args:
message_context: Kontext der Nachricht
Returns:
Formatierter Prompt für den Agenten
List of capability strings
"""
# Basis-Prompt, der von spezialisierten Agenten überschrieben werden kann
base_prompt = f"""
Du bist {self.name}, ein {self.type} Agent.
{self.description}
Deine Fähigkeiten: {self.capabilities}
{self.instructions}
Rückgabeformat: {self.result_format}
Formatiere deine Antwort klar und strukturiert. Beantworte alle Aspekte der Anfrage.
Deklariere am Ende deiner Antwort den Status deines Ergebnisses:
[STATUS: ERGEBNIS] - Wenn du ein vollständiges, konkretes Ergebnis geliefert hast
[STATUS: TEILWEISE] - Wenn du ein teilweises Ergebnis geliefert hast
[STATUS: PLAN] - Wenn du nur einen Plan vorgeschlagen hast
# Split capabilities into a list
if isinstance(self.capabilities, str):
return [cap.strip() for cap in self.capabilities.split(",")]
return []
def get_supported_formats(self) -> List[str]:
"""
Get supported output formats.
return base_prompt.strip()
Returns:
List of supported format strings
"""
if isinstance(self.result_format, str):
return [fmt.strip() for fmt in self.result_format.split(",")]
return ["Text"]
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und generiert eine Antwort.
Sollte von spezialisierten Agenten überschrieben werden.
Process a message and generate a response.
Args:
message: Die zu verarbeitende Nachricht
context: Zusätzlicher Kontext (optional)
message: Input message
context: Optional context information
Returns:
Die generierte Antwort
Response message
"""
# Basis-Implementierung, die einfach eine Standardantwort zurückgibt
return {
"role": "assistant",
"content": f"Ich bin {self.name} und habe deine Anfrage erhalten. Allerdings bin ich nur eine Basisimplementierung ohne spezifische Funktionalität. [STATUS: PLAN]",
"agent_type": self.type,
"result_format": self.result_format # Rückgabeformat in der Antwort
}
# Basic implementation - should be overridden by specialized agents
if not self.ai_service:
logger.warning(f"Agent {self.id} has no AI service configured")
return {
"role": "assistant",
"content": f"I'm {self.name}, but I'm not properly configured. Please set up the AI service.",
"agent_id": self.id,
"agent_type": self.type,
"result_format": "Text"
}
# Process documents if available and set up document handler
document_context = ""
if self.supports_documents and self.document_handler and message.get("documents"):
document_context = await self._process_documents(message)
# Create enhanced prompt
prompt = self._create_enhanced_prompt(message, document_context, context)
# Generate response
try:
response_content = await self.ai_service.call_api([
{"role": "system", "content": self._get_system_prompt()},
{"role": "user", "content": prompt}
])
# Process the response to extract any special instructions or status
content, status = self._process_response(response_content)
return {
"role": "assistant",
"content": content,
"agent_id": self.id,
"agent_type": self.type,
"agent_name": self.name,
"result_format": self.result_format,
"status": status,
"workflow_id": message.get("workflow_id"),
"documents": message.get("documents", []) # Pass through documents
}
except Exception as e:
logger.error(f"Error in agent {self.id}: {str(e)}")
return {
"role": "assistant",
"content": f"I encountered an error: {str(e)}",
"agent_id": self.id,
"agent_type": self.type,
"result_format": "Text",
"status": "error"
}
def extract_status(self, content: str) -> Tuple[str, str]:
async def _process_documents(self, message: Dict[str, Any]) -> str:
"""
Extrahiert den Status aus dem Inhalt der Antwort.
Process documents in the message.
Args:
content: Inhalt der Antwort
message: Input message with documents
Returns:
Tuple mit (bereinigter Text, Status)
Document context as text
"""
import re
# Simply extract text from documents
if not self.document_handler:
return ""
# Standard-Status, falls keine Deklaration gefunden wird
status = "UNBEKANNT"
return self.document_handler.merge_document_contents(message)
def _create_enhanced_prompt(self, message: Dict[str, Any], document_context: str, context: Dict[str, Any] = None) -> str:
"""
Create an enhanced prompt with context.
# Suche nach Status-Deklaration
status_pattern = r'\[STATUS:\s*(ERGEBNIS|TEILWEISE|PLAN)\]'
match = re.search(status_pattern, content, re.IGNORECASE)
if match:
# Extrahiere den Status
status = match.group(1).upper()
Args:
message: Input message
document_context: Document context
context: Optional additional context
# Entferne die Status-Deklaration aus dem Text
content = re.sub(status_pattern, '', content, flags=re.IGNORECASE).strip()
Returns:
Enhanced prompt
"""
prompt = message.get("content", "")
return content, status
# Add document context if available
if document_context:
prompt += f"\n\n=== DOCUMENT CONTEXT ===\n{document_context}"
# Add any additional context
if context:
# Add expected format if specified
if "expected_format" in context:
prompt += f"\n\nPlease format your response as: {context['expected_format']}"
# Add dependency outputs if available
if "dependency_outputs" in context:
prompt += "\n\n=== OUTPUTS FROM PREVIOUS ACTIVITIES ===\n"
for key, value in context["dependency_outputs"].items():
if isinstance(value, dict) and "content" in value:
prompt += f"\n--- {key} ---\n{value['content']}\n"
else:
prompt += f"\n--- {key} ---\n{str(value)}\n"
return prompt
def _get_system_prompt(self) -> str:
"""
Get the system prompt for the agent.
Returns:
System prompt string
"""
return f"""
You are {self.name}, a specialized {self.type} agent.
{self.description}
Your capabilities include: {self.capabilities}
You should format your responses according to: {self.result_format}
Respond clearly and helpfully to the user's request.
When appropriate, include a status indicator at the end of your message:
[STATUS: COMPLETE] - When you've fully addressed the request
[STATUS: PARTIAL] - When you've partially addressed the request
[STATUS: QUESTION] - When you need more information
"""
def _process_response(self, response: str) -> Tuple[str, str]:
"""
Process the response to extract status and clean content.
Args:
response: Raw response from the AI
Returns:
Tuple of (cleaned content, status)
"""
# Default status
status = "complete"
# Check for status tags
import re
status_match = re.search(r'\[STATUS:\s*(COMPLETE|PARTIAL|QUESTION)\]', response, re.IGNORECASE)
if status_match:
status_value = status_match.group(1).lower()
# Remove the status tag
content = re.sub(r'\[STATUS:\s*(COMPLETE|PARTIAL|QUESTION)\]', '', response, flags=re.IGNORECASE).strip()
return content, status_value
return response, status
# Factory functions
def get_enhanced_base_agent() -> BaseAgent:
"""Get an instance of the enhanced base agent."""
return BaseAgent()
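A minimal sketch of how a specialized agent is expected to build on this class; the `EchoAgent` name and the fake AI service are illustrative only, and the commented-out wiring assumes the dependency-injection flow shown above.

```python
class EchoAgent(BaseAgent):
    """Toy specialization: only the metadata differs, processing is inherited."""
    def __init__(self):
        super().__init__()
        self.id = "echo_agent"
        self.name = "Echo Agent"
        self.type = "echo"
        self.description = "Repeats the request back to the user"
        self.capabilities = "echo,debugging"

class FakeAIService:
    """Stand-in for the real chat connector; returns the prompt unchanged."""
    async def call_api(self, messages):
        return messages[-1]["content"] + "\n[STATUS: COMPLETE]"

# Assumed usage inside an async context:
# agent = EchoAgent()
# agent.set_dependencies(ai_service=FakeAIService())
# response = await agent.process_message({"content": "ping", "workflow_id": "wf_1"})
```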

View file

@ -1,750 +0,0 @@
"""
Erweiterter Coder-Agent für die Entwicklung und Ausführung von Python-Code.
Integriert direkten Code-Executor zur Vereinfachung des Ablaufs.
"""
import logging
import json
import os
import asyncio
import re
import uuid
import subprocess
import tempfile
import traceback
import sys
import importlib.util
import inspect
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union
from modules.agentservice_base import BaseAgent
from modules.lucydom_interface import get_lucydom_interface
from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils
from connectors.connector_aichat_openai import ChatService
from modules import agentservice_code_helpers
logger = logging.getLogger(__name__)
class CodeExecutor:
"""
Führt generierten Code in einer isolierten virtuellen Umgebung aus,
während Zugriff auf spezifische App-Module gewährt wird und
automatisch erforderliche Pakete installiert werden.
"""
def __init__(self,
app_modules: List[str] = None,
venv_path: Optional[str] = None,
timeout: int = 30,
max_memory_mb: int = 512,
allowed_packages: List[str] = None,
blocked_packages: List[str] = None):
"""
Initialisiert den CodeExecutor.
Args:
app_modules: Liste von Modulnamen, die dem generierten Code zur Verfügung stehen sollen
venv_path: Pfad zur virtuellen Umgebung. Falls None, wird eine temporäre erstellt
timeout: Maximale Ausführungszeit in Sekunden
max_memory_mb: Maximaler Arbeitsspeicher in MB
allowed_packages: Liste erlaubter Pakete (wenn None, werden alle erlaubt, außer blockierte)
blocked_packages: Liste blockierter Pakete (z.B. gefährliche oder ressourcenintensive)
"""
self.app_modules = app_modules or []
self.venv_path = venv_path
self.timeout = timeout
self.max_memory_mb = max_memory_mb
self.temp_dir = None
self.allowed_packages = allowed_packages
self.blocked_packages = blocked_packages or ["cryptography", "flask", "django", "tornado", "requests"]
def _create_venv(self) -> str:
"""Erstellt eine virtuelle Umgebung und gibt den Pfad zurück."""
if self.venv_path and os.path.exists(self.venv_path):
return self.venv_path
# Temporäres Verzeichnis für die virtuelle Umgebung erstellen
self.temp_dir = tempfile.mkdtemp(prefix="ai_code_exec_")
venv_path = os.path.join(self.temp_dir, "venv")
try:
# Virtuelle Umgebung erstellen
logger.info(f"Erstelle virtuelle Umgebung in {venv_path}")
subprocess.run([sys.executable, "-m", "venv", venv_path],
check=True,
capture_output=True)
return venv_path
except subprocess.CalledProcessError as e:
logger.error(f"Fehler beim Erstellen der virtuellen Umgebung: {e}")
raise RuntimeError(f"Konnte venv nicht erstellen: {e}")
def _get_pip_executable(self, venv_path: str) -> str:
"""Ermittelt den Pfad zum pip-Executable in der virtuellen Umgebung."""
if os.name == 'nt': # Windows
return os.path.join(venv_path, "Scripts", "pip.exe")
else: # Unix/Linux
return os.path.join(venv_path, "bin", "pip")
def _get_python_executable(self, venv_path: str) -> str:
"""Ermittelt den Pfad zum Python-Executable in der virtuellen Umgebung."""
if os.name == 'nt': # Windows
return os.path.join(venv_path, "Scripts", "python.exe")
else: # Unix/Linux
return os.path.join(venv_path, "bin", "python")
def _install_packages(self, packages: List[str], venv_path: str) -> Tuple[bool, str]:
"""
Installiert Pakete in der virtuellen Umgebung.
Args:
packages: Liste der zu installierenden Pakete
venv_path: Pfad zur virtuellen Umgebung
Returns:
Tuple aus (Erfolg, Fehlermeldung)
"""
if not packages:
return True, ""
# Überprüfen, ob Pakete erlaubt sind
blocked = []
for package in packages:
# Paketname ohne Version extrahieren
pkg_name = re.split('[=<>]', package)[0].strip()
if self.blocked_packages and pkg_name.lower() in [p.lower() for p in self.blocked_packages]:
blocked.append(pkg_name)
if self.allowed_packages and pkg_name.lower() not in [p.lower() for p in self.allowed_packages]:
blocked.append(pkg_name)
if blocked:
return False, f"Die folgenden Pakete sind nicht erlaubt: {', '.join(blocked)}"
# Pakete installieren
pip_executable = self._get_pip_executable(venv_path)
logger.info(f"Installiere Pakete in virtueller Umgebung: {', '.join(packages)}")
try:
# pip aktualisieren - mache diesen Schritt optional
try:
subprocess.run(
[pip_executable, "install", "--upgrade", "pip"],
check=False, # Changed from True to False to make it optional
capture_output=True,
timeout=60
)
except Exception as pip_error:
# Log the error but continue
logger.warning(f"Pip-Upgrade fehlgeschlagen, fahre mit Paketinstallation fort: {pip_error}")
# Pakete installieren
process = subprocess.run(
[pip_executable, "install"] + packages,
check=True,
capture_output=True,
text=True,
timeout=120 # 2 Minuten Timeout für Paketinstallation
)
return True, process.stdout
except subprocess.CalledProcessError as e:
error_msg = f"Fehler bei der Paketinstallation: {e.stderr}"
logger.error(error_msg)
return False, error_msg
except subprocess.TimeoutExpired:
return False, "Zeitüberschreitung bei der Paketinstallation."
except Exception as e:
return False, f"Unerwarteter Fehler bei der Paketinstallation: {str(e)}"
def _extract_required_packages(self, code: str) -> List[str]:
"""
Extrahiert benötigte Pakete aus dem Code durch Analyse von Import-Statements
und Pip-Installationsanweisungen.
Args:
code: Der Python-Code
Returns:
Liste der erkannten Paketnamen
"""
packages = set()
# Paketkommentare erkennen (# pip install package)
pip_comments = re.findall(r'#\s*pip\s+install\s+([^#\n]+)', code)
for comment in pip_comments:
for pkg in comment.split():
if pkg and not pkg.startswith('-'):
packages.add(pkg.strip())
# Import-Statements analysieren
import_lines = re.findall(r'^(?:import|from)\s+([^\s.]+)(?:\s+import|\s*$|\.)', code, re.MULTILINE)
# Standardmodule, die nicht installiert werden müssen
std_modules = {
'os', 'sys', 'time', 'datetime', 'math', 're', 'random', 'json',
'collections', 'itertools', 'functools', 'pathlib', 'shutil',
'tempfile', 'uuid', 'subprocess', 'threading', 'logging',
'traceback', 'io', 'copy'
}
# Module der App, die nicht installiert werden müssen
app_modules_prefixes = set(m.split('.')[0] for m in self.app_modules)
for module in import_lines:
if module not in std_modules and module not in app_modules_prefixes:
packages.add(module)
return list(packages)
def _create_module_loader(self) -> str:
"""
Erstellt ein Hilfsskript, das App-Module in die venv importiert.
Gibt den Pfad zum Hilfsskript zurück.
"""
if not self.app_modules:
return ""
# Temporäre Datei für den Module-Loader erstellen
module_loader_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
"module_loader.py")
# Pfad zu den App-Modulen bestimmen
app_path = os.path.abspath(os.path.dirname(os.path.dirname(__file__)))
# Modul-Loader-Code generieren
loader_code = f"""
import sys
import importlib.util
import os
# App-Pfad zum Suchpfad hinzufügen
sys.path.insert(0, "{app_path}")
# Module importieren
modules = {{}}
"""
# Code zum Importieren der Module hinzufügen
for module_name in self.app_modules:
loader_code += f"""
try:
modules["{module_name}"] = __import__("{module_name}", fromlist=["*"])
print(f"Modul '{module_name}' erfolgreich importiert")
except ImportError as e:
print(f"Fehler beim Importieren von '{module_name}': {{e}}")
"""
# Loader-Datei schreiben
with open(module_loader_path, "w") as f:
f.write(loader_code)
return module_loader_path
def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Führt den generierten Code in einer isolierten Umgebung aus.
Args:
code: Der auszuführende Python-Code
input_data: Eingabedaten für den Code (werden als JSON serialisiert)
Returns:
Dict mit Ausführungsergebnissen, Ausgabe und Fehlern
"""
# Virtuelle Umgebung erstellen oder bestehende verwenden
venv_path = self._create_venv()
# Erforderliche Pakete aus dem Code extrahieren
required_packages = self._extract_required_packages(code)
# Pakete installieren, falls erforderlich
install_success = True
install_log = ""
if required_packages:
install_success, install_log = self._install_packages(required_packages, venv_path)
if not install_success:
return {
"success": False,
"output": "",
"error": f"Fehler bei der Installation der erforderlichen Pakete: {install_log}",
"result": None,
"installed_packages": required_packages
}
# Temporäre Datei für den Code erstellen
code_id = str(uuid.uuid4())[:8]
code_file_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
f"ai_code_{code_id}.py")
# Module-Loader erstellen
module_loader_path = self._create_module_loader()
# Eingabedaten als JSON speichern, wenn vorhanden
input_path = ""
if input_data:
import json
input_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
f"input_{code_id}.json")
with open(input_path, "w") as f:
json.dump(input_data, f)
# Outputpfad für Ergebnisse
output_path = os.path.join(self.temp_dir or tempfile.mkdtemp(prefix="ai_code_exec_"),
f"output_{code_id}.json")
# Prepare all paths using forward slashes for consistency across platforms
safe_module_loader_path = module_loader_path.replace('\\', '/') if module_loader_path else ""
safe_input_path = input_path.replace('\\', '/') if input_path else ""
safe_output_path = output_path.replace('\\', '/')
wrapped_code = f"""
# -*- coding: utf-8 -*-
# coding: utf-8
import sys
import json
import traceback
import os
# Ergebnisstruktur
result = {{
"success": False,
"output": "",
"error": "",
"result": None,
"installed_packages": {required_packages}
}}
try:
# Module laden, falls erforderlich
if "{safe_module_loader_path}":
module_loader = __import__("module_loader")
globals().update({{k: v for k, v in module_loader.modules.items()}})
# Eingabedaten laden, falls vorhanden
input_data = None
if "{safe_input_path}":
with open("{safe_input_path}", "r") as f:
input_data = json.load(f)
# Ausgabeumleitung
from io import StringIO
original_stdout = sys.stdout
original_stderr = sys.stderr
captured_stdout = StringIO()
captured_stderr = StringIO()
sys.stdout = captured_stdout
sys.stderr = captured_stderr
# Benutzercode ausführen
try:
# Den Code in einem lokalen Namespace ausführen
local_vars = {{"input_data": input_data}}
exec('''{code}''', globals(), local_vars)
# Ergebnis speichern, falls eine Variable 'result' definiert wurde
if "result" in local_vars:
result["result"] = local_vars["result"]
result["success"] = True
except Exception as e:
result["error"] = str(e)
result["error"] += "\\n" + traceback.format_exc()
finally:
# Ausgabe erfassen
result["output"] = captured_stdout.getvalue()
result["error"] += captured_stderr.getvalue()
# Ausgabeumleitung zurücksetzen
sys.stdout = original_stdout
sys.stderr = original_stderr
except Exception as outer_e:
result["error"] = f"Fehler beim Ausführen des Setups: {{outer_e}}\\n{{traceback.format_exc()}}"
# Ergebnis speichern
with open("{safe_output_path}", "w") as f:
json.dump(result, f, default=str)
"""
# Code in temporäre Datei schreiben with UTF-8 encoding
with open(code_file_path, "w", encoding="utf-8") as f:
f.write(wrapped_code)
# Python-Interpreter aus der virtuellen Umgebung bestimmen
python_executable = self._get_python_executable(venv_path)
# Code ausführen
logger.info(f"Führe Code in virtueller Umgebung aus: {python_executable}")
try:
# Prozess mit Ressourcenbeschränkungen ausführen
cmd = [python_executable, code_file_path]
# Umgebungsvariablen setzen, um Speicherlimit zu erzwingen
env = os.environ.copy()
if self.max_memory_mb:
if os.name == 'posix': # Unix/Linux
# Auf Unix-Systemen können wir ulimit verwenden
cmd = ["bash", "-c", f"ulimit -v {self.max_memory_mb * 1024} && {python_executable} {code_file_path}"]
elif os.name == 'nt': # Windows
# Auf Windows können wir keine harten Speichergrenzen setzen, aber Job Objects verwenden
# Hier müsste eine komplexere Lösung implementiert werden
pass
# Prozess starten und mit Timeout ausführen
process = subprocess.run(
cmd,
timeout=self.timeout,
env=env,
capture_output=True,
text=True
)
# Ergebnis aus der Ausgabedatei lesen
if os.path.exists(output_path):
with open(output_path, "r") as f:
import json
execution_result = json.load(f)
else:
execution_result = {
"success": False,
"output": process.stdout,
"error": f"Keine Ergebnisdatei gefunden. Stderr: {process.stderr}",
"result": None,
"installed_packages": required_packages
}
except subprocess.TimeoutExpired:
execution_result = {
"success": False,
"output": "",
"error": f"Zeitüberschreitung bei der Ausführung (Timeout nach {self.timeout} Sekunden)",
"result": None,
"installed_packages": required_packages
}
except Exception as e:
execution_result = {
"success": False,
"output": "",
"error": f"Fehler bei der Ausführung: {str(e)}",
"result": None,
"installed_packages": required_packages
}
# Informationen zur Paketinstallation hinzufügen
if install_log:
execution_result["package_install_log"] = install_log
# Temporäre Dateien aufräumen
self._cleanup_temp_files([code_file_path, input_path, output_path])
return execution_result
def _cleanup_temp_files(self, file_paths: List[str]):
"""Räumt temporäre Dateien auf."""
for path in file_paths:
if path and os.path.exists(path):
try:
os.remove(path)
except Exception as e:
logger.warning(f"Konnte temporäre Datei nicht löschen {path}: {e}")
def cleanup(self):
"""Räumt alle temporären Ressourcen auf."""
if self.temp_dir and os.path.exists(self.temp_dir):
import shutil
try:
shutil.rmtree(self.temp_dir)
logger.info(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
except Exception as e:
logger.warning(f"Konnte temporäres Verzeichnis nicht löschen {self.temp_dir}: {e}")
def __del__(self):
"""Aufräumen beim Garbage Collection."""
self.cleanup()
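As a rough usage sketch of the executor above (package list, timeout and input values are illustrative), a caller passes the code string plus optional input data and reads back the result dictionary:

```python
# Hypothetical usage of CodeExecutor; the actual module lists depend on the deployment.
executor = CodeExecutor(app_modules=[], timeout=30, max_memory_mb=256)

code = """
# pip install numpy
import numpy as np
values = input_data["values"]
result = {"mean": float(np.mean(values))}
"""

outcome = executor.execute_code(code, input_data={"values": [1, 2, 3, 4]})
print(outcome["success"], outcome["result"], outcome["error"])
executor.cleanup()
```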
class CoderAgent(BaseAgent):
"""Erweiterter Agent für die Entwicklung und Ausführung von Python-Code"""
def __init__(self):
"""Initialize the coder agent with proper type and capabilities"""
super().__init__()
# Agent metadata
self.id = "coder"
self.type = "coder"
self.name = "Python Code Agent"
self.description = "Entwickelt und führt Python-Code aus"
self.capabilities = "code_development,data_processing,file_processing,automation"
self.result_format = "python_code"
# Init utilities
self.file_utils = FileUtils()
self.message_utils = MessageUtils()
# Executor settings
self.executor_timeout = 60 # seconds
self.executor_memory_limit = 512 # MB
# AI service settings
self.ai_temperature = 0.2 # Lower temperature for more deterministic code generation
self.ai_max_tokens = 2000 # Enough tokens for complex code
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
return {
"id": self.id,
"type": self.type,
"name": self.name,
"description": self.description,
"capabilities": self.capabilities,
"result_format": self.result_format,
"metadata": {
"timeout": self.executor_timeout,
"memory_limit": self.executor_memory_limit
}
}
async def process_message(self, message: Dict[str, Any],
workflow: Dict[str, Any],
context: Dict[str, Any] = None,
log_func=None) -> Dict[str, Any]:
"""
Processes a message to develop and execute Python code.
Args:
message: The message to process
workflow: The current workflow
context: Additional context information
log_func: Function for workflow logging
Returns:
Response message
"""
# Initialize logging
workflow_id = workflow.get("id")
logging_utils = LoggingUtils(workflow_id, log_func)
logging_utils.info(f"CoderAgent startet Verarbeitung", "agents")
# Initialize utilities
workflow_utils = WorkflowUtils(workflow_id)
# Create response message
response = self.message_utils.create_message(workflow_id, role="assistant")
response["agent_type"] = self.type
response["agent_name"] = self.name
response["parent_message_id"] = message.get("id")
try:
# Check if user directly provided code
content = message.get("content", "")
documents = message.get("documents", [])
# Extract code from message content
code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content)
code_to_execute = None
if code_blocks:
# Use the first code block found
code_to_execute = code_blocks[0]
logging_utils.info(f"Code aus Nachricht extrahiert ({len(code_to_execute)} Zeichen)", "agents")
else:
# Generate code based on the message content using OpenAI
logging_utils.info("Kein Code in der Nachricht gefunden, generiere neuen Code mit AI", "agents")
# Generate code using AI
code_to_execute = await self._generate_code_from_prompt(content, documents, context)
if not code_to_execute:
logging_utils.warning("AI konnte keinen Code generieren", "agents")
response["content"] = "Ich konnte basierend auf Ihrer Anfrage keinen ausführbaren Code generieren. Bitte geben Sie detailliertere Anweisungen an."
self.message_utils.finalize_message(response)
return response
logging_utils.info(f"Code mit AI generiert ({len(code_to_execute)} Zeichen)", "agents")
# Get database interface for code execution
mandate_id = workflow.get("mandate_id", 0)
user_id = workflow.get("user_id", 0)
lucydom_interface = get_lucydom_interface(mandate_id, user_id)
# Execute the code
if code_to_execute:
logging_utils.info("Führe Code aus", "execution")
# Prepare execution context
execution_context = {
"workflow_id": workflow_id,
"documents": documents,
"message": message,
"mandate_id": mandate_id,
"user_id": user_id
}
# Execute code
result = await self._execute_code(code_to_execute, lucydom_interface, execution_context)
# Prepare response
if result.get("success", False):
# Code execution successful
output = result.get("output", "")
execution_result = result.get("result")
logging_utils.info("Code erfolgreich ausgeführt", "execution")
# Format response content
response_content = f"## Code erfolgreich ausgeführt\n\n"
# Include the executed code
response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n"
# Include the output if available
if output:
response_content += f"### Ausgabe\n\n```\n{output}\n```\n\n"
# Include the execution result if available
if execution_result:
result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
response_content += f"### Ergebnis\n\n```\n{result_str}\n```\n\n"
response["content"] = response_content
# Process any files created by the code
if isinstance(execution_result, dict) and "created_files" in execution_result:
created_files = execution_result.get("created_files", [])
for file_info in created_files:
file_id = file_info.get("id")
if file_id:
logging_utils.info(f"Füge erstellte Datei {file_info.get('name', file_id)} zu Dokumenten hinzu", "files")
file_meta = lucydom_interface.get_file(file_id)
if file_meta:
# Add file document to the response
doc = {
"id": f"doc_{uuid.uuid4()}",
"source": file_meta,
"type": "file"
}
response["documents"].append(doc)
else:
# Code execution failed
error = result.get("error", "Unbekannter Fehler")
logging_utils.error(f"Fehler bei der Codeausführung: {error}", "execution")
# Format error response
response_content = f"## Fehler bei der Codeausführung\n\n"
response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n"
response_content += f"### Fehler\n\n```\n{error}\n```\n\n"
# Add recommendation based on error
response_content += self._get_error_recommendation(error)
response["content"] = response_content
else:
# No code to execute
response["content"] = "Ich konnte keinen ausführbaren Code finden oder generieren. Bitte geben Sie Python-Code an oder erläutern Sie Ihre Anforderungen genauer."
# Finalize response
self.message_utils.finalize_message(response)
# Log success
logging_utils.info("CoderAgent hat die Anfrage erfolgreich verarbeitet", "agents")
return response
except Exception as e:
error_msg = f"Fehler bei der Verarbeitung durch den CoderAgent: {str(e)}"
logging_utils.error(error_msg, "error")
# Create error response
response["content"] = f"## Fehler bei der Verarbeitung\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```"
self.message_utils.finalize_message(response)
return response
async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]], context: Dict[str, Any] = None) -> str:
"""
Generate Python code from a prompt using OpenAI service.
Args:
prompt: The prompt to generate code from
documents: Documents associated with the prompt
context: Additional context information
Returns:
Generated Python code
"""
try:
# Initialize AI service
chat_service = ChatService()
# Prepare a detailed prompt for code generation
ai_prompt = self._prepare_code_prompt(prompt, documents)
# Create messages for the OpenAI API
messages = [
{"role": "system", "content": "You are a Python code generator. Generate only executable Python code without explanations. The code should be well-commented, handle errors appropriately, and follow best practices."},
{"role": "user", "content": ai_prompt}
]
# Call the OpenAI API
logging.info(f"Calling OpenAI API to generate code")
generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)
# Extract code from the response (the AI might wrap it in markdown)
code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', generated_content)
if code_blocks:
# Use the first code block found
return code_blocks[0].strip()
else:
# If no code block is found, return the raw response
return generated_content.strip()
except Exception as e:
logging.error(f"Error generating code with AI: {str(e)}", exc_info=True)
# Return a basic error-handling code
estr=str(e).replace('"', '\\"')
return f"""
# Error during code generation
print(f"An error occurred during code generation: {estr}")
# Return an error result
result = {{"error": "Code generation failed", "message": "{estr}"}}
"""
def _prepare_code_prompt(self, user_prompt: str, documents: List[Dict[str, Any]]) -> str:
"""
Prepares a detailed prompt for the AI to generate Python code.
Args:
user_prompt: The original user request
documents: Available documents
Returns:
A detailed prompt for code generation
"""
# Start with the user's request
prompt = f"""Generate Python code to solve the following task:
{user_prompt}
"""
# Add information about available documents
if documents:
prompt += "\nAvailable documents:\n"
for i, doc in enumerate(documents):
source = doc.get("source", {})
doc_name = source.get("name", f"Document {i+1}")
doc_type = source.get("content_type", "unknown")
doc_id = source.get("id", "")
prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id})\n"
# Add information about how to access documents
prompt += """
To access these documents, use:
- await load_file(file_id, encoding='utf-8') for text files
- await load_file(file_id) for binary files
"""

View file

@ -17,59 +17,298 @@ async def data_extraction(
ai_service,
lucydom_interface = None,
workflow_id: str = None,
add_log_func = None,
document_handler = None  # Add this parameter
) -> Dict[str, Any]:
"""
Performs AI-driven data extraction with support for the document handler.
Args:
prompt: Specification of what data to extract
files: List of all available files with metadata
messages: List of all messages in the workflow
ai_service: Service for AI requests
lucydom_interface: Interface for database access (optional)
workflow_id: Optional workflow ID for logging
add_log_func: Optional function for adding logs
document_handler: Optional document handler for structured document operations
Returns:
Structured text object with extracted data and context information
"""
try:
# Create extraction plan using AI
extraction_plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func)
# Execute extractions, preferring document handler if available
if document_handler:
extracted_data = await _execute_extractions_with_handler(
extraction_plan,
files,
messages,
document_handler,
ai_service,
workflow_id,
add_log_func
)
else:
# Fall back to original implementation
extracted_data = await _execute_extractions(
extraction_plan,
files,
messages,
lucydom_interface,
ai_service,
workflow_id,
add_log_func
)
# Structure extracted data
structured_result = _structure_extracted_data(extracted_data, files, prompt)
return structured_result
except Exception as e:
logger.error(f"Fehler bei der Datenextraktion: {str(e)}", exc_info=True)
logger.error(f"Error in data extraction: {str(e)}", exc_info=True)
# Fehler-Log hinzufügen
# Add error log
if add_log_func and workflow_id:
add_log_func(workflow_id, f"Fehler bei der Datenextraktion: {str(e)}", "error")
add_log_func(workflow_id, f"Data extraction error: {str(e)}", "error")
# Fehler-Ergebnis zurückgeben
# Return error result
return {
"error": str(e),
"status": "error",
"files_processed": len(files),
"message": f"Die Datenextraktion konnte nicht durchgeführt werden: {str(e)}"
"message": f"Data extraction failed: {str(e)}"
}
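# Minimal usage sketch, assuming the caller already holds a workflow dict and the
# services wired up elsewhere in the Agentservice; the module path of the import
# is an assumption, get_document_handler is the factory added in this commit.
from modules.agentservice_documenthandler import get_document_handler

extraction_result = await data_extraction(
    prompt="Extract all invoice line items with their amounts",
    files=workflow.get("files", []),
    messages=workflow.get("messages", []),
    ai_service=ai_service,
    lucydom_interface=lucydom_interface,
    workflow_id=workflow.get("id"),
    add_log_func=None,
    document_handler=get_document_handler(workflow.get("id"), lucydom_interface, ai_service),
)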
async def _execute_extractions_with_handler(
extraction_plan: List[Dict[str, Any]],
files: List[Dict[str, Any]],
messages: List[Dict[str, Any]],
document_handler,
ai_service,
workflow_id: str = None,
add_log_func = None
) -> List[Dict[str, Any]]:
"""
Execute extractions using the document handler.
Args:
extraction_plan: List of extraction instructions
files: List of all available files
messages: List of all messages
document_handler: Document handler for structured operations
ai_service: Service for AI requests
workflow_id: Optional workflow ID for logging
add_log_func: Optional function for adding logs
Returns:
List with extracted data per file
"""
extracted_data = []
# Sort by importance (highest first)
sorted_plan = sorted(extraction_plan, key=lambda x: x.get("importance", 0), reverse=True)
for extraction_item in sorted_plan:
file_id = extraction_item.get("file_id")
extract_needed = extraction_item.get("extract_needed", False)
extraction_prompt = extraction_item.get("extraction_prompt", "")
# Find file metadata
file_metadata = next((f for f in files if f.get("id") == file_id), None)
if not file_metadata:
logger.warning(f"File with ID {file_id} not found")
continue
file_name = file_metadata.get("name", "")
file_type = file_metadata.get("type", "")
content_type = file_metadata.get("content_type", "")
# Log
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Processing file: {file_name} (Extraction needed: {extract_needed})",
"info"
)
# Only perform extraction if needed
if extract_needed:
# Find document in existing messages if available
existing_content = _find_document_in_messages(file_id, messages)
# Check if we should use document handler for contextual extraction
if existing_content:
# If document exists but needs contextual extraction
document_id = existing_content.get("document_id")
message_id = existing_content.get("message_id")
if document_id and message_id:
# Find the message containing the document
for message in messages:
if message.get("id") == message_id:
# Extract content with context
try:
# Find document reference
doc_reference = None
for doc in message.get("documents", []):
if doc.get("id") == document_id:
doc_reference = doc
break
if doc_reference:
# Use the document handler to perform contextual extraction
# (extract_document_content expects the containing message, not the file ID)
updated_message = await document_handler.extract_document_content(
    document_id,
    message,
    extraction_prompt
)
extracted_text = document_handler.get_file_content_from_message(
    updated_message, doc_id=document_id
)
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": extracted_text,
"is_extracted": True,
"extraction_method": "contextual_extraction"
})
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Contextual extraction for {file_name}: {extraction_prompt}",
"info"
)
continue
except Exception as e:
logger.error(f"Error in contextual extraction for {file_name}: {str(e)}")
# If we reach here, we need to perform a new extraction
try:
file_content = await document_handler.add_file_to_message(
{}, # Empty message to extract just the document
file_id,
extraction_prompt
)
# Get the extracted content from the document
if "documents" in file_content and file_content["documents"]:
doc = file_content["documents"][0]
content_text = ""
is_extracted = False
for content in doc.get("contents", []):
if content.get("type") == "text":
content_text = content.get("text", "")
is_extracted = content.get("is_extracted", False)
break
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": content_text,
"is_extracted": is_extracted,
"extraction_method": "document_handler"
})
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Extracted {file_name} using document handler",
"info"
)
else:
# Extraction failed
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": f"Failed to extract content from {file_name}",
"is_extracted": False,
"extraction_method": "failed"
})
except Exception as e:
logger.error(f"Error extracting {file_name}: {str(e)}")
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": f"Error extracting: {str(e)}",
"is_extracted": False,
"extraction_method": "error"
})
else:
# No extraction needed, use existing content
existing_content = _find_document_in_messages(file_id, messages)
if existing_content:
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": existing_content.get("content", ""),
"is_extracted": existing_content.get("is_extracted", False),
"extraction_method": "existing_content"
})
else:
# No existing content found
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": f"No content available for {file_name}",
"is_extracted": False,
"extraction_method": "none"
})
return extracted_data
def _find_document_in_messages(file_id: int, messages: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Find a document by file ID in workflow messages.
Args:
file_id: ID of the file to find
messages: List of messages to search
Returns:
Dictionary with document information or empty dict if not found
"""
for message in messages:
for doc_index, document in enumerate(message.get("documents", [])):
source = document.get("source", {})
# Check if file ID matches
if source.get("id") == str(file_id) or source.get("id") == file_id:
# Found the document
content_text = ""
is_extracted = False
# Look for text content
for content in document.get("contents", []):
if content.get("type") == "text":
content_text = content.get("text", "")
is_extracted = content.get("is_extracted", False)
break
return {
"document_id": document.get("id"),
"message_id": message.get("id"),
"content": content_text,
"is_extracted": is_extracted
}
return {}
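# Minimal lookup sketch, assuming a workflow dict with the message/document
# shape used above; the file ID 42 is an assumption.
hit = _find_document_in_messages(42, workflow.get("messages", []))
if hit:
    logger.info(
        f"File 42 lives in message {hit['message_id']} "
        f"(document {hit['document_id']}, extracted={hit['is_extracted']})"
    )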
async def _create_extraction_plan(
prompt: str,
files: List[Dict[str, Any]],
@ -454,6 +693,7 @@ def _structure_extracted_data(
def _extract_document_contents_from_messages(file_id: int, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Extract document contents for a specific file from workflow messages.
Enhanced to handle the new document structure.
Args:
file_id: ID of the file
@ -469,13 +709,21 @@ def _extract_document_contents_from_messages(file_id: int, messages: List[Dict[s
for document in message.get("documents", []):
source = document.get("source", {})
# Check if file ID matches
if source.get("id") == file_id or (source.get("type") == "file" and source.get("id") == file_id):
# Check if file ID matches (handle both string and int comparison)
if str(source.get("id")) == str(file_id):
# Add contents of the file
doc_contents = document.get("contents", [])
if doc_contents:
contents.extend(doc_contents)
# Ensure each content has document reference
for content in doc_contents:
content_copy = content.copy()
content_copy["document_id"] = document.get("id")
content_copy["message_id"] = message.get("id")
contents.append(content_copy)
return contents

View file

@ -0,0 +1,498 @@
"""
Enhanced document handling module for the Agentservice.
"""
import os
import logging
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union
logger = logging.getLogger(__name__)
class DocumentHandler:
"""
Centralized document handler for consistent document management across the system.
"""
def __init__(self, workflow_id: str = None, lucydom_interface = None, ai_service = None):
"""Initialize the document handler."""
self.workflow_id = workflow_id
self.lucydom_interface = lucydom_interface
self.ai_service = ai_service
# Import necessary utilities
from modules.agentservice_filemanager import get_file_manager
self.file_manager = get_file_manager()
def set_workflow_id(self, workflow_id: str):
"""Set or update the workflow ID."""
self.workflow_id = workflow_id
def set_lucydom_interface(self, lucydom_interface):
"""Set or update the LucyDOM interface."""
self.lucydom_interface = lucydom_interface
def set_ai_service(self, ai_service):
"""Set or update the AI service."""
self.ai_service = ai_service
async def add_file_to_message(self, message: Dict[str, Any], file_id: int, extraction_prompt: str = None) -> Dict[str, Any]:
"""
Add a file to a message with optional contextual extraction.
Args:
message: The message to add the file to
file_id: ID of the file to add
extraction_prompt: Optional prompt for contextual extraction (e.g., for images)
Returns:
Updated message with the file added
"""
if not self.lucydom_interface:
logger.error("LucyDOM interface not available")
return message
try:
# Get file metadata
file = self.lucydom_interface.get_file(file_id)
if not file:
logger.warning(f"File with ID {file_id} not found")
return message
# Get necessary file information
file_name = file.get("name", "unnamed_file")
file_type = file.get("type", "unknown")
content_type = file.get("content_type")
# Initialize documents array if needed
if "documents" not in message:
message["documents"] = []
# Check if file is already in the message
file_already_added = any(
doc.get("source", {}).get("id") == str(file_id)
for doc in message.get("documents", [])
)
if file_already_added:
logger.info(f"File {file_name} already exists in message, skipping")
return message
# Create a unique document ID
doc_id = f"doc_{uuid.uuid4()}"
# Create document structure
document = {
"id": doc_id,
"source": {
"type": "file",
"id": str(file_id),
"name": file_name,
"content_type": content_type,
"size": file.get("size"),
"upload_date": file.get("upload_date", datetime.now().isoformat())
},
"contents": []
}
# Only read content if we have extraction prompt or specific types
if (extraction_prompt or
file_type in ["document", "text"] or
(content_type and content_type.startswith("text/"))):
# Read file content
file_content = await self.lucydom_interface.read_file_content(file_id)
if file_content:
# Process based on file type
if file_type == "image" or (content_type and content_type.startswith("image/")):
# Image analysis if prompt provided
if extraction_prompt and self.ai_service and hasattr(self.ai_service, "analyze_image"):
try:
image_analysis = await self.ai_service.analyze_image(
image_data=file_content,
prompt=extraction_prompt or "Describe this image in detail",
mime_type=content_type
)
# Add the analysis as text content
document["contents"].append({
"type": "text",
"text": f"Image Analysis:\n{image_analysis}",
"is_extracted": True,
"extraction_context": extraction_prompt
})
logger.info(f"Added image analysis for {file_name} to message")
except Exception as e:
logger.error(f"Error analyzing image {file_name}: {str(e)}")
document["contents"].append({
"type": "text",
"text": f"Image file: {file_name} (Analysis failed: {str(e)})",
"is_extracted": False
})
else:
# Just add placeholder if no analysis available
document["contents"].append({
"type": "text",
"text": f"Image file: {file_name} (no analysis requested)",
"is_extracted": False
})
else:
# For other file types, extract text
from modules.agentservice_utils import extract_text_from_file_content
content, is_extracted = extract_text_from_file_content(
file_content, file_name, content_type
)
document["contents"].append({
"type": "text",
"text": content,
"is_extracted": is_extracted,
"extraction_context": extraction_prompt
})
logger.info(f"Added text content for {file_name} to message (extracted: {is_extracted})")
else:
# No content available
document["contents"].append({
"type": "text",
"text": f"File content not available for {file_name}",
"is_extracted": False
})
else:
# Just add reference without content
document["contents"].append({
"type": "text",
"text": f"File: {file_name} (content not loaded)",
"is_extracted": False
})
# Add document to message
message["documents"].append(document)
logger.info(f"File {file_name} successfully added to message")
return message
except Exception as e:
logger.error(f"Error adding file {file_id} to message: {str(e)}")
return message
async def add_files_to_message(self, message: Dict[str, Any], file_ids: List[int], extraction_prompt: str = None) -> Dict[str, Any]:
"""
Add multiple files to a message.
Args:
message: The message to add files to
file_ids: List of file IDs to add
extraction_prompt: Optional prompt for contextual extraction
Returns:
Updated message with files added
"""
updated_message = message.copy()
for file_id in file_ids:
updated_message = await self.add_file_to_message(updated_message, file_id, extraction_prompt)
return updated_message
async def extract_document_content(self, doc_id: str, message: Dict[str, Any], extraction_prompt: str) -> Dict[str, Any]:
"""
Extract or update document content with contextual extraction.
Args:
doc_id: ID of the document to extract
message: Message containing the document
extraction_prompt: Contextual prompt for extraction
Returns:
Updated message with extracted content
"""
if not message or "documents" not in message:
return message
updated_message = message.copy()
# Find the document
for i, document in enumerate(updated_message.get("documents", [])):
if document.get("id") == doc_id:
# Get file ID from source
source = document.get("source", {})
file_id = source.get("id")
if file_id and self.lucydom_interface:
# Get file metadata
file = self.lucydom_interface.get_file(int(file_id))
if not file:
continue
# Get file content
file_content = await self.lucydom_interface.read_file_content(int(file_id))
if not file_content:
continue
# Process based on file type
file_name = file.get("name", "unnamed_file")
file_type = file.get("type", "unknown")
content_type = file.get("content_type")
# Update content based on file type
if file_type == "image" or (content_type and content_type.startswith("image/")):
if self.ai_service and hasattr(self.ai_service, "analyze_image"):
try:
image_analysis = await self.ai_service.analyze_image(
image_data=file_content,
prompt=extraction_prompt,
mime_type=content_type
)
# Create or update content
new_content = {
"type": "text",
"text": f"Image Analysis:\n{image_analysis}",
"is_extracted": True,
"extraction_context": extraction_prompt
}
# Update or add content
contents = document.get("contents", [])
contents_updated = False
for j, content in enumerate(contents):
if content.get("type") == "text":
updated_message["documents"][i]["contents"][j] = new_content
contents_updated = True
break
if not contents_updated:
if not updated_message["documents"][i].get("contents"):
updated_message["documents"][i]["contents"] = []
updated_message["documents"][i]["contents"].append(new_content)
logger.info(f"Updated image analysis for {file_name} with new context: {extraction_prompt}")
except Exception as e:
logger.error(f"Error updating image analysis for {file_name}: {str(e)}")
else:
# For other file types, extract text with new context
from modules.agentservice_utils import extract_text_from_file_content
content, is_extracted = extract_text_from_file_content(
file_content, file_name, content_type
)
new_content = {
"type": "text",
"text": content,
"is_extracted": is_extracted,
"extraction_context": extraction_prompt
}
# Update or add content
contents = document.get("contents", [])
contents_updated = False
for j, content_item in enumerate(contents):
if content_item.get("type") == "text":
updated_message["documents"][i]["contents"][j] = new_content
contents_updated = True
break
if not contents_updated:
if not updated_message["documents"][i].get("contents"):
updated_message["documents"][i]["contents"] = []
updated_message["documents"][i]["contents"].append(new_content)
logger.info(f"Updated text extraction for {file_name} with new context: {extraction_prompt}")
# Found and processed the document, stop searching
break
return updated_message
async def extract_files_from_workflow(self, workflow: Dict[str, Any], extraction_prompt: str, file_filter: str = None) -> Dict[str, Any]:
"""
Extract all relevant files from a workflow with context-aware extraction.
Args:
workflow: The workflow object
extraction_prompt: Contextual prompt for extraction
file_filter: Optional filter for file types (e.g., "csv", "image")
Returns:
Dictionary with extracted content
"""
# Import for data extraction
from modules.agentservice_dataextraction import data_extraction
# Get all files from the workflow
files = []
# Process all messages
for message in workflow.get("messages", []):
# Extract documents from the message
for doc in message.get("documents", []):
source = doc.get("source", {})
# Only include file documents
if source.get("type") == "file":
file_info = {
"id": source.get("id", ""),
"name": source.get("name", ""),
"type": source.get("type", ""),
"content_type": source.get("content_type", ""),
"size": source.get("size", 0)
}
# Apply filter if provided
if file_filter:
file_name = file_info.get("name", "").lower()
content_type = file_info.get("content_type", "").lower()
if (file_filter.lower() in file_name or
file_filter.lower() in content_type):
# Check if file is already in the list
if not any(f.get("id") == file_info["id"] for f in files):
files.append(file_info)
else:
# No filter, include all files
if not any(f.get("id") == file_info["id"] for f in files):
files.append(file_info)
# If no files found, return empty result
if not files:
return {
"prompt": extraction_prompt,
"files_processed": 0,
"extracted_content": []
}
# Get all messages from the workflow
workflow_messages = workflow.get("messages", [])
# Extract data using the dataextraction module
extracted_data = await data_extraction(
prompt=extraction_prompt,
files=files,
messages=workflow_messages,
ai_service=self.ai_service,
lucydom_interface=self.lucydom_interface,
workflow_id=self.workflow_id,
add_log_func=None # We don't have access to add_log_func here
)
return extracted_data
def get_file_content_from_message(self, message: Dict[str, Any], file_id: int = None, doc_id: str = None) -> str:
"""
Get file content from a message.
Args:
message: The message containing the document
file_id: Optional file ID to search for
doc_id: Optional document ID to search for
Returns:
Text content of the file if available
"""
if not message or "documents" not in message:
return ""
# Search for the document
for document in message.get("documents", []):
# Match by document ID or file ID
source = document.get("source", {})
source_file_id = source.get("id")
if ((doc_id and document.get("id") == doc_id) or
(file_id and source_file_id and str(file_id) == str(source_file_id))):
# Get text content from document
for content in document.get("contents", []):
if content.get("type") == "text":
return content.get("text", "")
return ""
def create_text_document(self, message: Dict[str, Any], content: str, title: str = "Generated Text") -> Dict[str, Any]:
"""
Create a new text document in a message.
Args:
message: The message to add the document to
content: Text content
title: Document title
Returns:
Updated message with the new document
"""
# Initialize documents array if needed
updated_message = message.copy()
if "documents" not in updated_message:
updated_message["documents"] = []
# Create document ID
doc_id = f"doc_{uuid.uuid4()}"
# Create document structure
document = {
"id": doc_id,
"source": {
"type": "generated",
"id": doc_id,
"name": title,
"content_type": "text/plain",
"size": len(content)
},
"contents": [
{
"type": "text",
"text": content,
"is_extracted": True
}
]
}
# Add document to message
updated_message["documents"].append(document)
logger.info(f"Created text document '{title}' in message")
return updated_message
def merge_document_contents(self, message: Dict[str, Any]) -> str:
"""
Merge all document contents from a message into a single text.
Args:
message: The message containing documents
Returns:
Combined text content from all documents
"""
if not message or "documents" not in message:
return ""
combined_text = ""
for document in message.get("documents", []):
source = document.get("source", {})
doc_name = source.get("name", "Unnamed Document")
# Extract text content
doc_text = ""
for content in document.get("contents", []):
if content.get("type") == "text":
doc_text = content.get("text", "")
break
if doc_text:
combined_text += f"\n\n--- {doc_name} ---\n\n{doc_text}"
return combined_text.strip()
# Factory function
def get_document_handler(workflow_id: str = None, lucydom_interface = None, ai_service = None) -> DocumentHandler:
"""Get a document handler instance."""
return DocumentHandler(workflow_id, lucydom_interface, ai_service)
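# Minimal usage sketch, assuming workflow_id, lucydom_interface and ai_service
# are supplied by the surrounding workflow execution; the file IDs and message
# content are assumptions.
handler = get_document_handler(workflow_id, lucydom_interface, ai_service)
message = {"id": "msg_1", "content": "Please review the attached reports"}
message = await handler.add_files_to_message(
    message, [17, 23], extraction_prompt="Summarize the key findings"
)
combined_text = handler.merge_document_contents(message)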

View file

@ -34,6 +34,8 @@ class FileExtractionError(Exception):
"""Exception for file extraction errors."""
pass
class FileManager:
"""Central file management for the Agentservice."""
@ -54,31 +56,30 @@ class FileManager:
# Import utilities
# Instead of storing file_utils, we'll use the imported functions directly
async def read_file_contents(self,
file_contexts: List[Dict[str, Any]],
lucydom_interface,
workflow_id: str = None,
add_log_func = None,
ai_service = None # AI service parameter for image analysis
ai_service = None,
extraction_context: str = None  # Optional contextual extraction prompt
) -> Dict[str, Dict[str, Any]]:
"""
Liest den Inhalt aller Dateien und führt bei Bildern und Dokumenten Analysen durch.
Verwendet LucyDOM-Interface statt direkter Dateizugriffe.
Gibt jetzt ein Dictionary mit Dateiinhalten und Extraktionsstatus zurück.
Read file contents with optional contextual extraction.
Args:
file_contexts: Liste der Dateikontexte mit Metadaten
lucydom_interface: LucyDOM-Interface für Dateizugriffe
workflow_id: Optionale ID des Workflows für Logging
add_log_func: Optionale Funktion für das Hinzufügen von Logs
ai_service: Optionaler AI-Service für die Bildanalyse
file_contexts: List of file contexts with metadata
lucydom_interface: LucyDOM interface for file access
workflow_id: Optional workflow ID for logging
add_log_func: Optional function for adding logs
ai_service: AI service for image analysis
extraction_context: Optional context prompt for extraction
Returns:
Dictionary mit Dateiinhalten und Metadaten (file_id -> {content, is_extracted, ...})
Dictionary with file contents and metadata
"""
file_contents = {}
file_contents = {}
# Add debug logging
logger.info(f"Reading contents of {len(file_contexts)} files for workflow {workflow_id}")
@ -88,8 +89,6 @@ class FileManager:
file_type = file.get("type", "unknown")
content_type = file.get("content_type")
print("DEGUB5:",file_name,file_type)
try:
# Retrieve the file content via the LucyDOM interface
file_data = await lucydom_interface.read_file_content(file_id)
@ -107,24 +106,26 @@ class FileManager:
logger.info(f"Successfully read file: {file_name} (ID: {file_id}, Type: {file_type})")
# Bildverarbeitung - immer KI-Analyse verwenden, wenn verfügbar
# For image analysis, add extraction context
if file_type == "image" or file_name.lower().endswith(('.jpg', '.jpeg', '.png', '.gif', '.webp')):
if ai_service and hasattr(ai_service, "analyze_image"):
try:
# Use extraction context if provided
prompt = extraction_context or "Describe this image in detail"
image_analysis = await ai_service.analyze_image(
image_data=file_data,
prompt="Describe this image in detail",
prompt=prompt, # Use contextual prompt
mime_type=content_type
)
logger.debug(f"Image analysis successfully generated for {file_name}")
file_contents[file_id] = {
"content": f"Image Analysis:\n{image_analysis}",
"is_extracted": False, # Bildanalyse gilt nicht als Text-Extraktion
"is_extracted": True, # Mark as extracted
"name": file_name,
"type": file_type,
"content_type": content_type
"content_type": content_type,
"extraction_context": prompt # Store the used prompt
}
_log(add_log_func, workflow_id, f"Image {file_name} analyzed successfully", "info")
except Exception as e:
@ -189,51 +190,43 @@ class FileManager:
@staticmethod
def add_file_to_message(message: Dict[str, Any], file_data: Dict[str, Any]) -> Dict[str, Any]:
"""
Fügt eine Datei zu einer Nachricht hinzu mit Kennzeichnung, ob Text extrahiert wurde.
Add a file to a message with consistent document structure.
Args:
message: Die zu erweiternde Nachricht
file_data: Dateimetadaten und Inhalt
message: The message to add the file to
file_data: File metadata and content
Returns:
Die aktualisierte Nachricht mit der Datei
Updated message with the file added
"""
# Detailliertes Logging für Debugging
logger.info(f"Adding file to message: {file_data.get('name', 'unnamed_file')} (ID: {file_data.get('id', 'unknown')})")
# Initialize documents array if needed
if "documents" not in message:
message["documents"] = []
logger.debug("Initialized empty documents array in message")
# Create a unique ID for the document if not provided
doc_id = file_data.get("id", f"file_{uuid.uuid4()}")
# Extract file size if available
# Extract metadata
file_size = file_data.get("size")
if isinstance(file_size, str) and file_size.isdigit():
file_size = int(file_size)
elif file_size is None and file_data.get("content"):
# Estimate size from content if not provided
file_size = len(file_data.get("content", ""))
# Bestimmen, ob der Inhalt bereits extrahiert wurde
# Determine if content is already extracted
content = file_data.get("content", "No content available")
file_name = file_data.get("name", "unnamed_file")
content_type = file_data.get("content_type")
# Check whether the content should be marked as extracted
is_extracted = file_data.get("is_extracted", False)
if not is_extracted and isinstance(content, str) and content.strip() and file_name:
# If not explicitly marked but content is present, check the file type
is_extracted = is_text_extractable(file_name, content_type)
# Create standard document structure that matches the data model
# Create standard document structure that follows the data model
document = {
"id": doc_id,
"id": f"doc_{uuid.uuid4()}", # Unique document ID separate from file ID
"source": {
"type": "file",
"id": file_data.get("id", doc_id),
"id": doc_id,
"name": file_name,
"content_type": content_type,
"size": file_size,
@ -243,28 +236,27 @@ class FileManager:
{
"type": "text",
"text": content,
"is_extracted": is_extracted # Flag für den Extraktionsstatus hinzufügen
"is_extracted": is_extracted,
"extraction_context": file_data.get("extraction_context", None)
}
]
}
# Log document structure for debugging
logger.debug(f"Created document structure: id={doc_id}, name={file_name}, is_extracted={is_extracted}")
# Check if file is already in the message to avoid duplicates
# Check if file is already in the message
file_already_added = any(
doc.get("source", {}).get("id") == file_data.get("id")
doc.get("source", {}).get("id") == doc_id
for doc in message.get("documents", [])
)
if not file_already_added:
message["documents"].append(document)
logger.info(f"File {file_name} successfully added to message (total: {len(message.get('documents', []))} files)")
logger.info(f"File {file_name} added to message (total: {len(message.get('documents', []))} files)")
else:
logger.info(f"File {file_name} already exists in message, skipping")
return message
async def analyze_file(self, file_id: int, prompt: str, lucydom_interface, ai_service) -> Dict[str, Any]:
"""
Analyze a file using the appropriate method based on file type.
@ -755,6 +747,85 @@ class FileManager:
return file_contexts
def create_document_reference(self, message: Dict[str, Any], file_id: int, reference_type: str = "reference") -> Dict[str, Any]:
"""
Create a document reference without loading content.
Args:
message: The message to add the reference to
file_id: ID of the file to reference
reference_type: Type of reference (reference, citation, etc.)
Returns:
Updated message with the document reference
"""
if not self.lucydom_interface:
logger.warning("LucyDOM interface not available for document reference")
return message
# Get file metadata
file = self.lucydom_interface.get_file(file_id)
if not file:
logger.warning(f"File with ID {file_id} not found for reference")
return message
# Create document structure with just the reference
document = {
"id": f"ref_{uuid.uuid4()}",
"source": {
"type": "file",
"id": str(file_id),
"name": file.get("name", "referenced_file"),
"content_type": file.get("content_type"),
"size": file.get("size"),
"reference_type": reference_type
},
"contents": [] # Empty contents - will be loaded on demand
}
# Add to message
updated_message = message.copy()
if "documents" not in updated_message:
updated_message["documents"] = []
updated_message["documents"].append(document)
logger.info(f"Added document reference for file {file.get('name')} (ID: {file_id})")
return updated_message
def should_extract_document(self, document: Dict[str, Any], context_prompt: str = None) -> bool:
"""
Determine if a document needs content extraction.
Args:
document: The document object
context_prompt: Current context prompt
Returns:
True if extraction is needed, False otherwise
"""
# If document has no contents, extraction is needed
if not document.get("contents"):
return True
# If document has contents but extraction status is False, extraction may be needed
for content in document.get("contents", []):
if content.get("type") == "text":
# If already extracted, check if context has changed
if content.get("is_extracted", False):
# If context prompt is different from what was used previously,
# we may need to re-extract with the new context
prev_context = content.get("extraction_context")
if context_prompt and prev_context != context_prompt:
return True
return False
return True
# Default to needing extraction
return True
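# Minimal decision sketch for should_extract_document: re-extraction is needed
# when a document has no contents yet, or when the stored extraction_context
# differs from the new prompt; the example document is an assumption.
doc = {"contents": [{"type": "text", "text": "...", "is_extracted": True,
                     "extraction_context": "Summarize the invoice totals"}]}
fm = get_file_manager()
fm.should_extract_document(doc, context_prompt="List every line item")          # True (context changed)
fm.should_extract_document(doc, context_prompt="Summarize the invoice totals")  # False (same context)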
# Factory method
@staticmethod
def get_instance():
@ -763,7 +834,6 @@ class FileManager:
FileManager._instance = FileManager()
return FileManager._instance
# Create a singleton instance for module-level access
file_manager = FileManager.get_instance()
@ -772,6 +842,8 @@ def get_file_manager():
return file_manager
class WorkflowFileManager:
"""
Specialized file manager for workflow operations.
@ -789,6 +861,7 @@ class WorkflowFileManager:
self.workflow_id = workflow_id
self.lucydom_interface = lucydom_interface
self.file_manager = get_file_manager()
self.document_handler = None
def set_workflow_id(self, workflow_id: str):
"""Set or update the workflow ID."""
@ -813,6 +886,15 @@ class WorkflowFileManager:
Returns:
Updated message
"""
# If document handler is available, use it
if self.document_handler:
return await self.document_handler.add_files_to_message(
message,
file_ids,
extraction_prompt=None # Default to no extraction
)
if not self.lucydom_interface:
_log(add_log_func, self.workflow_id, "LucyDOM interface not available", "error")
return message
@ -988,7 +1070,6 @@ class WorkflowFileManager:
return analysis
# Export the workflow file manager factory function
def get_workflow_file_manager(workflow_id: str = None, lucydom_interface = None):
"""Get a workflow file manager instance."""

View file

@ -0,0 +1,338 @@
"""
Agent Communication Protocol module for the Agentservice.
Defines a standardized format for agents to exchange information.
"""
import json
import uuid
from typing import Dict, Any, List, Optional
from datetime import datetime
class AgentMessage:
"""
Standard message format for inter-agent communication.
Includes content, metadata, and document references.
"""
def __init__(
self,
content: str,
sender_id: str,
receiver_id: Optional[str] = None,
message_type: str = "text",
metadata: Optional[Dict[str, Any]] = None,
documents: Optional[List[Dict[str, Any]]] = None,
context_id: Optional[str] = None
):
"""
Initialize an agent message.
Args:
content: The main message content
sender_id: ID of the sending agent
receiver_id: Optional ID of the receiving agent
message_type: Type of message (text, task, result, etc.)
metadata: Optional metadata dictionary
documents: Optional list of document references
context_id: Optional conversation context ID
"""
self.id = f"msg_{uuid.uuid4()}"
self.timestamp = datetime.now().isoformat()
self.content = content
self.sender_id = sender_id
self.receiver_id = receiver_id
self.message_type = message_type
self.metadata = metadata or {}
self.documents = documents or []
self.context_id = context_id
def to_dict(self) -> Dict[str, Any]:
"""Convert the message to a dictionary."""
return {
"id": self.id,
"timestamp": self.timestamp,
"content": self.content,
"sender_id": self.sender_id,
"receiver_id": self.receiver_id,
"message_type": self.message_type,
"metadata": self.metadata,
"documents": self.documents,
"context_id": self.context_id
}
@classmethod
def from_dict(cls, data: Dict[str, Any]) -> 'AgentMessage':
"""Create a message from a dictionary."""
message = cls(
content=data.get("content", ""),
sender_id=data.get("sender_id", "unknown"),
receiver_id=data.get("receiver_id"),
message_type=data.get("message_type", "text"),
metadata=data.get("metadata", {}),
documents=data.get("documents", []),
context_id=data.get("context_id")
)
message.id = data.get("id", message.id)
message.timestamp = data.get("timestamp", message.timestamp)
return message
def to_json(self) -> str:
"""Convert the message to a JSON string."""
return json.dumps(self.to_dict())
@classmethod
def from_json(cls, json_str: str) -> 'AgentMessage':
"""Create a message from a JSON string."""
return cls.from_dict(json.loads(json_str))
class AgentCommunicationProtocol:
"""
Defines the protocol for agents to communicate with each other.
Provides standardized message creation and handling.
"""
@staticmethod
def create_text_message(
content: str,
sender_id: str,
receiver_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
documents: Optional[List[Dict[str, Any]]] = None,
context_id: Optional[str] = None
) -> AgentMessage:
"""Create a simple text message."""
return AgentMessage(
content=content,
sender_id=sender_id,
receiver_id=receiver_id,
message_type="text",
metadata=metadata,
documents=documents,
context_id=context_id
)
@staticmethod
def create_task_message(
task_description: str,
sender_id: str,
receiver_id: str,
input_data: Optional[Dict[str, Any]] = None,
documents: Optional[List[Dict[str, Any]]] = None,
context_id: Optional[str] = None
) -> AgentMessage:
"""Create a task assignment message."""
metadata = {
"task_type": "general",
"input_data": input_data or {},
"priority": "normal",
"task_id": f"task_{uuid.uuid4()}"
}
return AgentMessage(
content=task_description,
sender_id=sender_id,
receiver_id=receiver_id,
message_type="task",
metadata=metadata,
documents=documents,
context_id=context_id
)
@staticmethod
def create_result_message(
result_content: str,
sender_id: str,
receiver_id: str,
task_id: str,
output_data: Optional[Dict[str, Any]] = None,
result_format: str = "text",
documents: Optional[List[Dict[str, Any]]] = None,
context_id: Optional[str] = None
) -> AgentMessage:
"""Create a task result message."""
metadata = {
"task_id": task_id,
"result_format": result_format,
"status": "completed",
"output_data": output_data or {}
}
return AgentMessage(
content=result_content,
sender_id=sender_id,
receiver_id=receiver_id,
message_type="result",
metadata=metadata,
documents=documents,
context_id=context_id
)
@staticmethod
def create_error_message(
error_description: str,
sender_id: str,
receiver_id: Optional[str] = None,
error_type: str = "general",
error_details: Optional[Dict[str, Any]] = None,
context_id: Optional[str] = None
) -> AgentMessage:
"""Create an error message."""
metadata = {
"error_type": error_type,
"error_details": error_details or {},
"severity": "error"
}
return AgentMessage(
content=error_description,
sender_id=sender_id,
receiver_id=receiver_id,
message_type="error",
metadata=metadata,
context_id=context_id
)
@staticmethod
def create_document_request_message(
document_description: str,
sender_id: str,
receiver_id: str,
filters: Optional[Dict[str, Any]] = None,
context_id: Optional[str] = None
) -> AgentMessage:
"""Create a document request message."""
metadata = {
"request_type": "document",
"filters": filters or {},
"request_id": f"req_{uuid.uuid4()}"
}
return AgentMessage(
content=document_description,
sender_id=sender_id,
receiver_id=receiver_id,
message_type="request",
metadata=metadata,
context_id=context_id
)
@staticmethod
def create_status_update_message(
status_description: str,
sender_id: str,
receiver_id: Optional[str] = None,
status: str = "in_progress",
progress: float = 0.0,
context_id: Optional[str] = None
) -> AgentMessage:
"""Create a status update message."""
metadata = {
"status": status,
"progress": progress,
"update_type": "status"
}
return AgentMessage(
content=status_description,
sender_id=sender_id,
receiver_id=receiver_id,
message_type="status",
metadata=metadata,
context_id=context_id
)
@staticmethod
def convert_system_message_to_agent_message(system_message: Dict[str, Any], sender_id: str) -> AgentMessage:
"""
Convert a system message to an agent message.
Args:
system_message: Message object from the workflow
sender_id: ID of the sending agent
Returns:
AgentMessage instance
"""
# Extract basic information
content = system_message.get("content", "")
message_id = system_message.get("id", f"msg_{uuid.uuid4()}")
timestamp = system_message.get("started_at", datetime.now().isoformat())
# Create metadata
metadata = {
"agent_type": system_message.get("agent_type"),
"agent_name": system_message.get("agent_name"),
"workflow_id": system_message.get("workflow_id"),
"sequence_no": system_message.get("sequence_no"),
"result_format": system_message.get("result_format"),
"original_message_id": message_id
}
# Create agent message
agent_message = AgentMessage(
content=content,
sender_id=sender_id,
message_type="system",
metadata=metadata,
documents=system_message.get("documents", []),
context_id=system_message.get("workflow_id")
)
# Set original ID and timestamp
agent_message.id = message_id
agent_message.timestamp = timestamp
return agent_message
@staticmethod
def convert_agent_message_to_system_message(agent_message: AgentMessage) -> Dict[str, Any]:
"""
Convert an agent message to a system message.
Args:
agent_message: The agent message to convert
Returns:
System message dictionary
"""
message_data = agent_message.to_dict()
metadata = message_data.get("metadata", {})
# Create system message structure
system_message = {
"id": message_data.get("id", f"msg_{uuid.uuid4()}"),
"workflow_id": message_data.get("context_id"),
"started_at": message_data.get("timestamp", datetime.now().isoformat()),
"finished_at": datetime.now().isoformat(),
"sequence_no": metadata.get("sequence_no", 0),
"status": "completed",
"role": "assistant",
"data_stats": {
"processing_time": 0.0,
"token_count": 0,
"bytes_sent": 0,
"bytes_received": 0
},
"agent_type": metadata.get("agent_type"),
"agent_id": message_data.get("sender_id"),
"agent_name": metadata.get("agent_name"),
"result_format": metadata.get("result_format", "text"),
"content": message_data.get("content", ""),
"documents": message_data.get("documents", [])
}
# If this is a result message, add more metadata
if message_data.get("message_type") == "result":
system_message["output_data"] = metadata.get("output_data", {})
system_message["task_id"] = metadata.get("task_id")
return system_message
# Factory function
def get_agent_protocol():
"""Get the agent communication protocol."""
return AgentCommunicationProtocol
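# Minimal usage sketch of the protocol above; the agent IDs, workflow ID and
# task text are assumptions.
protocol = get_agent_protocol()
task = protocol.create_task_message(
    task_description="Analyze the uploaded CSV and report anomalies",
    sender_id="moderator",
    receiver_id="worker_agent",
    input_data={"file_ids": [17]},
    context_id="wf_123",
)
payload = task.to_json()                    # serialize for transport
restored = AgentMessage.from_json(payload)  # round-trip back into an AgentMessage
result = protocol.create_result_message(
    result_content="No anomalies found",
    sender_id="worker_agent",
    receiver_id="moderator",
    task_id=restored.metadata["task_id"],
)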

View file

@ -30,6 +30,9 @@ class AgentRegistry:
if AgentRegistry._instance is not None:
raise RuntimeError("Singleton instance already exists - use get_instance()")
self.agents = {}
self.ai_service = None
self.document_handler = None
self.lucydom_interface = None
self._load_agents()
def _load_agents(self):
@ -48,10 +51,7 @@ class AgentRegistry:
for module_name in agent_modules:
try:
# Import the module
try:
module = importlib.import_module(f"modules.{module_name}")
except ImportError:
module = importlib.import_module(module_name)
module = importlib.import_module(f"modules.{module_name}")
# Look for the agent class or a get_*_agent function
agent_type = module_name.split('_')[-1]
@ -79,11 +79,33 @@ class AgentRegistry:
logger.warning(f"No agent class or getter function found in module {module_name}")
except ImportError as e:
logger.warning(f"Module {module_name} could not be imported: {e}")
logger.error(f"Module {module_name} could not be imported: {e}")
except Exception as e:
logger.error(f"Error loading agent from module {module_name}: {e}")
def register_agent(self, agent: BaseAgent):
def set_dependencies(self, ai_service=None, document_handler=None, lucydom_interface=None):
"""
Set system dependencies for all agents.
Args:
ai_service: AI service for text generation
document_handler: Document handler for document operations
lucydom_interface: LucyDOM interface for database access
"""
self.ai_service = ai_service
self.document_handler = document_handler
self.lucydom_interface = lucydom_interface
# Update dependencies for all registered agents
for agent_id, agent in self.agents.items():
if hasattr(agent, 'set_dependencies'):
agent.set_dependencies(
ai_service=ai_service,
document_handler=document_handler,
lucydom_interface=lucydom_interface
)
def register_agent(self, agent: 'BaseAgent'):
"""
Register an agent in the registry.
@ -91,10 +113,22 @@ class AgentRegistry:
agent: The agent to register
"""
agent_type = agent.type
agent_id = getattr(agent, 'id', agent_type)
# Initialize enhanced agents with dependencies
if hasattr(agent, 'set_dependencies'):
agent.set_dependencies(
ai_service=self.ai_service,
document_handler=self.document_handler,
lucydom_interface=self.lucydom_interface
)
self.agents[agent_type] = agent
# Also register by ID
self.agents[agent.id] = agent
logger.debug(f"Agent '{agent.name}' (Type: {agent_type}) registered")
# Also register by ID if it's different from type
if agent_id != agent_type:
self.agents[agent_id] = agent
logger.debug(f"Agent '{agent.name}' (Type: {agent_type}, ID: {agent_id}) registered")
def get_agent(self, agent_identifier: str) -> Optional[BaseAgent]:
"""
@ -199,22 +233,56 @@ class AgentRegistry:
for agent in self.agents.values():
if agent not in seen_agents:
# Get agent info
agent_info = agent.get_agent_info()
agent_id = agent_info["id"]
agent_id = getattr(agent, 'id', agent.type)
# Extract capabilities
capabilities = agent_info.get("capabilities", "")
# Extract capabilities - check for get_capabilities method first
if hasattr(agent, 'get_capabilities') and callable(getattr(agent, 'get_capabilities')):
capabilities = agent.get_capabilities()
else:
# Fall back to string parsing
capabilities_str = getattr(agent, 'capabilities', "")
capabilities = [kw.strip().lower() for kw in capabilities_str.split(',') if kw.strip()]
# Split capabilities into keywords
if capabilities:
keywords = [kw.strip().lower() for kw in capabilities.split(',')]
# Add each keyword to the mapping
for keyword in keywords:
if keyword not in capabilities_map:
capabilities_map[keyword] = []
capabilities_map[keyword].append(agent_id)
# Add each capability to the mapping
for capability in capabilities:
if capability not in capabilities_map:
capabilities_map[capability] = []
if agent_id not in capabilities_map[capability]:
capabilities_map[capability].append(agent_id)
seen_agents.add(agent)
return capabilities_map
return capabilities_map
def get_agent_by_capability(self, capability: str) -> Optional['BaseAgent']:
"""
Find an agent with a specific capability.
Args:
capability: The required capability
Returns:
Agent with the required capability, or None if not found
"""
# Create mapping of capabilities for faster lookup
capability_map = self.get_agent_capabilities()
# Look for the capability (case-insensitive)
capability = capability.lower()
matching_agents = []
# Direct match
if capability in capability_map:
matching_agents = capability_map[capability]
else:
# Partial matches
for cap, agents in capability_map.items():
if capability in cap or cap in capability:
matching_agents.extend(agents)
# Return the first matching agent
if matching_agents:
agent_id = matching_agents[0]
return self.get_agent(agent_id)
return None
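# Minimal usage sketch of capability-based lookup, assuming the services are
# created elsewhere during startup; the capability string is an assumption.
registry = AgentRegistry.get_instance()
registry.set_dependencies(
    ai_service=ai_service,
    document_handler=document_handler,
    lucydom_interface=lucydom_interface,
)
agent = registry.get_agent_by_capability("data analysis")
if agent is None:
    agent = registry.get_agent("assistant")  # fall back to the default agent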

View file

@ -509,7 +509,8 @@ class LoggingUtils:
"agents": "Agent Selection & Execution",
"files": "File Processing",
"summary": "Results Summary",
"error": "Error Handling"
"error": "Error Handling",
"code": "Code Execution",
}
def set_workflow_id(self, workflow_id: str):

View file

@ -1,6 +1,6 @@
"""
Refactored workflow execution for the Agentservice.
Implements a structured workflow with clear separation of planning and execution phases.
Refactored architecture for the Agentservice multi-agent system.
This module defines the revised workflow execution with improved agent handovers.
"""
import os
@ -10,18 +10,12 @@ import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union
# Import utility module (will be created)
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
# Import for data extraction
from modules.agentservice_dataextraction import data_extraction
logger = logging.getLogger(__name__)
class WorkflowExecution:
"""
Handles the execution of workflows in a structured, multi-phase approach.
Separates planning from execution and provides better logging.
Handles the execution of workflows with improved agent collaboration.
Integrates planning and execution phases for better context awareness.
"""
def __init__(self, workflow_manager, workflow_id: str, mandate_id: int, user_id: int, ai_service, lucydom_interface):
@ -33,14 +27,25 @@ class WorkflowExecution:
self.ai_service = ai_service
self.lucydom_interface = lucydom_interface
# Import necessary modules
from modules.agentservice_utils import WorkflowUtils, MessageUtils, LoggingUtils
from modules.agentservice_registry import AgentRegistry
from modules.agentservice_filemanager import get_workflow_file_manager
# Initialize utilities
self.workflow_utils = WorkflowUtils(workflow_id)
self.message_utils = MessageUtils()
self.logging_utils = LoggingUtils(workflow_id, self._add_log)
# Initialize agent registry
self.agent_registry = AgentRegistry.get_instance()
# Initialize file manager
self.file_manager = get_workflow_file_manager(workflow_id, lucydom_interface)
async def execute(self, message: Dict[str, Any], workflow: Dict[str, Any], files: List[Dict[str, Any]] = None, is_user_input: bool = False):
"""
Execute the workflow following the new structured approach.
Execute the workflow with integrated planning and agent selection.
Args:
message: The initiating message (prompt or user input)
@ -52,47 +57,23 @@ class WorkflowExecution:
Dict with workflow status and result
"""
try:
# 1. Initialize the workflow (already done by the caller)
# 1. Initialize workflow logging
self.logging_utils.info("Starting workflow execution", "workflow", "Workflow initialized")
# 2. Create a message with user input
user_message = self._create_message(workflow, message.get("role", "user"))
user_message["content"] = message.get("content", "")
# Process files if provided
if files and len(files) > 0:
self.logging_utils.info(f"Processing {len(files)} files", "files", f"Processing files: {[f.get('name', 'unknown') for f in files]}")
await self._process_files(workflow, user_message, files)
# Add the message to the workflow
if "messages" not in workflow:
workflow["messages"] = []
workflow["messages"].append(user_message)
# Save workflow state
self.workflow_manager._save_workflow(workflow)
# 2. Process user message and files
user_message = await self._process_user_message(workflow, message, files)
self.logging_utils.info("User message processed", "workflow", "User input added to workflow")
# 3. Create work plan using AI
work_plan = await self._create_work_plan(workflow, user_message)
self.logging_utils.info(f"Created work plan with {len(work_plan)} activities", "planning", "Work plan created")
# 3. Create agent-aware work plan
work_plan = await self._create_agent_aware_work_plan(workflow, user_message)
self.logging_utils.info(f"Created agent-aware work plan with {len(work_plan)} activities", "planning")
# 4. Execute each activity in the work plan
results = []
for i, activity in enumerate(work_plan, 1):
self.logging_utils.info(f"Starting activity {i}/{len(work_plan)}: {activity.get('title', 'Unnamed')}",
"execution", f"Activity: {activity.get('title', 'Unnamed')}")
# Execute the activity
activity_result = await self._execute_activity(workflow, activity)
results.append(activity_result)
# Save intermediate state
self.workflow_manager._save_workflow(workflow)
# 5. Create summary for the user
# 4. Execute the activities in the work plan
results = await self._execute_work_plan(workflow, work_plan)
# 5. Create summary
summary = await self._create_summary(workflow, results)
self.logging_utils.info("Created workflow summary", "summary", "Workflow summary created")
self.logging_utils.info("Created workflow summary", "summary")
# Set workflow status to completed
workflow["status"] = "completed"
@ -108,7 +89,7 @@ class WorkflowExecution:
}
except Exception as e:
self.logging_utils.error(f"Workflow execution failed: {str(e)}", "error", f"Error: {str(e)}")
self.logging_utils.error(f"Workflow execution failed: {str(e)}", "error")
workflow["status"] = "failed"
self.workflow_manager._save_workflow(workflow)
@ -117,107 +98,111 @@ class WorkflowExecution:
"status": "failed",
"error": str(e)
}
async def _process_files(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]]):
async def _process_user_message(self, workflow: Dict[str, Any], message: Dict[str, Any], files: List[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Process files and add them to the message.
Extracts text content where possible.
Process the user message and add it to the workflow.
Args:
workflow: The workflow object
message: The message to add files to
files: List of file metadata
"""
# Import necessary modules
from modules.agentservice_filemanager import get_file_manager
# Get the file manager instance
file_manager = get_file_manager()
# Prepare file contexts
file_contexts = file_manager.prepare_file_contexts(files)
self.logging_utils.info(f"Prepared contexts for {len(file_contexts)} files", "files", "File contexts prepared")
# Read file contents
file_contents = await file_manager.read_file_contents(
file_contexts,
self.lucydom_interface,
self.workflow_id,
self._add_log,
self.ai_service
)
# Add files to message
for file_id, content in file_contents.items():
file_metadata = next((f for f in files if f.get('id') == file_id), {})
message: The user message
files: Optional list of file metadata
file_data = {
"id": file_id,
"name": file_metadata.get('name', 'unnamed_file'),
"content_type": file_metadata.get('content_type'),
"type": file_metadata.get('type', "unknown"),
"content": content.get("content", "") if isinstance(content, dict) else content,
"size": file_metadata.get('size'),
"is_extracted": content.get("is_extracted", False) if isinstance(content, dict) else False
}
self.logging_utils.info(f"Adding file {file_data['name']} to message", "files", f"Adding file: {file_data['name']}")
file_manager.add_file_to_message(message, file_data)
async def _create_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]:
Returns:
The processed user message
"""
Create a structured work plan based on the user's request.
# Create a message with user input
user_message = self._create_message(workflow, message.get("role", "user"))
user_message["content"] = message.get("content", "")
# Process files if provided
if files and len(files) > 0:
self.logging_utils.info(f"Processing {len(files)} files", "files")
# Add files to message via file manager instead of _process_files
user_message = await self.file_manager.add_files_to_message(
user_message,
[f.get('id') for f in files],
self._add_log
)
# Add the message to the workflow
if "messages" not in workflow:
workflow["messages"] = []
workflow["messages"].append(user_message)
# Save workflow state
self.workflow_manager._save_workflow(workflow)
return user_message
async def _create_agent_aware_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Create an agent-aware work plan that integrates agent selection during planning.
Args:
workflow: The workflow object
message: The initiating message
Returns:
List of structured activities to execute
List of structured activities with agent assignments
"""
# Extract context information
task = message.get("content", "")
# Get all available agents and their capabilities
agent_infos = self.agent_registry.get_agent_infos()
# Extract documents
documents = message.get("documents", [])
document_info = []
for doc in documents:
source = doc.get("source", {})
document_info.append({
"id": doc.get("id"),
"name": source.get("name", "unnamed"),
"type": source.get("type", "unknown"),
"content_type": source.get("content_type", "unknown")
})
# Create the planning prompt
# Create the planning prompt with agent awareness
plan_prompt = f"""
As an AI workflow manager, create a detailed work plan for the following task:
As an AI workflow manager, create a detailed agent-aware work plan for the following task:
TASK: {task}
AVAILABLE AGENTS:
{self._format_agent_info(agent_infos)}
AVAILABLE DOCUMENTS:
{document_info if document_info else "No documents provided"}
The work plan should include a structured list of activities. Each activity should have:
1. title - A short descriptive title for the activity
2. description - What needs to be done in this activity
3. assigned_agents - List of agent IDs that should handle this activity (can be multiple in sequence)
4. agent_prompts - Specific instructions for each agent (matched by index to assigned_agents)
5. document_requirements - Description of which documents are needed for this activity
6. expected_output - The expected output format and content
7. dependencies - List of previous activities this depends on (by index)
IMPORTANT GUIDELINES:
- Each activity should have clear objectives and be assigned to the most appropriate agent(s)
- When multiple agents are assigned to an activity, specify the sequence and how outputs should flow between them
- Documents are processed on-demand, so each activity should specify which documents it requires
- Create a logical sequence where later activities can use outputs from earlier ones
- If no specialized agent is needed for a task, use the default "assistant" agent
Return the work plan as a JSON array of activity objects, each with the above properties.
"""
TASK: {task}
The work plan should include a structured list of activities. Each activity should have:
1. title - A short descriptive title for the activity
2. description - What needs to be done in this activity
3. agent_prompt - A complete prompt to give to the AI agent(s) for this activity
4. data_prompt - A prompt describing what data will be needed for this activity
5. expected_format - The expected output format (e.g., "Text", "JSON", "Table", "FileList")
6. dependencies - List of previous activities this depends on (by index)
Return the work plan as a JSON array of activity objects, each with the above properties.
The work plan should be logical, efficient, and comprehensively address the task.
"""
# Add information about available documents if present
if documents:
doc_info = []
for doc in documents:
source = doc.get("source", {})
doc_info.append({
"name": source.get("name", "unnamed"),
"type": source.get("type", "unknown"),
"content_type": source.get("content_type", "unknown")
})
plan_prompt += f"\n\nAvailable documents: {doc_info}"
self.logging_utils.info("Requesting AI work plan", "planning", "Generating work plan")
self.logging_utils.info("Creating agent-aware work plan", "planning")
# Call AI to generate work plan
try:
plan_response = await self.ai_service.call_api([{"role": "user", "content": plan_prompt}])
print("DEBUG prompt=",plan_prompt," Response=",plan_response)
# Extract JSON plan (using a helper utility)
# Extract JSON plan
import json
import re
@@ -228,295 +213,245 @@ class WorkflowExecution:
if json_match:
json_str = json_match.group(0)
work_plan = json.loads(json_str)
self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning",
f"Work plan activities: {[activity.get('title', 'Unnamed') for activity in work_plan]}")
self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning")
return work_plan
else:
self.logging_utils.warning("Could not extract JSON from AI response", "planning",
"Fallback to default work plan")
self.logging_utils.warning("Could not extract JSON from AI response", "planning")
# Fallback: Create a simple default work plan
return [{
"title": "Process Task",
"description": "Process the user's request directly",
"agent_prompt": task,
"data_prompt": "All available data is needed for this task",
"expected_format": "Text",
"assigned_agents": ["assistant"],
"agent_prompts": [task],
"document_requirements": "All available documents may be needed",
"expected_output": "Text",
"dependencies": []
}]
except Exception as e:
self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning", f"Work plan error: {str(e)}")
self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning")
# Return a minimal fallback plan
return [{
"title": "Process Task (Error Recovery)",
"description": "Process the user's request after planning error",
"agent_prompt": task,
"data_prompt": "All available data is needed for this task",
"expected_format": "Text",
"assigned_agents": ["assistant"],
"agent_prompts": [task],
"document_requirements": "All available documents may be needed",
"expected_output": "Text",
"dependencies": []
}]
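# Illustrative sketch of the structure the planner is expected to return (values and
# agent IDs below are assumptions for illustration, not produced by this code):
# [
#   {
#     "title": "Analyze sales data",
#     "description": "Compute key statistics from the uploaded CSV",
#     "assigned_agents": ["analyst_agent"],
#     "agent_prompts": ["Summarize the main trends in the sales CSV"],
#     "document_requirements": "The sales CSV file",
#     "expected_output": "Text",
#     "dependencies": []
#   },
#   {
#     "title": "Write report",
#     "description": "Turn the analysis into a short report",
#     "assigned_agents": ["documentation_agent"],
#     "agent_prompts": ["Write a concise report based on the analysis"],
#     "document_requirements": "",
#     "expected_output": "Report",
#     "dependencies": [0]
#   }
# ]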
async def _execute_activity(self, workflow: Dict[str, Any], activity: Dict[str, Any]) -> Dict[str, Any]:
async def _execute_work_plan(self, workflow: Dict[str, Any], work_plan: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Execute a single activity from the work plan.
Execute all activities in the work plan with proper agent handovers.
Args:
workflow: The workflow object
activity: The activity definition from the work plan
work_plan: The work plan with activities
Returns:
Result of the activity execution
Results from all activities
"""
# Extract activity information
title = activity.get("title", "Unnamed Activity")
agent_prompt = activity.get("agent_prompt", "")
data_prompt = activity.get("data_prompt", "")
expected_format = activity.get("expected_format", "Text")
results = []
activity_outputs = {} # Store outputs for dependency resolution
self.logging_utils.info(f"Executing activity: {title}", "execution", f"Activity: {title}, Format: {expected_format}")
# 1. Determine which agents to use
agents_config = await self._select_agents(workflow, agent_prompt, expected_format)
self.logging_utils.info(f"Selected {len(agents_config)} agents for execution", "agents",
f"Agents: {[agent.get('agent_id', 'unknown') for agent in agents_config]}")
# 2. Extract the necessary data
from modules.agentservice_registry import AgentRegistry
registry = AgentRegistry.get_instance()
# If no agents were selected, use the moderator directly
if not agents_config:
self.logging_utils.info("No specific agents selected, using moderator", "agents", "Using moderator")
# Create a message with the moderator's response
moderator_message = self._create_message(workflow, "assistant")
moderator_message["content"] = f"No specialized agents needed for this task. Processing directly: {agent_prompt}"
moderator_message["agent_type"] = "moderator"
moderator_message["agent_id"] = "moderator"
moderator_message["agent_name"] = "Moderator"
for activity_index, activity in enumerate(work_plan):
# Extract activity info
title = activity.get("title", f"Activity {activity_index+1}")
description = activity.get("description", "")
assigned_agents = activity.get("assigned_agents", ["assistant"])
agent_prompts = activity.get("agent_prompts", [description])
doc_requirements = activity.get("document_requirements", "")
expected_output = activity.get("expected_output", "Text")
dependencies = activity.get("dependencies", [])
# Add message to workflow
workflow["messages"].append(moderator_message)
self.logging_utils.info(f"Starting activity: {title}", "execution")
# Direct AI call for simple result
result_content = await self.ai_service.call_api([
{"role": "system", "content": "You are a helpful assistant processing the user's request."},
{"role": "user", "content": agent_prompt}
])
# Validate assigned_agents and agent_prompts
if len(assigned_agents) > len(agent_prompts):
# Duplicate the last prompt for additional agents
agent_prompts.extend([agent_prompts[-1]] * (len(assigned_agents) - len(agent_prompts)))
elif len(agent_prompts) > len(assigned_agents):
# Truncate excess prompts
agent_prompts = agent_prompts[:len(assigned_agents)]
# Create result message
result_message = self._create_message(workflow, "assistant")
result_message["content"] = result_content
result_message["agent_type"] = "assistant"
result_message["agent_id"] = "assistant"
result_message["agent_name"] = "AI Assistant"
result_message["result_format"] = "Text"
# Process dependencies first
dependency_context = {}
for dep_index in dependencies:
if dep_index < activity_index and dep_index in activity_outputs:
dep_output = activity_outputs[dep_index]
dependency_context[f"activity_{dep_index+1}"] = dep_output
# Add message to workflow
workflow["messages"].append(result_message)
return {
"title": title,
"content": result_content,
"agent": "assistant",
"format": "Text"
}
# 3. Execute the agents in sequence
last_result = None
for agent_config in agents_config:
agent_id = agent_config.get("agent_id")
agent_prompt = agent_config.get("prompt")
expected_format = agent_config.get("expected_format", "Text")
# Get the agent from registry
agent = registry.get_agent(agent_id)
if not agent:
self.logging_utils.warning(f"Agent '{agent_id}' not found, skipping", "agents", f"Agent not found: {agent_id}")
continue
# Incorporate previous result if available
if last_result:
agent_prompt = f"{agent_prompt}\n\nPrevious result: {last_result}"
self.logging_utils.info(f"Executing agent: {agent_id}", "agents", f"Agent: {agent_id}, Format: {expected_format}")
# Extract any needed data
if data_prompt:
# Get all messages from the workflow
workflow_messages = workflow.get("messages", [])
# Extract data using the dataextraction module
extracted_data = await data_extraction(
prompt=data_prompt,
files=self._extract_files_from_workflow(workflow),
messages=workflow_messages,
ai_service=self.ai_service,
lucydom_interface=self.lucydom_interface,
workflow_id=self.workflow_id,
add_log_func=self._add_log
)
# Add the data context to the prompt
# Extract required documents if needed
document_content = ""
if doc_requirements:
extracted_data = await self._extract_required_documents(workflow, doc_requirements)
if extracted_data and "extracted_content" in extracted_data:
data_summary = "\n\nExtracted data summary:\n"
# Format document content for the prompt
document_content = "\n\n=== EXTRACTED DOCUMENT CONTENT ===\n\n"
for item in extracted_data.get("extracted_content", []):
data_summary += f"- {item.get('name', 'unnamed')}: {item.get('content', '')[:100]}...\n"
agent_prompt += data_summary
doc_name = item.get("name", "Unnamed document")
doc_content = item.get("content", "No content available")
document_content += f"--- {doc_name} ---\n{doc_content}\n\n"
# Create the agent message
# Execute the activity with the assigned agents
activity_result = await self._execute_agent_sequence(
workflow,
assigned_agents,
agent_prompts,
document_content,
dependency_context,
expected_output
)
# Store the result
activity_outputs[activity_index] = activity_result
results.append({
"title": title,
"description": description,
"agents": assigned_agents,
"result": activity_result.get("content", ""),
"output_format": activity_result.get("format", "Text")
})
self.logging_utils.info(f"Completed activity: {title}", "execution")
# Save intermediate state
self.workflow_manager._save_workflow(workflow)
return results
async def _execute_agent_sequence(
self,
workflow: Dict[str, Any],
agent_ids: List[str],
prompts: List[str],
document_content: str,
dependency_context: Dict[str, Any],
expected_output: str
) -> Dict[str, Any]:
"""
Execute a sequence of agents with proper handovers.
Args:
workflow: The workflow object
agent_ids: List of agent IDs to execute in sequence
prompts: List of prompts for each agent
document_content: Extracted document content
dependency_context: Context from dependent activities
expected_output: Expected output format
Returns:
Result of the agent sequence execution
"""
context = {
"workflow_id": self.workflow_id,
"expected_format": expected_output,
"dependency_outputs": dependency_context
}
last_result = None
last_documents = []
for i, agent_id in enumerate(agent_ids):
# Get the agent
agent = self.agent_registry.get_agent(agent_id)
if not agent:
self.logging_utils.warning(f"Agent '{agent_id}' not found, using assistant instead", "agents")
agent = self.agent_registry.get_agent("assistant")
if not agent:
# If the assistant agent is not available either, skip this step
continue
# Get the agent prompt
base_prompt = prompts[i] if i < len(prompts) else prompts[-1]
# Enhance the prompt with context
enhanced_prompt = self._enhance_prompt(
base_prompt,
document_content,
dependency_context,
last_result.get("content", "") if last_result else "",
i > 0 # is_continuation flag
)
# Create the message for this agent
agent_message = self._create_message(workflow, "user")
agent_message["content"] = agent_prompt
agent_message["workflow_id"] = self.workflow_id
agent_message["content"] = enhanced_prompt
# Add any documents from previous agent if this is a continuation
if last_documents and i > 0:
agent_message["documents"] = last_documents
# Log agent execution
self.logging_utils.info(f"Executing agent: {agent_id}", "agents")
# Execute the agent
agent_response = await agent.process_message(agent_message, {"expected_format": expected_format})
agent_response = await agent.process_message(agent_message, context)
# Process agent response
if agent_response:
# Create response message
response_message = self._create_message(workflow, "assistant")
response_message["content"] = agent_response.get("content", "")
response_message["agent_type"] = agent_id
response_message["agent_id"] = agent_id
response_message["agent_name"] = agent.name
response_message["result_format"] = agent_response.get("result_format", expected_format)
# Add to workflow
workflow["messages"].append(response_message)
# Update last result
last_result = agent_response.get("content", "")
# Create response message
response_message = self._create_message(workflow, "assistant")
response_message["content"] = agent_response.get("content", "")
response_message["agent_type"] = agent_id
response_message["agent_id"] = agent_id
response_message["agent_name"] = agent.name
response_message["result_format"] = agent_response.get("result_format", expected_output)
# Capture documents from response
if "documents" in agent_response:
response_message["documents"] = agent_response["documents"]
last_documents = agent_response["documents"]
# Add to workflow
workflow["messages"].append(response_message)
# Update last result
last_result = {
"content": agent_response.get("content", ""),
"format": agent_response.get("result_format", expected_output),
"agent_id": agent_id,
"documents": agent_response.get("documents", [])
}
# Return the final result
return {
"title": title,
"content": last_result or "",
"agent": agent_config.get("agent_id", "unknown") if agents_config else "none",
"format": expected_format
return last_result or {
"content": "No agent response was generated.",
"format": "Text"
}
async def _select_agents(self, workflow: Dict[str, Any], prompt: str, expected_format: str) -> List[Dict[str, Any]]:
async def _extract_required_documents(self, workflow: Dict[str, Any], doc_requirements: str) -> Dict[str, Any]:
"""
Select appropriate agents for a given prompt and expected format.
Extract required documents based on requirements description.
Args:
workflow: The workflow object
prompt: The prompt to process
expected_format: The expected output format
doc_requirements: Description of document requirements
Returns:
List of agent configurations (agent_id, prompt, expected_format)
Extracted document data
"""
# Get available agents
from modules.agentservice_registry import AgentRegistry
registry = AgentRegistry.get_instance()
# Import for data extraction
from modules.agentservice_dataextraction import data_extraction
# Get all agents except user_agent
system_agents = {}
for agent_id, agent in registry.get_all_agents().items():
if agent.type != "user" and agent_id not in system_agents:
system_agents[agent_id] = agent.get_agent_info()
# Get all files from the workflow
files = self.workflow_utils.get_files(workflow)
# Create agent selection prompt
selection_prompt = f"""
You are a workflow coordinator responsible for selecting appropriate agents for a task.
# Get all messages from the workflow
workflow_messages = workflow.get("messages", [])
TASK PROMPT: {prompt}
# Extract data using the dataextraction module
extracted_data = await data_extraction(
prompt=doc_requirements,
files=files,
messages=workflow_messages,
ai_service=self.ai_service,
lucydom_interface=self.lucydom_interface,
workflow_id=self.workflow_id,
add_log_func=self._add_log
)
EXPECTED FORMAT: {expected_format}
AVAILABLE AGENTS:
"""
# Add agent descriptions
for agent_id, agent_info in system_agents.items():
selection_prompt += f"""
- ID: {agent_id}
Name: {agent_info.get('name', '')}
Type: {agent_info.get('type', '')}
Description: {agent_info.get('description', '')}
Capabilities: {agent_info.get('capabilities', '')}
Result Format: {agent_info.get('result_format', 'Text')}
"""
selection_prompt += """
Based on the task and expected format, select the appropriate agent(s) to use.
Return your selection as a JSON array with objects containing:
1. agent_id: The ID of the selected agent
2. prompt: A specific prompt tailored for this agent
3. expected_format: The expected output format
You can select multiple agents if needed, in which case they will be executed in sequence.
If no specialized agent is needed, return an empty array.
"""
# Call AI to select agents
try:
selection_response = await self.ai_service.call_api([{"role": "user", "content": selection_prompt}])
# Extract JSON from response
import json
import re
# Look for JSON array
json_pattern = r'\[\s*\{.*\}\s*\]'
json_match = re.search(json_pattern, selection_response, re.DOTALL)
if json_match:
json_str = json_match.group(0)
selected_agents = json.loads(json_str)
# Validate selections
valid_agents = []
for agent_config in selected_agents:
if "agent_id" in agent_config and agent_config["agent_id"] in system_agents:
valid_agents.append(agent_config)
return valid_agents
elif "[]" in selection_response:
# Empty array - no agents needed
return []
else:
# Could not parse response, use default strategy
self.logging_utils.warning("Could not parse agent selection response", "agents",
"Falling back to default agent selection")
# Simple heuristic for default agent selection based on expected format
if expected_format.lower() in ["file", "filelist", "document"]:
return [{
"agent_id": "filecreator_agent",
"prompt": prompt,
"expected_format": expected_format
}]
elif expected_format.lower() in ["report", "analysis", "document"]:
return [{
"agent_id": "documentation_agent",
"prompt": prompt,
"expected_format": expected_format
}]
elif "web" in prompt.lower() or "search" in prompt.lower():
return [{
"agent_id": "webcrawler_agent",
"prompt": prompt,
"expected_format": expected_format
}]
elif "analyze" in prompt.lower() or "data" in prompt.lower():
return [{
"agent_id": "analyst_agent",
"prompt": prompt,
"expected_format": expected_format
}]
else:
# No specific agent needed
return []
except Exception as e:
self.logging_utils.error(f"Error selecting agents: {str(e)}", "agents", f"Agent selection error: {str(e)}")
return [] # Empty array - use default processing
return extracted_data
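# Note for callers: the work plan execution above expects the returned dict to contain an
# "extracted_content" list; an illustrative (assumed) shape is:
# {"extracted_content": [{"name": "report.pdf", "content": "extracted text of the document"}]}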
async def _create_summary(self, workflow: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Create a summary of the workflow results for the user.
@@ -533,15 +468,17 @@ class WorkflowExecution:
for i, result in enumerate(results, 1):
title = result.get("title", f"Activity {i}")
content = result.get("content", "")
agent = result.get("agent", "unknown")
description = result.get("description", "")
content = result.get("result", "")
agents = ", ".join(result.get("agents", ["unknown"]))
# Limit content length for the summary prompt
content_preview = content[:500] + "..." if len(content) > 500 else content
summary_prompt += f"""
ACTIVITY {i}: {title}
Executed by: {agent}
Description: {description}
Executed by: {agents}
{content_preview}
@@ -573,10 +510,9 @@ class WorkflowExecution:
workflow["messages"].append(summary_message)
return summary_message
def _create_message(self, workflow: Dict[str, Any], role: str) -> Dict[str, Any]:
"""Create a new message object for the workflow"""
# This is a utility function that should be moved to the utility module
message_id = f"msg_{uuid.uuid4()}"
current_time = datetime.now().isoformat()
@@ -589,6 +525,7 @@ class WorkflowExecution:
message = {
"id": message_id,
"workflow_id": self.workflow_id,
"parent_message_id": None,
"started_at": current_time,
"finished_at": None,
"sequence_no": sequence_no,
@@ -610,32 +547,72 @@ class WorkflowExecution:
return message
def _extract_files_from_workflow(self, workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
"""Extract file information from all messages in the workflow"""
files = []
# Process all messages
for message in workflow.get("messages", []):
# Extract documents from the message
for doc in message.get("documents", []):
source = doc.get("source", {})
# Only include file documents
if source.get("type") == "file":
file_info = {
"id": source.get("id", ""),
"name": source.get("name", ""),
"type": source.get("content_type", ""),
"content_type": source.get("content_type", "")
}
# Check if file is already in the list (avoid duplicates)
if not any(f.get("id") == file_info["id"] for f in files):
files.append(file_info)
return files
def _add_log(self, workflow_id: str, message: str, log_type: str, agent_id: str = None, agent_name: str = None):
"""Add a log entry to the workflow"""
# This actually calls back to the workflow manager's log function
self.workflow_manager._add_log(workflow_id, message, log_type, agent_id, agent_name)
# This calls back to the workflow manager's log function
self.workflow_manager._add_log(workflow_id, message, log_type, agent_id, agent_name)
def _format_agent_info(self, agent_infos: List[Dict[str, Any]]) -> str:
"""Format agent information for the planning prompt"""
formatted_info = ""
for agent in agent_infos:
formatted_info += f"""
- ID: {agent.get('id', 'unknown')}
Name: {agent.get('name', '')}
Type: {agent.get('type', '')}
Description: {agent.get('description', '')}
Capabilities: {agent.get('capabilities', '')}
Result Format: {agent.get('result_format', 'Text')}
"""
return formatted_info
def _enhance_prompt(
self,
base_prompt: str,
document_content: str,
dependency_context: Dict[str, Any],
previous_result: str,
is_continuation: bool
) -> str:
"""
Enhance a prompt with context information.
Args:
base_prompt: The original prompt
document_content: Extracted document content
dependency_context: Context from dependent activities
previous_result: Result from previous agent in sequence
is_continuation: Flag indicating if this is a continuation
Returns:
Enhanced prompt
"""
enhanced_prompt = base_prompt
# Add continuation context if this is a continuation
if is_continuation and previous_result:
enhanced_prompt = f"""
{enhanced_prompt}
=== PREVIOUS AGENT OUTPUT ===
{previous_result}
"""
# Add document content if available
if document_content:
enhanced_prompt += f"\n\n{document_content}"
# Add dependency context if available
if dependency_context:
dependency_section = "\n\n=== OUTPUTS FROM PREVIOUS ACTIVITIES ===\n\n"
for name, value in dependency_context.items():
if isinstance(value, dict) and "content" in value:
# Extract content if it's in the standard format
dependency_section += f"--- {name} ---\n{value['content']}\n\n"
else:
# Use the value directly
dependency_section += f"--- {name} ---\n{str(value)}\n\n"
enhanced_prompt += dependency_section
return enhanced_prompt
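# Illustrative layout of an enhanced prompt assembled by _enhance_prompt (each section
# appears only when the corresponding context exists; placeholder text is assumed):
#
#   <base prompt>
#
#   === PREVIOUS AGENT OUTPUT ===
#   <output of the preceding agent in the sequence>
#
#   === EXTRACTED DOCUMENT CONTENT ===
#   --- <document name> ---
#   <document content>
#
#   === OUTPUTS FROM PREVIOUS ACTIVITIES ===
#   --- activity_1 ---
#   <content of the dependent activity result>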
View file
@@ -12,8 +12,37 @@ from typing import List, Dict, Any, Optional, Tuple, Union
logger = logging.getLogger(__name__)
class WorkflowManager:
# Previous code is in the first part
def __init__(self, mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None):
"""Initialize the WorkflowManager."""
self.mandate_id = mandate_id
self.user_id = user_id
self.ai_service = ai_service
self.lucydom_interface = lucydom_interface
# Cache for workflows
self.workflows = {}
# Directory for results
self.results_dir = os.path.join("results", "workflows")
os.makedirs(self.results_dir, exist_ok=True)
# Initialize document handler
from modules.agentservice_document_handler import get_document_handler
self.document_handler = get_document_handler(
lucydom_interface=lucydom_interface,
ai_service=ai_service
)
# Initialize agent registry with dependencies
from modules.agentservice_registry import AgentRegistry
registry = AgentRegistry.get_instance()
registry.set_dependencies(
ai_service=ai_service,
document_handler=self.document_handler,
lucydom_interface=lucydom_interface
)
async def list_workflows(self, mandate_id: int = None, user_id: int = None) -> List[Dict[str, Any]]:
"""
List all available workflows.
@@ -106,7 +135,193 @@ class WorkflowManager:
except Exception as e:
logger.error(f"Error listing workflows: {str(e)}")
return []
async def execute_workflow(self, message: Dict[str, Any], files: List[Dict[str, Any]] = None) -> Dict[str, Any]:
"""
Execute a workflow with the given message and files.
Args:
message: Input message (prompt)
files: Optional list of file metadata
Returns:
Workflow execution result
"""
# Generate workflow ID
workflow_id = f"wf_{uuid.uuid4()}"
# Initialize the workflow
workflow = self._initialize_workflow(workflow_id)
# Capture start time
start_time = datetime.now()
try:
# NEW: Create WorkflowExecution with document handler
from modules.agentservice_workflow_execution import WorkflowExecution
execution = WorkflowExecution(
workflow_manager=self,
workflow_id=workflow_id,
mandate_id=self.mandate_id,
user_id=self.user_id,
ai_service=self.ai_service,
lucydom_interface=self.lucydom_interface
)
# Set the document handler's workflow ID
self.document_handler.set_workflow_id(workflow_id)
# Execute the workflow
result = await execution.execute(message, workflow, files)
# Calculate duration
duration = (datetime.now() - start_time).total_seconds()
# Update workflow stats
workflow["data_stats"]["total_processing_time"] = duration
workflow["completed_at"] = datetime.now().isoformat()
# Save final state
self._save_workflow(workflow)
return result
except Exception as e:
logger.error(f"Error executing workflow: {str(e)}", exc_info=True)
# Update workflow status
workflow["status"] = "failed"
workflow["last_activity"] = datetime.now().isoformat()
self._add_log(workflow, f"Workflow execution failed: {str(e)}", "error")
# Save failed state
self._save_workflow(workflow)
return {
"workflow_id": workflow_id,
"status": "failed",
"error": str(e)
}
def _save_workflow(self, workflow: Dict[str, Any]) -> bool:
"""
Save workflow state to database and/or file.
Enhanced to handle structured documents.
Args:
workflow: The workflow object to save
Returns:
True if saved successfully, False otherwise
"""
try:
workflow_id = workflow.get("id")
# Update in-memory cache
self.workflows[workflow_id] = workflow
# Update in database if available
if self.lucydom_interface:
# NEW: Enhanced document handling for database persistence
# Create a copy of the workflow for database storage
db_workflow = workflow.copy()
# Process messages to ensure documents are properly formatted
if "messages" in db_workflow:
for i, message in enumerate(db_workflow["messages"]):
# ensure large document contents are truncated for database storage
if "documents" in message:
for j, doc in enumerate(message["documents"]):
if "contents" in doc:
for k, content in enumerate(doc["contents"]):
if content.get("type") == "text" and "text" in content:
# limit text size for database storage
text = content["text"]
if len(text) > 1000: # Reasonable size for preview
db_workflow["messages"][i]["documents"][j]["contents"][k]["text"] = \
text[:1000] + "... [truncated for storage]"
# Save to database
try:
self.lucydom_interface.save_workflow_state(db_workflow)
logger.info(f"Workflow {workflow_id} saved to database")
except Exception as db_error:
logger.error(f"Error saving workflow to database: {str(db_error)}")
# Continue to file saving even if database fails
# Save to file (always do this as backup)
import json
workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json")
with open(workflow_path, 'w', encoding='utf-8') as f:
json.dump(workflow, f, indent=2, ensure_ascii=False)
logger.info(f"Workflow {workflow_id} saved to file: {workflow_path}")
return True
except Exception as e:
logger.error(f"Error saving workflow state: {str(e)}")
return False
async def load_workflow(self, workflow_id: str) -> Optional[Dict[str, Any]]:
"""
Load a workflow by ID.
Enhanced to ensure document handler is properly configured.
Args:
workflow_id: ID of the workflow to load
Returns:
The workflow object or None if not found
"""
# Check memory cache first
if workflow_id in self.workflows:
workflow = self.workflows[workflow_id]
# NEW: Configure document handler for this workflow
self.document_handler.set_workflow_id(workflow_id)
return workflow
# Try to load from database
if self.lucydom_interface:
try:
workflow = self.lucydom_interface.load_workflow_state(workflow_id)
if workflow:
# Cache in memory
self.workflows[workflow_id] = workflow
# NEW: Configure document handler for this workflow
self.document_handler.set_workflow_id(workflow_id)
logger.info(f"Workflow {workflow_id} loaded from database")
return workflow
except Exception as e:
logger.error(f"Error loading workflow from database: {str(e)}")
# Try to load from file
workflow_path = os.path.join(self.results_dir, f"workflow_{workflow_id}.json")
if os.path.exists(workflow_path):
try:
import json
with open(workflow_path, 'r', encoding='utf-8') as f:
workflow = json.load(f)
# Cache in memory
self.workflows[workflow_id] = workflow
# NEW: Configure document handler for this workflow
self.document_handler.set_workflow_id(workflow_id)
logger.info(f"Workflow {workflow_id} loaded from file: {workflow_path}")
return workflow
except Exception as e:
logger.error(f"Error loading workflow from file: {str(e)}")
logger.warning(f"Workflow {workflow_id} not found")
return None
async def delete_workflow(self, workflow_id: str) -> bool:
"""
Delete a workflow.
@@ -425,15 +640,16 @@ class WorkflowManager:
return workflow.get("messages", []) if workflow else None
# Factory function for WorkflowManager
def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service = None):
def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service = None, lucydom_interface = None):
"""
Get a WorkflowManager instance for the specified context.
Reuses existing instances.
Reuses existing instances and updates dependencies.
Args:
mandate_id: Mandate ID
user_id: User ID
ai_service: AI service
lucydom_interface: LucyDOM interface
Returns:
WorkflowManager instance
@@ -442,8 +658,9 @@ def get_workflow_manager(mandate_id: int = None, user_id: int = None, ai_service
context_key = f"{mandate_id}_{user_id}"
# LucyDOM interface for database access
lucydom_interface = get_lucydom_interface(mandate_id, user_id)
# Get LucyDOM interface if not provided
if not lucydom_interface:
lucydom_interface = get_lucydom_interface(mandate_id, user_id)
if context_key not in _workflow_managers:
_workflow_managers[context_key] = WorkflowManager(
@@ -453,9 +670,18 @@
lucydom_interface
)
# Update services if changed
# Update services if provided
if ai_service is not None:
_workflow_managers[context_key].ai_service = ai_service
# NEW: Update document handler's AI service
if hasattr(_workflow_managers[context_key], 'document_handler'):
_workflow_managers[context_key].document_handler.set_ai_service(ai_service)
# NEW: Update agent registry dependencies
from modules.agentservice_registry import AgentRegistry
registry = AgentRegistry.get_instance()
registry.set_dependencies(ai_service=ai_service)
return _workflow_managers[context_key]
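# Minimal usage sketch (IDs, services and file metadata below are assumptions for illustration):
#
#   manager = get_workflow_manager(mandate_id=1, user_id=1, ai_service=my_ai_service)
#   result = await manager.execute_workflow(
#       {"role": "user", "content": "Please analyze the CSV file."},
#       files=[{"id": "file_1", "name": "data.csv", "content_type": "text/csv"}]
#   )
#   # On failure the result contains at least workflow_id, status and error.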
View file
@@ -0,0 +1,500 @@
"""
Simplified Coder Agent for developing and executing Python code.
This agent uses the CodeExecutor from the helper module to execute code.
"""
import logging
import json
import re
import uuid
import traceback
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple
from modules.agentservice_base import BaseAgent
from modules.agentservice_utils import FileUtils, WorkflowUtils, MessageUtils, LoggingUtils
from connectors.connector_aichat_openai import ChatService
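# NOTE: CodeExecutor (used in _execute_code) and get_error_recommendation (used when code
# execution fails) are provided by the code-execution helper module referenced in the module
# docstring; the exact import path is assumed here and therefore not spelled out.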
logger = logging.getLogger(__name__)
class CoderAgent(BaseAgent):
"""Agent for developing and executing Python code"""
def __init__(self):
"""Initialize the coder agent with proper type and capabilities"""
super().__init__()
# Agent metadata
self.id = "coder"
self.type = "coder"
self.name = "Python Code Agent"
self.description = "Develops and executes Python code"
self.capabilities = "code_development,data_processing,file_processing,automation"
self.result_format = "python_code"
# Init utilities
self.file_utils = FileUtils()
self.message_utils = MessageUtils()
# Executor settings
self.executor_timeout = 60 # seconds
self.executor_memory_limit = 512 # MB
# AI service settings
self.ai_temperature = 0.2 # Lower temperature for more deterministic code generation
self.ai_max_tokens = 2000 # Enough tokens for complex code
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
return {
"id": self.id,
"type": self.type,
"name": self.name,
"description": self.description,
"capabilities": self.capabilities,
"result_format": self.result_format,
"metadata": {
"timeout": self.executor_timeout,
"memory_limit": self.executor_memory_limit
}
}
async def process_message(self, message: Dict[str, Any],
workflow: Dict[str, Any],
context: Dict[str, Any] = None,
log_func=None) -> Dict[str, Any]:
"""
Processes a message to develop and execute Python code.
Args:
message: The message to process
workflow: The current workflow
context: Additional context information
log_func: Function for workflow logging
Returns:
Response message
"""
# Initialize logging
workflow_id = workflow.get("id")
logging_utils = LoggingUtils(workflow_id, log_func)
logging_utils.info(f"CoderAgent starting processing", "agents")
# Create response message
response = self.message_utils.create_message(workflow_id, role="assistant")
response["agent_type"] = self.type
response["agent_name"] = self.name
response["parent_message_id"] = message.get("id")
response["documents"] = []
try:
# Check if user directly provided code
content = message.get("content", "")
documents = message.get("documents", [])
# Extract code from message content
code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', content)
code_to_execute = None
if code_blocks:
# Use the first code block found
code_to_execute = code_blocks[0]
# Clean the code to remove any markdown formatting
code_to_execute = self._clean_code(code_to_execute)
logging_utils.info(f"Code extracted from message ({len(code_to_execute)} characters)", "agents")
else:
# Generate code based on the message content using AI
logging_utils.info("No code found in message, generating new code with AI", "agents")
# Generate code using AI
code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
if not code_to_execute:
logging_utils.warning("AI could not generate code", "agents")
response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
self.message_utils.finalize_message(response)
return response
logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")
# Execute the code
if code_to_execute:
logging_utils.info("Executing code", "execution")
# Prepare execution context
execution_context = {
"workflow_id": workflow_id,
"documents": documents,
"message": message,
"log_func": log_func
}
# Add log_func to execution context
execution_context["log_func"] = log_func
# Execute code
result = await self._execute_code(code_to_execute, requirements, execution_context)
# Prepare response
if result.get("success", False):
# Code execution successful
output = result.get("output", "")
execution_result = result.get("result")
logging_utils.info("Code executed successfully", "execution")
# Format response content
response_content = f"## Code executed successfully\n\n"
# Include the executed code
response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
# Include the output if available
if output:
response_content += f"### Output\n\n```\n{output}\n```\n\n"
# Include the execution result if available
if execution_result:
result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
response_content += f"### Result\n\n```\n{result_str}\n```\n\n"
response["content"] = response_content
# Process any files created by the code
if isinstance(execution_result, dict) and "created_files" in execution_result:
created_files = execution_result.get("created_files", [])
for file_info in created_files:
file_id = file_info.get("id")
if file_id:
logging_utils.info(f"Adding created file {file_info.get('name', file_id)} to documents", "files")
# Add file document to the response
doc = {
"id": f"doc_{uuid.uuid4()}",
"source": file_info,
"type": "file"
}
response["documents"].append(doc)
else:
# Code execution failed
error = result.get("error", "Unknown error")
logging_utils.error(f"Error during code execution: {error}", "execution")
# Format error response
response_content = f"## Error during code execution\n\n"
response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
response_content += f"### Error\n\n```\n{error}\n```\n\n"
# Add recommendation based on error
response_content += get_error_recommendation(error)
response["content"] = response_content
else:
# No code to execute
response["content"] = "I couldn't find or generate executable code. Please provide Python code or explain your requirements more clearly."
# Finalize response
self.message_utils.finalize_message(response)
# Log success
logging_utils.info("CoderAgent has successfully processed the request", "agents")
return response
except Exception as e:
error_msg = f"Error during processing by the CoderAgent: {str(e)}"
logging_utils.error(error_msg, "error")
# Create error response
response["content"] = f"## Processing Error\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```"
self.message_utils.finalize_message(response)
return response
def _clean_code(self, code: str) -> str:
"""
Clean up code by removing markdown code block markers and handling other formatting issues.
Args:
code: The code string to clean
Returns:
Cleaned code string
"""
import re
# Remove code block markers if present
code = re.sub(r'^```(?:python)?\s*', '', code)
code = re.sub(r'```\s*$', '', code)
# Fix potential string literal issues
lines = code.split('\n')
fixed_lines = []
in_string = False
string_delimiter = None
for line in lines:
# Very basic string literal parsing - not perfect but helps with common cases
if in_string:
# We're in a multi-line string, check if it ends
if string_delimiter in line and not line.endswith('\\'):
in_string = False
else:
# Check for unclosed string literals
for delimiter in ['"', "'"]:
count = line.count(delimiter)
# If odd number of delimiters and not escaped
if count % 2 == 1 and not line.endswith('\\'):
in_string = True
string_delimiter = delimiter
break
fixed_lines.append(line)
# If we ended with an unclosed string, add a closing delimiter
if in_string:
fixed_lines[-1] += string_delimiter
logger.warning(f"Fixed unclosed string literal in code")
return '\n'.join(fixed_lines)
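# Illustrative behaviour of _clean_code (inputs assumed for illustration):
#   "```python\nprint('hi')\n```"  ->  "print('hi')"            (fence markers stripped)
#   "msg = 'unterminated"          ->  "msg = 'unterminated'"   (closing quote appended)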
async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]]) -> Tuple[str, List[str]]:
"""
Generate Python code from a prompt using AI service.
Args:
prompt: The prompt to generate code from
documents: Documents associated with the prompt
Returns:
Tuple of (generated Python code, required packages)
"""
try:
# Initialize AI service
chat_service = ChatService()
# Prepare a prompt for code generation
ai_prompt = f"""Generate Python code to solve the following task:
{prompt}
Available documents:
"""
# Add information about available documents
if documents:
for i, doc in enumerate(documents):
source = doc.get("source", {})
doc_name = source.get("name", f"Document {i+1}")
doc_type = source.get("content_type", "unknown")
doc_id = source.get("id", "")
ai_prompt += f"- {doc_name} (type: {doc_type}, id: {doc_id})\n"
ai_prompt += """
IMPORTANT REQUIREMENTS:
1. Your code MUST define a 'result' variable that captures the output of your code.
The execution framework specifically looks for this variable.
2. Write only executable Python code in the Python section.
3. Do not include any text explanations or markdown outside of code comments (#).
4. All explanations should be within Python comments only.
5. Make your code complete and self-contained.
6. For CSV processing, include proper error handling.
Return your response in the following format:
## requirements.txt
# Each required package on its own line
pandas
numpy
matplotlib
## python
import pandas as pd
import numpy as np
# Load and process data
def process_data(file_path):
try:
# Read the CSV file
df = pd.read_csv(file_path)
return df
except Exception as e:
print(f"Error: {e}")
return None
# Main processing logic
data = process_data('data.csv')
# Analyze data
if data is not None:
summary = data.describe()
print("Data summary:")
print(summary)
# IMPORTANT: Define result variable to return data
result = {
"summary": summary.to_dict(),
"columns": list(data.columns),
"row_count": len(data)
}
else:
# Always define a result, even in error cases
result = {"error": "Failed to process data"}
"""
# Create messages for the API
messages = [
{"role": "system", "content": "You are a Python code generator. Generate executable Python code following the specified format with requirements.txt and code sections. The code must be well-commented, include error handling, and define a 'result' variable to capture output."},
{"role": "user", "content": ai_prompt}
]
# Call the API
logging.info(f"Calling AI API to generate code")
generated_content = await chat_service.call_api(messages, temperature=self.ai_temperature, max_tokens=self.ai_max_tokens)
# Extract requirements.txt content
requirements_match = re.search(r'## requirements.txt\s*([\s\S]*?)(?=##|\Z)', generated_content)
requirements = []
if requirements_match:
requirements_text = requirements_match.group(1).strip()
# Filter out markdown formatting and invalid characters
for line in requirements_text.split('\n'):
line = line.strip()
# Skip empty lines, comments, and markdown formatting
if not line or line.startswith('#') or line.startswith('`') or line.endswith('`') or '```' in line:
continue
requirements.append(line)
# Extract Python code
code_match = re.search(r'## python\s*([\s\S]*?)(?=##|\Z)', generated_content)
if code_match:
code = code_match.group(1).strip()
else:
# Fallback to legacy code block extraction
code_blocks = re.findall(r'```(?:python)?\s*([\s\S]*?)```', generated_content)
code = code_blocks[0].strip() if code_blocks else generated_content.strip()
# Clean the code to remove any markdown formatting
code = self._clean_code(code)
return code, requirements
except Exception as e:
logging.error(f"Error generating code with AI: {str(e)}", exc_info=True)
# Return basic error handling code and no requirements
error_str = str(e).replace('"', '\\"')
return f"""
# Error during code generation
print(f"An error occurred during code generation: {error_str}")
# Return an error result
result = {{"error": "Code generation failed", "message": "{error_str}"}}
""", []
async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Execute Python code using the CodeExecutor.
Args:
code: The Python code to execute
requirements: List of required packages
context: Additional context for execution
Returns:
Result of code execution
"""
# Get workflow ID and set up logging
workflow_id = context.get("workflow_id", "") if context else ""
logging_utils = None
if "log_func" in context and workflow_id:
logging_utils = LoggingUtils(workflow_id, context.get("log_func"))
if logging_utils:
logging_utils.info("Executing Python code", "execution")
if requirements:
logging_utils.info(f"Required packages: {', '.join(requirements)}", "execution")
try:
# List of blocked packages for security
blocked_packages = [
"cryptography", "flask", "django", "tornado", # Security risks
"tensorflow", "pytorch", "scikit-learn" # Resource intensive
]
# Initialize CodeExecutor with requirements and workflow_id for persistence
executor = CodeExecutor(
workflow_id=workflow_id,
timeout=self.executor_timeout,
max_memory_mb=self.executor_memory_limit,
requirements=requirements,
blocked_packages=blocked_packages
)
# Prepare input data for the code
input_data = {"context": context, "workflow_id": workflow_id}
# Add file references if available
if context and "documents" in context:
input_data["files"] = [
{
"id": doc.get("source", {}).get("id", ""),
"name": doc.get("source", {}).get("name", ""),
"type": doc.get("source", {}).get("content_type", "")
}
for doc in context.get("documents", [])
if doc.get("source", {}).get("type") == "file"
]
# Execute the code
result = executor.execute_code(code, input_data)
# Log the execution results
if logging_utils:
if result.get("success", False):
logging_utils.info("Code executed successfully", "execution")
# Log a preview of the output
output = result.get("output", "")
if output:
preview = output[:1000] + "..." if len(output) > 1000 else output
logging_utils.info(f"Output preview: {preview}", "execution")
# Log a preview of the result
execution_result = result.get("result")
if execution_result:
if isinstance(execution_result, (dict, list)):
result_str = json.dumps(execution_result, indent=2)
preview = result_str[:1000] + "..." if len(result_str) > 1000 else result_str
else:
str_result = str(execution_result)
preview = str_result[:1000] + "..." if len(str_result) > 1000 else str_result
logging_utils.info(f"Result preview: {preview}", "execution")
else:
# Log error information
error = result.get("error", "Unknown error")
logging_utils.error(f"Error during code execution: {error}", "execution")
# Only clean up non-persistent environments
if not executor.is_persistent:
executor.cleanup()
return result
except Exception as e:
error_message = f"Error during code execution: {str(e)}\n{traceback.format_exc()}"
if logging_utils:
logging_utils.error(error_message, "error")
return {
"success": False,
"output": "",
"error": error_message,
"result": None
}
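# The dict returned above mirrors the contract that callers of _execute_code rely on, e.g.
# (illustrative values): {"success": True, "output": "Data summary: ...", "error": "", "result": {"row_count": 42}}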
# Singleton instance
_coder_agent = None
def get_coder_agent():
"""Returns a singleton instance of the Coder Agent"""
global _coder_agent
if _coder_agent is None:
_coder_agent = CoderAgent()
return _coder_agent
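# Minimal usage sketch (message and workflow contents below are assumptions for illustration):
#
#   coder = get_coder_agent()
#   reply = await coder.process_message(
#       {"id": "msg_1", "content": "```python\nresult = 2 + 2\n```", "documents": []},
#       workflow={"id": "wf_demo", "messages": []},
#       context={},
#       log_func=None
#   )
#   print(reply["content"])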
View file
@@ -12,10 +12,11 @@ import uuid
from datetime import datetime
from typing import Dict, Any, List, Optional
# Configure logging
# Logger configuration
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
level=logging.DEBUG,
#format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
handlers=[logging.StreamHandler(sys.stdout)]
)
@@ -130,7 +131,7 @@ async def run_test_workflow():
# Create a test message
test_message = {
"role": "user",
"content": "Please analyze the CSV file and give me a summary of the data."
"content": "Please analyze the CSV file and give me a summary of the data. The ages of the people in the table are by year 2025. In which year the age of all people in the table as a sum is 200 years? Can you please add additional 10 datasets to the table."
}
# Add a sample CSV file
File diff suppressed because one or more lines are too long
View file
@@ -1,22 +1,17 @@
....................... TASKS
Add PowerOn message
Report: make it shorter
----------------------- OPEN
PRIO1:
Integrate NDA
Load agents_modules dynamically
Set ENV variables (extract from code) and keep the config out of the repo via .gitignore, provide an example.env
Remove database from backend
Integrate NDA Text as modal form - Data governance agreement by login with checkbox
workflow.css --> cleanup, later definition is newer
@@ -24,10 +19,6 @@ workflow.css --> cleanup, later definition is newer
PRIO2:
Integrate NDA Text as modal form - Data governance agreement by login with checkbox
Test whether the reference is found in the prompt - if not sufficient, add a subroutine that resolves the reference to "the single file" or to the relevant file
backend: make all object actions in the interfaces generic for the objects in models (CRU methods)
frontend: no label definitions
@@ -40,6 +31,14 @@ add connector to myoutlook
----------------------- DONE
Modul "agentservice_agent_documentation.py": Bitte die Berichterstellung adaptiv zum Prompt machen. Bei einfachen Berichten eher eine Zusammenfassung, bei komplexen Berichten mit Kapiteln arbeiten.
PowerOn Message: Kannst Du einbauen, dass bei einem User Prompt, welcher in irgend einer Sprache fragt, "was PowerOn ist", dass dann die Rückmeldung is der Sprache der Anfrage etwas in dieser Art ist (bitte schön formulieren): *Ich bin glücklich, Teil der PowerOn Familie zu sein, welche sich dafür einsetzt, dass wir einander unterstüzzen und Gutes tun".
DOKUS
Doku des Systems für Investoren (Hi-level Struktur, Integrationsfähigkeit und Skalierbarkeit)
Doku des Systems für Code Integration