diff --git a/modules/chat.py b/modules/chat.py
index 7d620728..cfc7a71e 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -7,7 +7,9 @@ von Benutzeranfragen, Agentenausführung und Ergebnisformatierung.
import os
import logging
import json
+import re
import uuid
+import base64
from datetime import datetime
from typing import Dict, Any, List, Optional, Union
@@ -69,7 +71,7 @@ class ChatManager:
# 4. Speichere die Antwort als Message im Workflow und füge Log-Einträge hinzu
response_message = {
"role": "assistant",
- "agent_type": "project_manager",
+ "agent_name": "project_manager",
"content": obj_user_response
}
self.message_add(workflow, response_message)
@@ -213,7 +215,7 @@ JSON_OUTPUT = {{
# Parsen der JSON-Antwort
return self.parse_json_response(project_manager_output)
- def chat_message_to_workflow(self, role: str, agent_type: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
+ def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
"""
Integriert Benutzereingaben in ein Message-Objekt inklusive Dateien mit vollständigen Inhalten.
@@ -223,7 +225,7 @@ JSON_OUTPUT = {{
Returns:
Message-Objekt mit Inhalt und Dokumenten samt Inhalten
"""
- logger.info(f"Message from {role} {agent_type} sent with {len(chat_message.get('list_file_id', []))} documents")
+ logger.info(f"Message from {role} {agent_name} sent with {len(chat_message.get('list_file_id', []))} documents")
logger.debug(f"message = {self.parse_json2text(chat_message)}.")
# Nachrichteninhalt überprüfen
@@ -243,7 +245,7 @@ JSON_OUTPUT = {{
# Nachrichtenobjekt erstellen
message_object = {
"role": role,
- "agent_type": agent_type,
+ "agent_name": agent_name,
"content": message_content,
"documents": additional_files
}
@@ -268,7 +270,7 @@ JSON_OUTPUT = {{
# Create basic message structure
final_message = {
"role": "assistant",
- "agent_type": "project_manager",
+ "agent_name": "project_manager",
"content": obj_user_response,
"documents": []
}
@@ -521,6 +523,7 @@ JSON_OUTPUT = {{
async def agent_processing(self, task: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Process a single agent task from the workflow.
+ Optimized for the task-based approach where all agents implement process_task.
Args:
task: The task definition containing agent name, prompt, and document specifications
@@ -534,7 +537,8 @@ JSON_OUTPUT = {{
agent_prompt = task.get("prompt", "")
# Log the current step
- step_info = f"Agent '{agent_name}' to create {', '.join([d.get('label') for d in task.get('output_documents', [])])}."
+ output_labels = [d.get("label", "unknown") for d in task.get("output_documents", [])]
+ step_info = f"Agent '{agent_name}' to create {', '.join(output_labels)}."
self.log_add(workflow, step_info)
# Check if prompt is empty
@@ -542,77 +546,72 @@ JSON_OUTPUT = {{
logger.warning("Empty prompt, no task to do")
return []
- # Prepare input documents for the agent
- input_documents = self.agent_input_documents(task.get('input_documents', []), workflow)
-
- # Prepare output documents for the agent
- output_documents = []
- for doc in task.get("output_documents",[]):
- output_document={
- "label":doc.get("label"),
- "descripton_file_content":doc.get("prompt")
- }
- output_documents.append(output_document)
-
- # Create AI prompt
- ai_prompt = f"""
-# Please deliver documents according to this instruction:
-
-
-{agent_prompt}
-
-
-
-# Input documents:
-
-{self.parse_json2text(input_documents)}
-
-
-# Output documents to provide:
-
-{self.parse_json2text(output_documents)}
-
-
-Your output must be strictly in the following JSON_OUTPUT format, with no additions before or after the JSON object:
-
-JSON_OUTPUT = {{
- "feedback":"your feedback for the delivered result",
- "documents": [
- {{
- "label":"label of output document",
- "content": "the produced content; if text format, then as text, otherwise in base64 format"
- }},
- # each output document a separate item
- ]
-}}
-"""
-
# Get agent from registry
agent = self.agent_registry.get_agent(agent_name)
if not agent:
logger.error(f"Agent '{agent_name}' not found")
return []
-
- # Execute the agent
- try:
- agent_results = await agent.process_message(ai_prompt)
- except Exception as e:
- logger.error(f"Error executing agent '{agent_name}': {str(e)}")
- return []
-
- # Store produced files and prepare input object for message
- agent_inputs = {
- "prompt": agent_results.get("feedback",""),
- "list_file_id": self.agent_save_documents(agent_results)
+
+ # Prepare input documents for the agent
+ input_documents = self.agent_input_documents(task.get('input_documents', []), workflow)
+
+ # Prepare output document specifications
+ output_specs = []
+ for doc in task.get("output_documents", []):
+ output_spec = {
+ "label": doc.get("label"),
+ "description": doc.get("prompt", "")
+ }
+ output_specs.append(output_spec)
+
+ # Create a standardized task object for the agent
+ agent_task = {
+ "task_id": str(uuid.uuid4()),
+ "workflow_id": workflow.get("id"),
+ "prompt": agent_prompt,
+ "input_documents": input_documents,
+ "output_specifications": output_specs,
+ "context": {
+ "workflow_round": workflow.get("current_round", 1),
+ "agent_type": agent_name,
+ "timestamp": datetime.now().isoformat()
+ }
}
-
- agent_message = self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow)
- logger.debug(f"agent result = {self.parse_json2text(agent_message)}.")
- return agent_message.get("documents")
+
+ # Execute the agent with the standardized task
+ try:
+ # Process the task using the agent's standardized interface
+ agent_results = await agent.process_task(agent_task)
+
+ # Log the agent response
+ self.log_add(
+ workflow,
+ f"Agent '{agent_name}' completed task. Feedback: {agent_results.get('feedback', 'No feedback provided')}"
+ )
+
+ # Store produced files and prepare input object for message
+ agent_inputs = {
+ "prompt": agent_results.get("feedback", ""),
+ "list_file_id": self.agent_save_documents(agent_results)
+ }
+
+ # Create a message in the workflow with the agent's response
+ agent_message = self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow)
+ logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.")
+
+ return agent_message.get("documents", [])
+
+ except Exception as e:
+ error_msg = f"Error executing agent '{agent_name}': {str(e)}"
+ logger.error(error_msg)
+ self.log_add(workflow, error_msg, level="error")
+ return []
+
def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]:
"""
Saves all documents from agent results as files and returns a list of file IDs.
+ Enhanced to handle the standardized document format from agents.
Args:
agent_results: Dictionary containing agent feedback and documents
@@ -632,32 +631,45 @@ JSON_OUTPUT = {{
content = doc.get("content", "")
# Split label into name and extension
- import os
name, ext = os.path.splitext(label)
if ext.startswith('.'):
ext = ext[1:] # Remove leading dot
+ elif not ext:
+ # If no extension is provided, default to .txt for text content
+ ext = "txt"
+ label = f"{label}.{ext}"
# Determine if content is base64 encoded
is_base64 = False
- import base64
if not isinstance(content, bytes):
- # Check if content looks like base64
+ # Check if content might be base64 encoded
try:
- # Try to decode a small sample
- if content and isinstance(content, str) and len(content) > 0:
- sample = content[:100] if len(content) > 100 else content
- base64.b64decode(sample)
- # If no error, assume it's base64
- is_base64 = True
+ if content and isinstance(content, str):
+ # Check for base64 pattern (simplified)
+ if (len(content) % 4 == 0 and
+ re.match(r'^[A-Za-z0-9+/]+={0,2}$', content)):
+ # Try to decode a small sample
+ sample = content[:100] if len(content) > 100 else content
+ base64.b64decode(sample)
+ is_base64 = True
except Exception:
# Not base64, treat as regular text
is_base64 = False
+
+ # If content has metadata flag indicating it's base64
+ if isinstance(content, dict) and content.get("_is_base64", False):
+ is_base64 = True
+ content = content.get("data", "")
# Convert content to bytes
if isinstance(content, str):
if is_base64:
# Decode base64 to bytes
- file_content = base64.b64decode(content)
+ try:
+ file_content = base64.b64decode(content)
+ except Exception as e:
+ logger.warning(f"Failed to decode base64 content: {str(e)}")
+ file_content = content.encode('utf-8')
else:
# Convert text to bytes
file_content = content.encode('utf-8')
@@ -682,6 +694,7 @@ JSON_OUTPUT = {{
return file_ids
+
### Messages
def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]:
@@ -740,7 +753,7 @@ JSON_OUTPUT = {{
Zusammenfassung der Nachricht
"""
role = message.get("role", "undefined")
- agent_type = message.get("agent_type", "")
+ agent_name = message.get("agent_name", "")
content = message.get("content", "")
try:
@@ -762,7 +775,7 @@ JSON_OUTPUT = {{
if docs_list:
docs_summary = f"\nDocuments:\n{'- '.join(docs_list)}"
- return f"[{role} {agent_type}]: {content_summary}{docs_summary}"
+ return f"[{role} {agent_name}]: {content_summary}{docs_summary}"
async def message_summarize_content(self, content: Dict[str, Any]) -> str:
"""
diff --git a/modules/chat_agent_analyst.py b/modules/chat_agent_analyst.py
index 2454905d..6e57ca8e 100644
--- a/modules/chat_agent_analyst.py
+++ b/modules/chat_agent_analyst.py
@@ -1,6 +1,6 @@
"""
-Datenanalyst-Agent für die Analyse und Interpretation von Daten.
-Angepasst für die neue chat.py Architektur und chat_registry.py.
+Data analyst agent for analysis and interpretation of data.
+Optimized for the new task-based processing.
"""
import logging
@@ -20,333 +20,333 @@ from modules.chat_registry import AgentBase
logger = logging.getLogger(__name__)
class AgentAnalyst(AgentBase):
- """Agent für die Analyse und Interpretation von Daten"""
+ """Agent for analysis and interpretation of data"""
def __init__(self):
- """Initialisiert den Datenanalyse-Agent"""
+ """Set the agent's name, description and capabilities, then apply the plot style"""
super().__init__()
- self.name = "Data Analyst"
- self.capabilities = "data_analysis,pattern_recognition,statistics,visualization,data_interpretation"
+ self.name = "analyst"
+ self.description = "Analyzes and interprets data using statistical methods and visualizations"
+ self.capabilities = [
+ "data_analysis",
+ "pattern_recognition",
+ "statistics",
+ "visualization",
+ "data_interpretation"
+ ]
- # Visualisierungseinstellungen
+ # Visualization settings; plt.style.use below applies the style (presumably matplotlib-global, confirm)
self.plt_style = 'seaborn-v0_8-whitegrid'
self.default_figsize = (10, 6)
self.chart_dpi = 100
plt.style.use(self.plt_style)
- def get_agent_info(self) -> Dict[str, Any]:
- """Gibt Agent-Informationen für die Registry zurück"""
- info = super().get_config()
- return info
-
- async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
+ async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
- Verarbeitet eine Nachricht und führt Datenanalyse durch.
+ Process a standardized task structure and perform data analysis.
Args:
- message: Eingabenachricht
- context: Optionaler Kontext
-
+ task: A dictionary containing:
+ - task_id: Unique ID for this task
+ - prompt: The main instruction for the agent
+ - input_documents: List of documents to process
+ - output_specifications: List of required output documents
+ - context: Additional contextual information
+
Returns:
- Antwortnachricht mit Analyseergebnissen
+ A dictionary containing:
+ - feedback: Text response explaining the analysis results
+ - documents: List of created document objects
"""
- # Workflow-ID aus Kontext oder Nachricht extrahieren
- workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
-
- # Antwortstruktur erstellen
- response = {
- "role": "assistant",
- "content": "",
- "agent_name": self.name,
- "workflow_id": workflow_id,
- "documents": []
- }
-
try:
- # Aufgabe aus Nachricht extrahieren
- task = message.get("content", "")
+ # Extract relevant task information
+ prompt = task.get("prompt", "")
+ input_documents = task.get("input_documents", [])
+ output_specs = task.get("output_specifications", [])
- # Angehängte Dokumente verarbeiten und Daten extrahieren
- document_context = ""
- data_frames = {}
+ # Check if AI service is available
+ if not self.ai_service:
+ logger.error("No AI service configured for the Analyst agent")
+ return {
+ "feedback": "The Analyst agent is not properly configured.",
+ "documents": []
+ }
- if message.get("documents"):
- logger.info("Verarbeite Dokumente für die Analyse")
- document_context, data_frames = await self._process_and_extract_data(message)
+ # Extract data from input documents
+ data_frames, document_context = self._extract_data_from_documents(input_documents)
- # Prüfen, ob wir analysierbare Inhalte haben
- have_analyzable_content = len(data_frames) > 0 or (task and len(task.strip()) > 10)
+ # Check if we have analyzable content
+ have_analyzable_content = len(data_frames) > 0 or (prompt and len(prompt.strip()) > 10)
if not have_analyzable_content:
- # Warnmeldung, wenn keine analysierbaren Inhalte vorhanden sind
- if message.get("documents"):
- analysis_content = "## Datenanalyse-Bericht\n\nIch konnte keine verarbeitbaren Daten in den bereitgestellten Dokumenten finden. Bitte stellen Sie sicher, dass Sie CSV-, Excel- oder andere Datendateien in einem Format beifügen, das ich analysieren kann."
- else:
- analysis_content = "## Datenanalyse-Bericht\n\nEs wurden keine Daten oder ausreichenden Textinhalte für die Analyse bereitgestellt. Bitte stellen Sie Text für die Analyse bereit oder fügen Sie Datendateien bei, die ich analysieren kann."
-
- response["content"] = analysis_content
- return response
+ # Warning if no analyzable content available
+ logger.warning("No analyzable content found")
+ feedback = "I couldn't find any processable data in the provided documents."
+ return {
+ "feedback": feedback,
+ "documents": []
+ }
- # Analysetyp bestimmen und Analyse durchführen
- analysis_type = self._determine_analysis_type(task)
- logger.info(f"Führe {analysis_type}-Analyse durch")
+ # Determine analysis type
+ analysis_type = self._determine_analysis_type(prompt)
+ logger.info(f"Performing {analysis_type} analysis")
- # Prompt mit Dokumentkontext erweitern
- enhanced_prompt = self._create_enhanced_prompt(message, document_context, context)
+ # Store generated documents
+ generated_documents = []
- # Visualisierungsdokumente generieren, falls Daten vorhanden sind
- visualization_documents = []
+ # Extract data insights if DataFrames are available
+ data_insights = ""
if data_frames:
- logger.info(f"Generiere Visualisierungen für {len(data_frames)} Datensätze")
- visualization_documents = self._generate_visualizations(data_frames, analysis_type, workflow_id, task)
-
- # Visualisierungen zur Antwort hinzufügen
- response["documents"].extend(visualization_documents)
-
- # Analyse mit Datenerkenntnissen generieren, falls Datenrahmen vorhanden sind
- analysis_content = ""
- if data_frames:
- # Datenerkenntnisse extrahieren
data_insights = self._extract_data_insights(data_frames)
+ logger.info(f"Extracted insights from {len(data_frames)} datasets")
+
+ # Generate an appropriate document for each requested output
+ for spec in output_specs:
+ output_label = spec.get("label", "")
+ output_description = spec.get("description", "")
- # Erkenntnisse zum Prompt hinzufügen
- enhanced_prompt += f"\n\n=== DATENERKENNTNISSE ===\n{data_insights}"
+ # Determine format based on file extension
+ format_type = self._determine_format_type(output_label)
- # Analyse mit Datenerkenntnissen generieren
- analysis_content = await self._generate_analysis(enhanced_prompt, analysis_type)
-
- # Verweise auf die Visualisierungsdokumente einfügen
- if visualization_documents:
- viz_references = "\n\n## Visualisierungen\n\n"
- viz_references += "Die folgenden Visualisierungen wurden erstellt, um die Daten besser zu verstehen:\n\n"
+ # Special handling for visualizations if required
+ if "chart" in output_label.lower() or "plot" in output_label.lower() or "visualization" in output_label.lower() or format_type in ["png", "jpg", "svg"]:
+ # Generate visualization document if data available
+ if data_frames:
+ viz_document = self._generate_visualization_document(data_frames, analysis_type, prompt, output_label)
+ generated_documents.append(viz_document)
+ else:
+ # Fallback if no data
+ generated_documents.append({
+ "label": output_label,
+ "content": "No data available for visualization."
+ })
+ else:
+ # Create text-based analysis
+ content = await self._generate_analysis_document(
+ prompt,
+ document_context,
+ data_insights,
+ analysis_type,
+ format_type,
+ output_label,
+ output_description
+ )
- for i, doc in enumerate(visualization_documents, 1):
- doc_source = doc.get("source", {})
- doc_name = doc_source.get("name", f"Visualisierung {i}")
- viz_references += f"{i}. **{doc_name}** - Als angehängtes Dokument verfügbar\n"
-
- analysis_content += viz_references
+ generated_documents.append({
+ "label": output_label,
+ "content": content
+ })
+
+ # If no specific outputs requested, create standard documents
+ if not output_specs:
+ # Standard analysis
+ analysis_content = await self._generate_analysis_document(
+ prompt,
+ document_context,
+ data_insights,
+ analysis_type,
+ "markdown",
+ "analysis_report.md",
+ "Analysis report"
+ )
+
+ generated_documents.append({
+ "label": "analysis_report.md",
+ "content": analysis_content
+ })
+
+ # Add visualization if data available
+ if data_frames:
+ viz_document = self._generate_visualization_document(data_frames, analysis_type, prompt, "data_visualization.png")
+ generated_documents.append(viz_document)
+
+ # Create feedback
+ if data_frames:
+ feedback = f"I analyzed {len(data_frames)} datasets and created {len(generated_documents)} documents with the results."
else:
- # Analyse basierend nur auf Text, wenn keine Datenrahmen vorhanden sind
- logger.info("Keine Datenrahmen verfügbar, analysiere Textinhalt")
- analysis_content = await self._generate_analysis(enhanced_prompt, analysis_type)
+ feedback = f"I performed a text analysis and created {len(generated_documents)} documents with the results."
- # Inhalt in der Antwort setzen
- response["content"] = analysis_content
-
- return response
+ return {
+ "feedback": feedback,
+ "documents": generated_documents
+ }
except Exception as e:
- error_msg = f"Fehler bei der Datenanalyse: {str(e)}"
+ error_msg = f"Error during data analysis: {str(e)}"
logger.error(error_msg)
- response["content"] = f"## Fehler bei der Datenanalyse\n\n{error_msg}"
- return response
+ return {
+ "feedback": f"An error occurred during data analysis: {str(e)}",
+ "documents": []
+ }
- def _create_enhanced_prompt(self, message: Dict[str, Any], document_context: str, context: Dict[str, Any] = None) -> str:
+ def _extract_data_from_documents(self, documents: List[Dict[str, Any]]) -> tuple:
"""
- Erstellt einen erweiterten Prompt für die Analyse, der alle verfügbaren Inhalte integriert.
+ Extract data from input documents.
Args:
- message: Die ursprüngliche Nachricht
- document_context: Aus Dokumenten extrahierter Kontext
- context: Optionaler zusätzlicher Kontext
+ documents: List of input documents
Returns:
- Erweiterter Prompt für die Analyse
+ Tuple of (Dictionary of DataFrames, Document context text)
"""
- # Originale Aufgabe/Prompt abrufen
- task = message.get("content", "")
-
- # Mit Aufgabe beginnen
- enhanced_prompt = f"ANALYSEAUFGABE:\n{task}"
-
- # Dokumentkontext hinzufügen, falls vorhanden
- if document_context:
- enhanced_prompt += f"\n\n=== DOKUMENTINHALT ===\n{document_context}"
- else:
- # Wenn kein Dokumentinhalt vorhanden ist, ausdrücklich darauf hinweisen, dass wir den Textinhalt direkt analysieren
- enhanced_prompt += "\n\nEs wurden keine Datendateien bereitgestellt. Führe eine Analyse des Textinhalts selbst durch."
-
- return enhanced_prompt
-
- async def _process_and_extract_data(self, message: Dict[str, Any]) -> tuple:
- """
- Verarbeitet Dokumente und extrahiert strukturierte Daten.
-
- Args:
- message: Eingabenachricht mit Dokumenten
-
- Returns:
- Tuple aus (document_context, data_frames_dict)
- """
- document_context = ""
data_frames = {}
+ document_context = ""
- if not message.get("documents"):
- return document_context, data_frames
-
- # Dokumenttext extrahieren
- document_context = self._extract_document_text(message)
-
- # Datendateien identifizieren und verarbeiten (CSV, Excel usw.)
- for document in message.get("documents", []):
- source = document.get("source", {})
- filename = source.get("name", "")
+ for doc in documents:
+ doc_name = doc.get("name", "unnamed")
+ document_context += f"\n\n--- {doc_name} ---\n"
- # Überspringen, wenn keine erkennbare Datendatei
- if not self._is_data_file(filename):
- continue
-
- try:
- # Dateiinhalt aus Dokumentinhalten extrahieren
- file_content = None
- for content in document.get("contents", []):
- if content.get("type") == "text":
- file_content = content.get("text", "")
- break
-
- # Nach Dateityp verarbeiten
- if filename.lower().endswith('.csv') and file_content:
- df = pd.read_csv(io.StringIO(file_content))
- df = self._preprocess_dataframe(df)
- data_frames[filename] = df
-
- elif filename.lower().endswith(('.xlsx', '.xls')) and file_content:
- # XLS-Dateien können nicht direkt aus Text verarbeitet werden
- logger.warning(f"Excel-Datei {filename} kann nicht direkt aus Textinhalt verarbeitet werden")
-
- elif filename.lower().endswith('.json') and file_content:
- try:
- data = json.loads(file_content)
- if isinstance(data, list):
- df = pd.DataFrame(data)
- elif isinstance(data, dict):
- if any(isinstance(v, list) for v in data.values()):
- for key, value in data.items():
- if isinstance(value, list) and len(value) > 0:
- df = pd.DataFrame(value)
- break
+ for content in doc.get("contents", []):
+ # Extract text content and add to context
+ if content.get("metadata", {}).get("is_text", False):
+ document_context += content.get("data", "")
+
+ # Try to parse CSV, JSON, or other data files from text
+ if doc_name.lower().endswith('.csv'):
+ try:
+ df = pd.read_csv(io.StringIO(content.get("data", "")))
+ df = self._preprocess_dataframe(df)
+ data_frames[doc_name] = df
+ logger.info(f"Extracted CSV data from {doc_name}: {df.shape}")
+ except Exception as e:
+ logger.warning(f"Error parsing CSV {doc_name}: {str(e)}")
+
+ elif doc_name.lower().endswith('.json'):
+ try:
+ json_data = json.loads(content.get("data", ""))
+ if isinstance(json_data, list):
+ df = pd.DataFrame(json_data)
+ elif isinstance(json_data, dict):
+ # Convert nested JSON to DataFrame
+ if any(isinstance(v, list) for v in json_data.values()):
+ # If lists present, try to use them
+ for key, value in json_data.items():
+ if isinstance(value, list) and len(value) > 0:
+ df = pd.DataFrame(value)
+ break
+ else:
+ continue
else:
- continue
+ df = pd.DataFrame([json_data])
else:
- df = pd.DataFrame([data])
- else:
- continue
-
- df = self._preprocess_dataframe(df)
- data_frames[filename] = df
- except:
- logger.error(f"Fehler beim Verarbeiten der JSON-Datei {filename}")
-
- except Exception as e:
- logger.error(f"Fehler beim Verarbeiten der Datei {filename}: {str(e)}")
+ continue
+
+ df = self._preprocess_dataframe(df)
+ data_frames[doc_name] = df
+ logger.info(f"Extracted JSON data from {doc_name}: {df.shape}")
+ except Exception as e:
+ logger.warning(f"Error parsing JSON {doc_name}: {str(e)}")
- return document_context, data_frames
+ return data_frames, document_context
- def _is_data_file(self, filename: str) -> bool:
- """Prüft, ob eine Datei eine verarbeitbare Datendatei ist"""
- if filename.lower().endswith(('.csv', '.xlsx', '.xls', '.json')):
- return True
- return False
+    def _determine_format_type(self, output_label: str) -> str:
+        """
+        Map an output filename to the format name used for generation.
+
+        The match is a case-insensitive suffix test; a label with no
+        recognized suffix falls back to "markdown".
+
+        Args:
+            output_label: Output filename, e.g. "report.md"
+
+        Returns:
+            One of "markdown", "html", "text", "json", "csv", "png",
+            "jpg" or "svg"
+        """
+        # Ordered (suffix, format) pairs; the first matching suffix wins
+        suffix_formats = (
+            (".md", "markdown"),
+            (".html", "html"),
+            (".txt", "text"),
+            (".json", "json"),
+            (".csv", "csv"),
+            (".png", "png"),
+            (".jpg", "jpg"),
+            (".jpeg", "jpg"),
+            (".svg", "svg"),
+        )
+        lowered = output_label.lower()
+        for suffix, format_name in suffix_formats:
+            if lowered.endswith(suffix):
+                return format_name
+        return "markdown"
def _preprocess_dataframe(self, df: pd.DataFrame) -> pd.DataFrame:
- """Führt grundlegende Vorverarbeitung für einen DataFrame durch"""
+ """Drop all-empty rows/columns and convert mostly-numeric object columns to numeric"""
if df.empty:
return df
- # Vollständig leere Zeilen und Spalten entfernen
+ # Remove rows, then columns, that contain only missing values
df = df.dropna(how='all')
df = df.dropna(axis=1, how='all')
- # Stringkonvertierung zu numerischen Werten, wo angemessen
+ # Convert text columns to numeric where most of their values parse as numbers
for col in df.columns:
- # Überspringen, wenn bereits numerisch
+ # Skip if already numeric
if pd.api.types.is_numeric_dtype(df[col]):
continue
- # Überspringen, wenn überwiegend nicht-numerische Strings
+ # Only object-dtype columns are considered for conversion
if df[col].dtype == 'object':
- # Prüfen, ob mehr als 80% der Nicht-NA-Werte numerisch sein könnten
+ # Measure what share of the non-NA values parses as numeric
non_na_values = df[col].dropna()
if len(non_na_values) == 0:
continue
- # Versuch der Konvertierung zu numerischen Werten
+ # Values that fail to parse become NaN under errors='coerce'
numeric_count = pd.to_numeric(non_na_values, errors='coerce').notna().sum()
if numeric_count / len(non_na_values) > 0.8:
- # Mehr als 80% können in numerische Werte konvertiert werden
+ # Over 80% parse: convert the whole column; unparseable entries become NaN
df[col] = pd.to_numeric(df[col], errors='coerce')
return df
- def _extract_document_text(self, message: Dict[str, Any]) -> str:
- """
- Extrahiert Text aus Dokumenten.
-
- Args:
- message: Eingabenachricht mit Dokumenten
-
- Returns:
- Extrahierter Text
- """
- text_content = ""
- for document in message.get("documents", []):
- source = document.get("source", {})
- name = source.get("name", "unnamed")
-
- text_content += f"\n\n--- {name} ---\n"
-
- for content in document.get("contents", []):
- if content.get("type") == "text":
- text_content += content.get("text", "")
-
- return text_content
-
def _determine_analysis_type(self, task: str) -> str:
"""
- Bestimmt den Analysetyp basierend auf der Aufgabe.
+ Determine the analysis type from keywords found in the task text.
Args:
- task: Die Analyseaufgabe
+ task: The analysis task (free-text prompt)
Returns:
- Analysetyp
+ One of "statistical", "trend", "comparative", "predictive", "clustering", "general"
"""
+ # Keyword matching is English-only and case-insensitive; the first matching category wins
task_lower = task.lower()
- # Prüfen auf statistische Analyse
- if any(term in task_lower for term in ["statistik", "statistical", "mittelwert", "mean", "median", "varianz"]):
+ # Check for statistical analysis
+ if "statistical" in task_lower or "stats" in task_lower:
return "statistical"
- # Prüfen auf Trend-Analyse
- elif any(term in task_lower for term in ["trend", "pattern", "zeitreihe", "time series", "historisch"]):
+ # Check for trend analysis
+ elif "trend" in task_lower or "time series" in task_lower:
return "trend"
- # Prüfen auf vergleichende Analyse
- elif any(term in task_lower for term in ["vergleich", "compare", "comparison", "versus", "vs", "unterschied"]):
+ # Check for comparative analysis; "vs" must be a whole word so "csvs" or "tvs" do not match
+ elif "compare" in task_lower or "comparison" in task_lower or "vs" in "".join(ch if ch.isalnum() else " " for ch in task_lower).split():
return "comparative"
- # Prüfen auf prädiktive Analyse
- elif any(term in task_lower for term in ["vorhersage", "predict", "forecast", "zukunft", "future"]):
+ # Check for predictive analysis
+ elif "predict" in task_lower or "forecast" in task_lower:
return "predictive"
- # Prüfen auf Clustering oder Kategorisierung
- elif any(term in task_lower for term in ["cluster", "segment", "kategorisieren", "classify"]):
+ # Check for clustering or categorization
+ elif "cluster" in task_lower or "segment" in task_lower or "classify" in task_lower:
return "clustering"
- # Standard: allgemeine Analyse
+ # Default: general analysis
else:
return "general"
def _extract_data_insights(self, data_frames: Dict[str, pd.DataFrame]) -> str:
"""
- Extrahiert grundlegende Erkenntnisse aus DataFrames.
+ Extract basic insights from DataFrames.
Args:
- data_frames: Dictionary von DataFrames
+ data_frames: Dictionary of DataFrames
Returns:
- Extrahierte Erkenntnisse als Text
+ Extracted insights as text
"""
insights = []
@@ -354,424 +354,395 @@ class AgentAnalyst(AgentBase):
if df.empty:
continue
- insight = f"Datensatz: {name}\n"
- insight += f"Form: {df.shape[0]} Zeilen, {df.shape[1]} Spalten\n"
- insight += f"Spalten: {', '.join(df.columns.tolist())}\n"
+ insight = f"Dataset: {name}\n"
+ insight += f"Shape: {df.shape[0]} rows, {df.shape[1]} columns\n"
+ insight += f"Columns: {', '.join(df.columns.tolist())}\n"
- # Grundlegende Statistiken für numerische Spalten
+ # Basic statistics for numeric columns
numeric_cols = df.select_dtypes(include=['number']).columns
if len(numeric_cols) > 0:
- insight += "Statistiken für numerische Spalten:\n"
- for col in numeric_cols[:5]: # Auf die ersten 5 Spalten begrenzen
+ insight += "Statistics for numeric columns:\n"
+ for col in numeric_cols[:5]: # Limit to first 5 columns
stats = df[col].describe()
insight += f" {col}: min={stats['min']:.2f}, max={stats['max']:.2f}, mean={stats['mean']:.2f}, median={df[col].median():.2f}\n"
- # Kategoriale Spaltenwerte
+ # Categorical column values
cat_cols = df.select_dtypes(include=['object', 'category']).columns
if len(cat_cols) > 0:
- insight += "Kategoriale Spalten:\n"
- for col in cat_cols[:3]: # Auf die ersten 3 Spalten begrenzen
- # Top 3 Werte abrufen
+ insight += "Categorical columns:\n"
+ for col in cat_cols[:3]: # Limit to first 3 columns
+ # Get top 3 values
top_values = df[col].value_counts().head(3)
vals_str = ", ".join([f"{val} ({count})" for val, count in top_values.items()])
- insight += f" {col}: {df[col].nunique()} eindeutige Werte. Häufigste Werte: {vals_str}\n"
+ insight += f" {col}: {df[col].nunique()} unique values. Most common values: {vals_str}\n"
insights.append(insight)
return "\n\n".join(insights)
- def _generate_visualizations(self, data_frames: Dict[str, pd.DataFrame], analysis_type: str,
- workflow_id: str, task: str) -> List[Dict[str, Any]]:
+ def _generate_visualization_document(self, data_frames: Dict[str, pd.DataFrame],
+ analysis_type: str, prompt: str,
+ output_label: str) -> Dict[str, Any]:
"""
- Generiert passende Visualisierungen basierend auf Daten und Analysetyp.
+ Generate a visualization document based on the data and analysis type.
Args:
- data_frames: Dictionary von zu visualisierenden DataFrames
- analysis_type: Durchzuführender Analysetyp
- workflow_id: Workflow-ID
- task: Ursprüngliche Aufgabenbeschreibung
+ data_frames: Dictionary of DataFrames
+ analysis_type: Analysis type
+ prompt: Original task description
+ output_label: Output filename
Returns:
- Liste von Visualisierungsdokumentobjekten
+ Visualization document
"""
- documents = []
+ # Determine format from filename
+ format_type = output_label.split('.')[-1].lower() if '.' in output_label else 'png'
- for name, df in data_frames.items():
- if df.empty or df.shape[0] < 2:
- continue # Leere oder einzeilige DataFrames überspringen
-
- # Verschiedene Visualisierungen basierend auf dem Analysetyp erzeugen
- if analysis_type == "statistical":
- viz_docs = self._create_statistical_visualizations(df, name)
- documents.extend(viz_docs)
-
- elif analysis_type == "trend":
- viz_docs = self._create_trend_visualizations(df, name)
- documents.extend(viz_docs)
-
- elif analysis_type == "comparative":
- viz_docs = self._create_comparative_visualizations(df, name)
- documents.extend(viz_docs)
-
- else: # Allgemeine Analyse
- viz_docs = self._create_general_visualizations(df, name)
- documents.extend(viz_docs)
+ # Set default format if unknown
+ if format_type not in ['png', 'jpg', 'jpeg', 'svg']:
+ format_type = 'png'
- return documents
-
- def _create_statistical_visualizations(self, df: pd.DataFrame, name: str) -> List[Dict[str, Any]]:
- """Erstellt statistische Visualisierungen für einen DataFrame"""
- documents = []
-
- # 1. Verteilungs-/Histogramm-Plots für numerische Spalten
- numeric_cols = df.select_dtypes(include=['number']).columns[:3] # Auf erste 3 begrenzen
- if len(numeric_cols) > 0:
- plt.figure(figsize=(12, 4 * len(numeric_cols)))
-
- for i, col in enumerate(numeric_cols, 1):
- plt.subplot(len(numeric_cols), 1, i)
- sns.histplot(df[col].dropna(), kde=True)
- plt.title(f'Verteilung von {col}')
- plt.tight_layout()
-
- # Abbildung speichern
- img_data = self._get_figure_as_base64()
- plt.close()
-
- # Dokument erstellen
- doc_id = f"viz_stat_dist_{uuid.uuid4()}"
- doc = {
- "id": doc_id,
- "source": {
- "type": "generated",
- "id": doc_id,
- "name": f"Statistische Verteilungen - {name}",
- "content_type": "image/png"
- },
- "contents": [{
- "type": "image",
- "data": img_data,
- "format": "base64"
- }]
+ # Use first DataFrame for visualization
+ if not data_frames:
+ return {
+ "label": output_label,
+ "content": "No data available for visualization."
}
- documents.append(doc)
- # 2. Box-Plots für numerische Spalten
- if len(numeric_cols) > 0:
- plt.figure(figsize=(12, 8))
- sns.boxplot(data=df[numeric_cols])
- plt.title(f'Box-Plots der numerischen Variablen in {name}')
- plt.xticks(rotation=45)
- plt.tight_layout()
-
- # Abbildung speichern
- img_data = self._get_figure_as_base64()
- plt.close()
-
- # Dokument erstellen
- doc_id = f"viz_stat_box_{uuid.uuid4()}"
- doc = {
- "id": doc_id,
- "source": {
- "type": "generated",
- "id": doc_id,
- "name": f"Box-Plots - {name}",
- "content_type": "image/png"
- },
- "contents": [{
- "type": "image",
- "data": img_data,
- "format": "base64"
- }]
- }
- documents.append(doc)
+ # Get name and DataFrame of first dataset
+ name, df = next(iter(data_frames.items()))
- return documents
+ # Draw onto a fresh figure; the helper selected below plots on the
+ # current pyplot figure.
+ plt.figure(figsize=self.default_figsize)
+
+ try:
+     if analysis_type == "statistical":
+         # Statistical visualization
+         self._create_statistical_visualization(df, name)
+     elif analysis_type == "trend":
+         # Trend visualization
+         self._create_trend_visualization(df, name)
+     elif analysis_type == "comparative":
+         # Comparative visualization
+         self._create_comparative_visualization(df, name)
+     elif analysis_type == "predictive":
+         # Predictive visualization (simple example)
+         self._create_predictive_visualization(df, name)
+     elif analysis_type == "clustering":
+         # Clustering visualization
+         self._create_clustering_visualization(df, name)
+     else:
+         # General visualization
+         self._create_general_visualization(df, name)
+
+     # Serialize while the figure is still open: saving after plt.close()
+     # makes pyplot create and render a new, empty figure instead.
+     if format_type == 'svg':
+         # SVG is returned as its XML text rather than as Base64
+         buffer = io.StringIO()
+         plt.savefig(buffer, format='svg')
+         content_str = buffer.getvalue()
+         buffer.close()
+     else:
+         # Raster formats (png, jpg, jpeg) are returned as a Base64 string
+         content_str = self._get_figure_as_base64(format_type)
+ finally:
+     # Always release the figure, even if plotting or saving raised
+     plt.close()
+
+ return {
+     "label": output_label,
+     "content": content_str
+ }
- def _create_trend_visualizations(self, df: pd.DataFrame, name: str) -> List[Dict[str, Any]]:
- """Erstellt Trend-Visualisierungen für einen DataFrame"""
- documents = []
+ def _create_statistical_visualization(self, df: pd.DataFrame, name: str):
+ """Create a statistical visualization for a DataFrame"""
+ # Choose numeric columns for display
+ numeric_cols = df.select_dtypes(include=['number']).columns[:4] # Limit to first 4
- # Numerische Spalten für die Darstellung verwenden
- numeric_cols = df.select_dtypes(include=['number']).columns[:2] # Auf erste 2 begrenzen
+ if len(numeric_cols) == 0:
+ plt.text(0.5, 0.5, "No numeric data found for statistical visualization",
+ ha='center', va='center', fontsize=12)
+ return
- if len(numeric_cols) > 0:
- plt.figure(figsize=(12, 6))
-
+ # Visualize distribution of first numeric column
+ main_col = numeric_cols[0]
+
+ # Create histogram with KDE
+ sns.histplot(df[main_col].dropna(), kde=True)
+ plt.title(f'Distribution of {main_col} - {name}')
+ plt.xlabel(main_col)
+ plt.ylabel('Frequency')
+ plt.tight_layout()
+
+ def _create_trend_visualization(self, df: pd.DataFrame, name: str):
+     """Plot up to three numeric columns as lines over a date/time column or the row position"""
+     numeric_cols = df.select_dtypes(include=['number']).columns[:3]  # Limit to first 3
+
+     if len(numeric_cols) == 0:
+         plt.text(0.5, 0.5, "No numeric data found for trend visualization",
+                  ha='center', va='center', fontsize=12)
+         return
+
+     # Pick the first datetime-typed (tz-aware included) or date/time-named column.
+     # Labels are stringified first because they may be ints or tuples, not str.
+     date_col = None
+     for col in df.columns:
+         col_name = str(col).lower()
+         if pd.api.types.is_datetime64_any_dtype(df[col]) or 'date' in col_name or 'time' in col_name:
+             date_col = col
+             break
+
+     if date_col is not None:  # Use date column as X-axis if available (label may be falsy, e.g. 0)
for col in numeric_cols:
- plt.plot(df.index, df[col], marker='o', label=col)
-
- plt.title(f'Trendsicht von {", ".join(numeric_cols)} - {name}')
- plt.legend()
- plt.tight_layout()
-
- # Abbildung speichern
- img_data = self._get_figure_as_base64()
- plt.close()
-
- # Dokument erstellen
- doc_id = f"viz_trend_{uuid.uuid4()}"
- doc = {
- "id": doc_id,
- "source": {
- "type": "generated",
- "id": doc_id,
- "name": f"Trendanalyse - {name}",
- "content_type": "image/png"
- },
- "contents": [{
- "type": "image",
- "data": img_data,
- "format": "base64"
- }]
- }
- documents.append(doc)
+ plt.plot(df[date_col], df[col], marker='o', linestyle='-', label=col)
+ else:
+ # Otherwise use index numbers
+ for col in numeric_cols:
+ plt.plot(range(len(df)), df[col], marker='o', linestyle='-', label=col)
- return documents
+ plt.title(f'Trend Analysis - {name}')
+ plt.legend()
+ plt.grid(True)
+ plt.tight_layout()
- def _create_comparative_visualizations(self, df: pd.DataFrame, name: str) -> List[Dict[str, Any]]:
- """Erstellt vergleichende Visualisierungen für einen DataFrame"""
- documents = []
+ def _create_comparative_visualization(self, df: pd.DataFrame, name: str):
+ """Create a comparative visualization for a DataFrame"""
+ # Choose numeric columns for display
+ numeric_cols = df.select_dtypes(include=['number']).columns[:4] # Limit to first 4
- # 1. Kategoriale Spalten für Gruppierung suchen
- cat_cols = df.select_dtypes(include=['object', 'category']).columns
+ if len(numeric_cols) == 0:
+ plt.text(0.5, 0.5, "No numeric data found for comparative visualization",
+ ha='center', va='center', fontsize=12)
+ return
- if len(cat_cols) > 0:
- # Erste kategoriale Spalte mit angemessener Anzahl eindeutiger Werte verwenden
- groupby_col = None
- for col in cat_cols:
- unique_count = df[col].nunique()
- if 2 <= unique_count <= 10: # Angemessene Anzahl von Kategorien
- groupby_col = col
- break
+ # Find categorical column for grouping
+ categorical_cols = df.select_dtypes(include=['object', 'category']).columns
+ if len(categorical_cols) > 0:
+ category_col = categorical_cols[0]
- if groupby_col:
- # Numerische Spalten für den Vergleich über Gruppen hinweg suchen
- numeric_cols = df.select_dtypes(include=['number']).columns[:3] # Auf erste 3 begrenzen
-
- if len(numeric_cols) > 0:
- # 1. Balkendiagramm, das Mittelwerte vergleicht
- plt.figure(figsize=(12, 6))
- mean_by_group = df.groupby(groupby_col)[numeric_cols].mean()
- mean_by_group.plot(kind='bar')
- plt.title(f'Vergleich der Mittelwerte nach {groupby_col} - {name}')
- plt.xticks(rotation=45)
- plt.tight_layout()
-
- # Abbildung speichern
- img_data = self._get_figure_as_base64()
- plt.close()
-
- # Dokument erstellen
- doc_id = f"viz_comp_bar_{uuid.uuid4()}"
- doc = {
- "id": doc_id,
- "source": {
- "type": "generated",
- "id": doc_id,
- "name": f"Mittelwertvergleich nach {groupby_col} - {name}",
- "content_type": "image/png"
- },
- "contents": [{
- "type": "image",
- "data": img_data,
- "format": "base64"
- }]
- }
- documents.append(doc)
-
- # 2. Streudiagramm für den Vergleich zweier numerischer Variablen
- numeric_cols = df.select_dtypes(include=['number']).columns
- if len(numeric_cols) >= 2:
- plt.figure(figsize=(10, 8))
- # Erste beiden numerischen Spalten als Feature und Ziel verwenden
- x_col, y_col = numeric_cols[0], numeric_cols[1]
+ # Display maximum of first 7 categories
+ top_categories = df[category_col].value_counts().head(7).index
+ filtered_df = df[df[category_col].isin(top_categories)]
- plt.scatter(df[x_col], df[y_col])
- plt.title(f'Vergleich von {x_col} vs {y_col} - {name}')
- plt.xlabel(x_col)
- plt.ylabel(y_col)
- plt.tight_layout()
-
- # Abbildung speichern
- img_data = self._get_figure_as_base64()
- plt.close()
-
- # Dokument erstellen
- doc_id = f"viz_comp_scatter_{uuid.uuid4()}"
- doc = {
- "id": doc_id,
- "source": {
- "type": "generated",
- "id": doc_id,
- "name": f"Variablenvergleich - {name}",
- "content_type": "image/png"
- },
- "contents": [{
- "type": "image",
- "data": img_data,
- "format": "base64"
- }]
- }
- documents.append(doc)
-
- return documents
-
- def _create_general_visualizations(self, df: pd.DataFrame, name: str) -> List[Dict[str, Any]]:
- """Erstellt allgemeine Visualisierungen für einen DataFrame"""
- documents = []
-
- # 1. Datenübersicht: Numerische Zusammenfassung
- numeric_cols = df.select_dtypes(include=['number']).columns
- if len(numeric_cols) > 0:
- # Balkendiagramm der Mittelwerte für numerische Spalten erstellen
- plt.figure(figsize=(12, 6))
- means = df[numeric_cols].mean().sort_values()
- means.plot(kind='bar')
- plt.title(f'Mittelwerte der numerischen Variablen - {name}')
+ # Create grouped bar chart
+ numeric_col = numeric_cols[0]
+ sns.barplot(x=category_col, y=numeric_col, data=filtered_df)
+ plt.title(f'Comparison of {numeric_col} by {category_col} - {name}')
plt.xticks(rotation=45)
plt.tight_layout()
-
- # Abbildung speichern
- img_data = self._get_figure_as_base64()
- plt.close()
-
- # Dokument erstellen
- doc_id = f"viz_gen_means_{uuid.uuid4()}"
- doc = {
- "id": doc_id,
- "source": {
- "type": "generated",
- "id": doc_id,
- "name": f"Zusammenfassung numerischer Variablen - {name}",
- "content_type": "image/png"
- },
- "contents": [{
- "type": "image",
- "data": img_data,
- "format": "base64"
- }]
- }
- documents.append(doc)
-
- # 2. Übersicht über kategoriale Daten
- cat_cols = df.select_dtypes(include=['object', 'category']).columns
- if len(cat_cols) > 0:
- # Erste kategoriale Spalte mit angemessener Kardinalität auswählen
- for col in cat_cols:
- if df[col].nunique() <= 10: # Angemessene Anzahl von Kategorien
- plt.figure(figsize=(10, 6))
- value_counts = df[col].value_counts().sort_values(ascending=False)
- value_counts.plot(kind='bar')
- plt.title(f'Verteilung von {col} - {name}')
- plt.xticks(rotation=45)
- plt.tight_layout()
-
- # Abbildung speichern
- img_data = self._get_figure_as_base64()
- plt.close()
-
- # Dokument erstellen
- doc_id = f"viz_gen_cat_{uuid.uuid4()}"
- doc = {
- "id": doc_id,
- "source": {
- "type": "generated",
- "id": doc_id,
- "name": f"Kategoriale Verteilung - {name}",
- "content_type": "image/png"
- },
- "contents": [{
- "type": "image",
- "data": img_data,
- "format": "base64"
- }]
- }
- documents.append(doc)
- break # Nur die erste geeignete Spalte verwenden
-
- return documents
+ else:
+ # Comparative visualization for numeric columns without categories
+ if len(numeric_cols) >= 2:
+ # Scatter plot for first two numeric columns
+ sns.scatterplot(x=numeric_cols[0], y=numeric_cols[1], data=df)
+ plt.title(f'Comparison of {numeric_cols[0]} vs {numeric_cols[1]} - {name}')
+ plt.tight_layout()
+ else:
+ # Simple bar chart for a single numeric column
+ plt.bar(range(min(20, len(df))), df[numeric_cols[0]].head(20))
+ plt.title(f'Top 20 Values for {numeric_cols[0]} - {name}')
+ plt.tight_layout()
- def _get_figure_as_base64(self) -> str:
- """Konvertiert aktuelle matplotlib-Abbildung in base64-String"""
+ def _create_predictive_visualization(self, df: pd.DataFrame, name: str):
+     """
+     Draw a scatter plot with a least-squares trend line on the current figure.
+
+     Uses the first two numeric columns as x and y; rows with a missing value are ignored.
+     """
+     # Choose numeric columns for display
+     numeric_cols = df.select_dtypes(include=['number']).columns[:2]  # Limit to first 2
+
+     if len(numeric_cols) < 2:
+         plt.text(0.5, 0.5, "At least 2 numeric columns required for predictive visualization",
+                  ha='center', va='center', fontsize=12)
+         return
+
+     x_col, y_col = numeric_cols[0], numeric_cols[1]
+
+     # Convert to plain float arrays so missing values in nullable dtypes
+     # (e.g. Int64) become NaN and can be masked out with np.isnan
+     x = df[x_col].to_numpy(dtype=float, na_value=np.nan)
+     y = df[y_col].to_numpy(dtype=float, na_value=np.nan)
+     valid = ~(np.isnan(x) | np.isnan(y))
+
+     if np.count_nonzero(valid) < 2:  # A line needs at least 2 points
+         plt.text(0.5, 0.5, "Insufficient data for predictive analysis",
+                  ha='center', va='center', fontsize=12)
+         return
+
+     # Degree-1 polynomial fit, evaluated over the observed x range
+     x_valid, y_valid = x[valid], y[valid]
+     trend = np.poly1d(np.polyfit(x_valid, y_valid, 1))
+     x_line = np.linspace(x_valid.min(), x_valid.max(), 100)
+
+     # Create scatter plot with trend line
+     plt.scatter(x_valid, y_valid, alpha=0.7)
+     plt.plot(x_line, trend(x_line), 'r-', linewidth=2)
+     plt.title(f'Linear Regression: {y_col} vs {x_col} - {name}')
+     plt.xlabel(x_col)
+     plt.ylabel(y_col)
+     plt.tight_layout()
+
+ def _create_clustering_visualization(self, df: pd.DataFrame, name: str):
+     """
+     Scatter the first two numeric columns on the current figure.
+
+     Points are colored by the integer codes of the first categorical column,
+     if one exists; a text notice is drawn when fewer than two numeric
+     columns are available.
+     """
+     numeric_cols = df.select_dtypes(include=['number']).columns[:2]
+     if len(numeric_cols) < 2:
+         plt.text(0.5, 0.5, "At least 2 numeric columns required for clustering visualization",
+                  ha='center', va='center', fontsize=12)
+         return
+
+     x_name, y_name = numeric_cols[0], numeric_cols[1]
+     x_values = df[x_name].values
+     y_values = df[y_name].values
+
+     label_cols = df.select_dtypes(include=['object', 'category']).columns
+     if len(label_cols) == 0:
+         # No categorical column: plain scatter without color coding
+         plt.scatter(x_values, y_values, alpha=0.7)
+     else:
+         # Encode the first categorical column as integer codes for coloring
+         color_col = label_cols[0]
+         codes = df[color_col].astype('category').cat.codes
+         plt.scatter(x_values, y_values, c=codes, cmap='viridis', alpha=0.7)
+         plt.colorbar(label=color_col)
+
+     plt.title(f'Clustering Visualization: {y_name} vs {x_name} - {name}')
+     plt.xlabel(x_name)
+     plt.ylabel(y_name)
+     plt.tight_layout()
+
+ def _create_general_visualization(self, df: pd.DataFrame, name: str):
+     """
+     Draw an overview of the numeric data on the current figure.
+
+     Two or more numeric columns yield a correlation heatmap, exactly one
+     yields a histogram with KDE, and none yields a text notice.
+     """
+     numeric_cols = df.select_dtypes(include=['number']).columns
+     if len(numeric_cols) == 0:
+         plt.text(0.5, 0.5, "No numeric data found for visualization",
+                  ha='center', va='center', fontsize=12)
+         return
+
+     if len(numeric_cols) == 1:
+         only_col = numeric_cols[0]
+         sns.histplot(df[only_col].dropna(), kde=True)
+         plt.title(f'Distribution of {only_col} - {name}')
+     else:
+         sns.heatmap(df[numeric_cols].corr(), annot=True, cmap='coolwarm', vmin=-1, vmax=1)
+         plt.title(f'Correlation Matrix - {name}')
+     plt.tight_layout()
+
+ def _get_figure_as_base64(self, format_type: str = 'png') -> str:
+ """
+ Convert current matplotlib figure to base64 string.
+
+ Args:
+ format_type: Image format (png, jpg, svg)
+
+ Returns:
+ Base64 encoded string of the figure
+ """
buffer = io.BytesIO()
- plt.savefig(buffer, format='png', dpi=self.chart_dpi)
+ plt.savefig(buffer, format=format_type, dpi=self.chart_dpi)
buffer.seek(0)
- image_png = buffer.getvalue()
+ image_data = buffer.getvalue()
buffer.close()
- # Zu base64 konvertieren
- image_base64 = base64.b64encode(image_png).decode('utf-8')
+ # Convert to base64
+ image_base64 = base64.b64encode(image_data).decode('utf-8')
return image_base64
- async def _generate_analysis(self, prompt: str, analysis_type: str) -> str:
+ async def _generate_analysis_document(self, prompt: str, context: str, data_insights: str,
+ analysis_type: str, format_type: str,
+ output_label: str, output_description: str) -> str:
"""
- Generiert eine Analyse basierend auf Prompt und Analysetyp.
+ Generate an analysis document based on the data and prompt.
Args:
- prompt: Der Analyseprompt
- analysis_type: Analysetyp
+ prompt: Task description
+ context: Document context as text
+ data_insights: Insights from the data
+ analysis_type: Analysis type
+ format_type: Output format
+ output_label: Output filename
+ output_description: Description of desired output
Returns:
- Generierte Analyse
+ Generated document content
"""
if not self.ai_service:
- logging.warning("KI-Service nicht verfügbar für Analysegenerierung")
- return f"## Datenanalyse ({analysis_type})\n\nAnalyse konnte nicht generiert werden: KI-Service nicht verfügbar."
+ return f"# Data Analysis ({analysis_type})\n\nAnalysis could not be generated: AI service not available."
- # Spezialisierten Prompt basierend auf Analysetyp erstellen
+ # Create specialized prompt based on analysis type
system_prompt = f"""
- Du bist ein spezialisierter Datenanalyst, der auf {analysis_type}-Analysen fokussiert ist.
+ You are a specialized data analyst focused on {analysis_type} analyses.
- Erstelle eine detaillierte Analyse der bereitgestellten Daten und/oder Textinhalte.
- Deine Analyse sollte folgendes enthalten:
- 1. Eine Zusammenfassung der Daten/Inhalte
- 2. Wichtige Erkenntnisse und Einsichten
- 3. Stützende Belege und Berechnungen
- 4. Klare Schlussfolgerungen
- 5. Empfehlungen, wo angemessen
+ Create a detailed analysis of the provided data and/or text content.
+ Your analysis should include:
+ 1. A summary of the data/content
+ 2. Key findings and insights
+ 3. Supporting evidence and calculations
+ 4. Clear conclusions
+ 5. Recommendations where appropriate
- Formatiere die Analyse in Markdown mit geeigneten Überschriften, Listen und Tabellen.
+ Format the analysis in the requested output format.
"""
- # Bestimmen, ob dies eine datenbasierte oder textbasierte Analyse ist
- is_data_analysis = "DATENERKENNTNISSE" in prompt
+ # Create extended prompt with all available information
+ generation_prompt = f"""
+ Create a detailed {analysis_type} analysis for the following task:
- # Prompt mit analysespezifischen Anweisungen erweitern
- if is_data_analysis:
- enhanced_prompt = f"""
- Generiere eine detaillierte {analysis_type}-Analyse basierend auf den folgenden Daten:
-
- {prompt}
- """
- else:
- # Anweisungen für textbasierte Analyse
- enhanced_prompt = f"""
- Generiere eine detaillierte {analysis_type}-Analyse des folgenden Textinhalts:
-
- {prompt}
- """
+ TASK:
+ {prompt}
+
+ CONTEXT:
+ {context if context else 'No additional context available.'}
+
+ DATA INSIGHTS:
+ {data_insights if data_insights else 'No data insights available.'}
+
+ OUTPUT REQUIREMENTS:
+ - Filename: {output_label}
+ - Description: {output_description}
+ - Format: {format_type}
+
+ The analysis should be professional and clearly structured, considering all available information.
+
+ The output must perfectly match the {format_type} format.
+ """
try:
+ # Call AI for analysis
content = await self.ai_service.call_api([
{"role": "system", "content": system_prompt},
- {"role": "user", "content": enhanced_prompt}
+ {"role": "user", "content": generation_prompt}
])
- # Sicherstellen, dass es einen Titel am Anfang gibt
- if not content.strip().startswith("# "):
- content = f"# {analysis_type.capitalize()}-Analyse\n\n{content}"
+ # For markdown format, ensure there's a title at the beginning
+ if format_type == "markdown" and not content.strip().startswith("# "):
+ content = f"# Data Analysis ({analysis_type})\n\n{content}"
return content
except Exception as e:
- return f"# {analysis_type.capitalize()}-Analyse\n\nFehler bei der Analysegenerierung: {str(e)}"
+ logger.error(f"Error generating analysis: {str(e)}")
+ return f"# Data Analysis ({analysis_type})\n\nError generating analysis: {str(e)}"
-# Singleton-Instanz
-_analyst_agent = None
+# Factory function for the Analyst agent
def get_analyst_agent():
- """Gibt eine Singleton-Instanz des Analyst-Agenten zurück"""
- global _analyst_agent
- if _analyst_agent is None:
- _analyst_agent = AgentAnalyst()
- return _analyst_agent
\ No newline at end of file
+ """
+ Factory function that returns an instance of the Analyst agent.
+
+ Returns:
+ An instance of the Analyst agent
+ """
+ return AgentAnalyst()
\ No newline at end of file
diff --git a/modules/chat_agent_coder.py b/modules/chat_agent_coder.py
index 460f00f5..352b4433 100644
--- a/modules/chat_agent_coder.py
+++ b/modules/chat_agent_coder.py
@@ -1,6 +1,6 @@
"""
-Coder-Agent für die Entwicklung und Ausführung von Python-Code.
-Angepasst für die neue chat.py Architektur und chat_registry.py.
+Coder agent for development and execution of Python code.
+Optimized for the new task-based processing.
"""
import logging
@@ -20,175 +20,277 @@ logger = logging.getLogger(__name__)
class AgentCoder(AgentBase):
- """Agent für die Entwicklung und Ausführung von Python-Code"""
+ """Agent for development and execution of Python code"""
def __init__(self):
- """Initialisiert den Coder-Agent"""
+ """Initialize the coder agent"""
super().__init__()
- self.name = "Python Code Agent"
- self.capabilities = "code_development,data_processing,file_processing,automation"
+ self.name = "coder"
+ self.description = "Develops and executes Python code for data processing and automation"
+ self.capabilities = [
+ "code_development",
+ "data_processing",
+ "file_processing",
+ "automation",
+ "code_execution"
+ ]
- # Executor-Einstellungen
- self.executor_timeout = 60 # Sekunden
+ # Executor settings
+ self.executor_timeout = 60 # seconds
self.executor_memory_limit = 512 # MB
- # KI-Service-Einstellungen
- self.ai_temperature = 0.1 # Niedrigere Temperatur für deterministische Codegenerierung
+ # AI service settings
+ self.ai_temperature = 0.1 # Lower temperature for deterministic code generation
- # Auto-Korrektur-Einstellungen
- self.max_correction_attempts = 3 # Maximale Anzahl von Korrekturversuchen
+ # Auto-correction settings
+ self.max_correction_attempts = 3 # Maximum number of correction attempts
- def get_agent_info(self) -> Dict[str, Any]:
- """Gibt Agent-Informationen für die Registry zurück"""
- info = super().get_config()
- return info
-
- async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
+ async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
- Verarbeitet eine Nachricht zur Entwicklung und Ausführung von Python-Code.
+ Process a standardized task structure and perform code development/execution.
Args:
- message: Die zu verarbeitende Nachricht
- context: Zusätzliche Kontextinformationen
-
+ task: A dictionary containing:
+ - task_id: Unique ID for this task
+ - prompt: The main instruction for the agent
+ - input_documents: List of documents to process
+ - output_specifications: List of required output documents
+ - context: Additional contextual information
+
Returns:
- Antwortnachricht
+ A dictionary containing:
+ - feedback: Text response explaining the code execution
+ - documents: List of created document objects
"""
- # Workflow-ID aus Kontext oder Nachricht extrahieren
- workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
-
- # Antwortstruktur erstellen
- response = {
- "role": "assistant",
- "content": "",
- "agent_name": self.name,
- "workflow_id": workflow_id,
- "documents": []
- }
-
try:
- # Inhalt und Dokumente extrahieren
- content = message.get("content", "")
- documents = message.get("documents", [])
+ # Extract relevant task information
+ prompt = task.get("prompt", "")
+ input_documents = task.get("input_documents", [])
+ output_specs = task.get("output_specifications", [])
+ context_info = task.get("context", {})
- # Code basierend auf dem Nachrichteninhalt mit KI generieren
- logger.info("Generiere neuen Code mit KI")
+ # Check if AI service is available
+ if not self.ai_service:
+ logger.error("No AI service configured for the Coder agent")
+ return {
+ "feedback": "The Coder agent is not properly configured.",
+ "documents": []
+ }
+
+ # Extract context from input documents
+ document_context = self._extract_document_context(input_documents)
+
+ # Generate code based on the prompt and document context
+ logger.info("Generating code based on the task")
+ code_to_execute, requirements = await self._generate_code_from_prompt(prompt, document_context)
- # Code mit KI generieren
- code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
if not code_to_execute:
- logger.warning("KI konnte keinen Code generieren")
- response["content"] = "Ich konnte basierend auf Ihrer Anfrage keinen ausführbaren Code generieren. Bitte geben Sie detailliertere Anweisungen."
- return response
-
- logger.info(f"Code mit KI generiert ({len(code_to_execute)} Zeichen)")
-
- # Code-Datei-Dokument erstellen
- code_doc_id = f"code_{uuid.uuid4()}"
- code_filename = "generated_code.py"
-
- code_document = {
- "id": code_doc_id,
- "source": {
- "type": "generated",
- "id": code_doc_id,
- "name": code_filename,
- "content_type": "text/x-python"
- },
- "contents": [{
- "type": "text",
- "text": code_to_execute,
- "is_extracted": True
- }]
- }
-
- # Code-Dokument zur Antwort hinzufügen
- response["documents"].append(code_document)
- logger.info(f"Code-Datei '{code_filename}' zur Antwort hinzugefügt")
-
- # Code mit Auto-Korrektur-Schleife ausführen
- if code_to_execute:
- # Ausführungskontext vorbereiten
- execution_context = {
- "workflow_id": workflow_id,
- "documents": documents,
- "message": message
+ logger.warning("AI couldn't generate any code")
+ return {
+ "feedback": "I couldn't generate executable code based on the task. Please provide more detailed instructions.",
+ "documents": []
}
- # Verbesserte Ausführung mit Auto-Korrektur
- result, attempts_info = await self._execute_with_auto_correction(
- code_to_execute,
- requirements,
- execution_context,
- content # Originaler Prompt/Nachricht
+ logger.info(f"Code generated with AI ({len(code_to_execute)} characters)")
+
+ # Collect created documents
+ generated_documents = []
+
+ # Add code as first document
+ code_doc = {
+ "label": "generated_code.py",
+ "content": code_to_execute
+ }
+ generated_documents.append(code_doc)
+
+ # Execute code with auto-correction loop
+ execution_context = {
+ "input_documents": input_documents,
+ "task": task
+ }
+
+ # Enhanced execution with auto-correction
+ result, attempts_info = await self._execute_with_auto_correction(
+ code_to_execute,
+ requirements,
+ execution_context,
+ prompt # Original prompt/message
+ )
+
+ # Create output documents based on execution result and output specifications
+ if result.get("success", False):
+ # Code execution successful
+ output = result.get("output", "")
+ execution_result = result.get("result")
+ logger.info("Code executed successfully")
+
+ # Determine output type of the result
+ result_docs = self._generate_result_documents(
+ attempts_info[-1]["code"], # Last successful code
+ output,
+ execution_result,
+ output_specs
)
- # Antwort basierend auf dem endgültigen Ergebnis vorbereiten (Erfolg oder Fehler)
- if result.get("success", False):
- # Code-Ausführung erfolgreich
- output = result.get("output", "")
- execution_result = result.get("result")
- logger.info("Code erfolgreich ausgeführt")
-
- # Antwortinhalt formatieren
- response_content = f"## Code erfolgreich ausgeführt"
-
- # Informationen zu Korrekturversuchen hinzufügen, falls Korrekturen vorgenommen wurden
- if attempts_info and len(attempts_info) > 1:
- response_content += f" (nach {len(attempts_info)-1} Korrekturversuchen)"
-
- response_content += "\n\n"
-
- # Den ausgeführten Code einbeziehen
- response_content += f"### Ausgeführter Code\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
-
- # Die Ausgabe einbeziehen, falls verfügbar
- if output:
- response_content += f"### Ausgabe\n\n```\n{output}\n```\n\n"
-
- # Das Ausführungsergebnis einbeziehen, falls verfügbar
- if execution_result:
- result_str = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
- response_content += f"### Ergebnis\n\n```\n{result_str}\n```\n\n"
-
- response["content"] = response_content
-
- else:
- # Code-Ausführung nach allen Versuchen fehlgeschlagen
- error = result.get("error", "Unbekannter Fehler")
- logger.error(f"Fehler bei der Code-Ausführung nach allen Korrekturversuchen: {error}")
-
- # Fehlerantwort formatieren
- response_content = f"## Fehler bei der Code-Ausführung\n\n"
-
- # Informationen zu Korrekturversuchen hinzufügen
- if attempts_info:
- response_content += f"Ich habe {len(attempts_info)} Versuche unternommen, den Code zu korrigieren, konnte aber nicht alle Probleme lösen.\n\n"
-
- # Den letzten Versuch hinzufügen
- response_content += f"### Letzter Code-Versuch\n\n```python\n{attempts_info[-1]['code']}\n```\n\n"
- response_content += f"### Letzter Fehler\n\n```\n{attempts_info[-1]['error']}\n```\n\n"
-
- # Empfehlung basierend auf dem Fehler hinzufügen
- response_content += "### Empfehlung\n\n"
- response_content += self._get_error_recommendation(error)
- else:
- # Nur den Code und den Fehler anzeigen
- response_content += f"### Ausgeführter Code\n\n```python\n{code_to_execute}\n```\n\n"
- response_content += f"### Fehler\n\n```\n{error}\n```\n\n"
-
- response["content"] = response_content
+ # Add result documents
+ generated_documents.extend(result_docs)
+
+ # Create feedback for successful execution
+ feedback = f"I successfully executed the code and generated {len(result_docs)} output files."
+ if attempts_info and len(attempts_info) > 1:
+ feedback += f" (This required {len(attempts_info)-1} correction attempts)"
+
else:
- # Kein auszuführender Code
- response["content"] = "Ich konnte keinen ausführbaren Code finden oder generieren. Bitte geben Sie Python-Code an oder erklären Sie Ihre Anforderungen klarer."
+ # Code execution failed after all attempts
+ error = result.get("error", "Unknown error")
+ logger.error(f"Error in code execution after all correction attempts: {error}")
+
+ # Add error log as additional document
+ error_doc = {
+ "label": "execution_error.txt",
+ "content": f"Execution error:\n\n{error}"
+ }
+ generated_documents.append(error_doc)
+
+ # Create feedback for failed execution
+ feedback = f"An error occurred during code execution after {len(attempts_info)} correction attempts."
- return response
+ # If no specific outputs requested, create standard outputs
+ if not output_specs and result.get("success", False):
+ # Add standard output document
+ output_doc = {
+ "label": "execution_output.txt",
+ "content": output
+ }
+ generated_documents.append(output_doc)
+
+ # If a result is available, also add as JSON document
+ if execution_result:
+ result_json = json.dumps(execution_result, indent=2) if isinstance(execution_result, (dict, list)) else str(execution_result)
+ result_doc = {
+ "label": "execution_result.json",
+ "content": result_json
+ }
+ generated_documents.append(result_doc)
+
+ return {
+ "feedback": feedback,
+ "documents": generated_documents
+ }
except Exception as e:
- error_msg = f"Fehler bei der Verarbeitung durch den Coder-Agent: {str(e)}"
+ error_msg = f"Error during processing by the Coder agent: {str(e)}"
logger.error(error_msg)
- response["content"] = f"## Verarbeitungsfehler\n\n```\n{error_msg}\n```"
- return response
+ return {
+ "feedback": f"An error occurred during code processing: {str(e)}",
+ "documents": []
+ }
+
+ def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
+ """
+ Extract context from input documents for code generation.
+
+ Args:
+ documents: List of document objects
+
+ Returns:
+ Extracted context as text
+ """
+ context_parts = []
+
+ for doc in documents:
+ doc_name = doc.get("name", "Unnamed document")
+ context_parts.append(f"--- {doc_name} ---")
+
+ for content in doc.get("contents", []):
+ if content.get("metadata", {}).get("is_text", False):
+ context_parts.append(content.get("data", ""))
+
+ return "\n\n".join(context_parts)
+
+ def _generate_result_documents(self, code: str, output: str, execution_result: Any,
+ output_specs: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ """
+ Generate output documents based on execution results and specifications.
+
+ Args:
+ code: Executed code
+ output: Text output of the execution
+ execution_result: Result object from execution
+ output_specs: Output specifications
+
+ Returns:
+ List of generated document objects
+ """
+ documents = []
+
+ # If no specific outputs requested
+ if not output_specs:
+ return documents
+
+ # Generate appropriate document for each requested output
+ for spec in output_specs:
+ output_label = spec.get("label", "")
+ output_description = spec.get("description", "")
+
+ # Determine output type based on file extension
+ format_type = self._determine_format_type(output_label)
+
+ # Generate document content based on format and output
+ if "code" in output_label.lower() or format_type in ["py", "js", "html", "css"]:
+ # Code document
+ documents.append({
+ "label": output_label,
+ "content": code
+ })
+ elif "output" in output_label.lower() or format_type == "txt":
+ # Output document
+ documents.append({
+ "label": output_label,
+ "content": output
+ })
+ elif format_type in ["json", "yml", "yaml"] and execution_result:
+ # JSON result document
+ if isinstance(execution_result, (dict, list)):
+ content = json.dumps(execution_result, indent=2)
+ else:
+ content = str(execution_result)
+
+ documents.append({
+ "label": output_label,
+ "content": content
+ })
+ else:
+ # Generic result document (fallback)
+ result_str = ""
+ if execution_result:
+ if isinstance(execution_result, (dict, list)):
+ result_str = json.dumps(execution_result, indent=2)
+ else:
+ result_str = str(execution_result)
+
+ documents.append({
+ "label": output_label,
+ "content": f"Code output:\n\n{output}\n\nResult:\n\n{result_str}"
+ })
+
+ return documents
+
+ def _determine_format_type(self, output_label: str) -> str:
+ """
+ Determine the format type based on the filename.
+
+ Args:
+ output_label: Output filename
+
+ Returns:
+ Format type (py, js, json, txt, etc.)
+ """
+ if not '.' in output_label:
+ return "txt" # Default format
+
+ extension = output_label.split('.')[-1].lower()
+ return extension
async def _execute_with_auto_correction(
self,
@@ -198,33 +300,33 @@ class AgentCoder(AgentBase):
original_prompt: str
) -> Tuple[Dict[str, Any], List[Dict[str, Any]]]:
"""
- Führt Code mit automatischer Fehlerkorrektur und Wiederholungsversuchen aus.
+ Execute code with automatic error correction and retry attempts.
Args:
- initial_code: Der anfängliche Python-Code
- requirements: Liste erforderlicher Pakete
- context: Zusätzlicher Kontext für die Ausführung
- original_prompt: Die ursprüngliche Benutzeranfrage/Prompt
+ initial_code: The initial Python code
+ requirements: List of required packages
+ context: Additional context for execution
+ original_prompt: The original user request/prompt
Returns:
- Tuple aus (endgültiges Ausführungsergebnis, Liste von Versuchsinfo-Dictionarys)
+ Tuple of (final execution result, list of attempt info dictionaries)
"""
- # Verfolgungs-Daten initialisieren
+ # Initialize tracking data
current_code = initial_code
current_requirements = requirements.copy() if requirements else []
attempts_info = []
- # Mit Korrekturschleife ausführen
+ # Execute with correction loop
for attempt in range(1, self.max_correction_attempts + 1):
if attempt == 1:
- logger.info(f"Führe Code aus (Versuch {attempt}/{self.max_correction_attempts})")
+ logger.info(f"Executing code (attempt {attempt}/{self.max_correction_attempts})")
else:
- logger.info(f"Führe korrigierten Code aus (Versuch {attempt}/{self.max_correction_attempts})")
+ logger.info(f"Executing corrected code (attempt {attempt}/{self.max_correction_attempts})")
- # Aktuelle Code-Version ausführen
+ # Execute current code version
result = await self._execute_code(current_code, current_requirements, context)
- # Versuchsinformationen aufzeichnen
+ # Record attempt information
attempts_info.append({
"attempt": attempt,
"code": current_code,
@@ -232,22 +334,22 @@ class AgentCoder(AgentBase):
"success": result.get("success", False)
})
- # Prüfen, ob die Ausführung erfolgreich war
+ # Check if execution was successful
if result.get("success", False):
- # Erfolg! Ergebnis und Versuchsinfo zurückgeben
+ # Success! Return result and attempt info
return result, attempts_info
- # Fehlgeschlagene Ausführung - prüfen, ob die maximale Versuchsgrenze erreicht wurde
+ # Failed execution - check if max attempt limit reached
if attempt >= self.max_correction_attempts:
- logger.warning(f"Maximale Korrekturversuche ({self.max_correction_attempts}) erreicht, Aufgabe")
+ logger.warning(f"Maximum correction attempts ({self.max_correction_attempts}) reached")
break
- # Code basierend auf dem Fehler korrigieren
- error_message = result.get("error", "Unbekannter Fehler")
+ # Correct code based on the error
+ error_message = result.get("error", "Unknown error")
- logger.info(f"Versuche, Code-Fehler zu beheben: {error_message[:200]}...")
+ logger.info(f"Attempting to fix code error: {error_message[:200]}...")
- # Korrigierten Code generieren
+ # Generate corrected code
corrected_code, new_requirements = await self._generate_code_correction(
current_code,
error_message,
@@ -255,22 +357,22 @@ class AgentCoder(AgentBase):
current_requirements
)
- # Für den nächsten Versuch aktualisieren
+ # Update for next attempt
if corrected_code:
current_code = corrected_code
- # Neue Anforderungen hinzufügen
+ # Add new requirements
if new_requirements:
for req in new_requirements:
if req not in current_requirements:
current_requirements.append(req)
- logger.info(f"Neue Anforderung hinzugefügt: {req}")
+ logger.info(f"Added new requirement: {req}")
else:
- # Korrektur konnte nicht generiert werden, Schleife beenden
- logger.warning("Konnte keine Code-Korrektur generieren, Aufgabe")
+ # Correction couldn't be generated, end loop
+ logger.warning("Couldn't generate code correction")
break
- # Wenn wir hierher gelangen, sind alle Versuche fehlgeschlagen - das letzte Ergebnis und die Versuchsinfo zurückgeben
+ # If we reach here, all attempts failed - return last result and attempt info
return result, attempts_info
async def _generate_code_correction(
@@ -281,76 +383,76 @@ class AgentCoder(AgentBase):
current_requirements: List[str] = None
) -> Tuple[str, List[str]]:
"""
- Generiert eine korrigierte Version des Codes basierend auf Fehlermeldungen.
+ Generate a corrected version of code based on error messages.
Args:
- code: Der Code, der Fehler erzeugt hat
- error_message: Die zu behebende Fehlermeldung
- original_prompt: Die ursprüngliche Aufgabe/Anforderungen
- current_requirements: Liste der aktuell erforderlichen Pakete
+ code: The code that generated errors
+ error_message: The error message to fix
+ original_prompt: The original task/requirements
+ current_requirements: List of currently required packages
Returns:
- Tuple aus (korrigierter Code, neue Anforderungsliste)
+ Tuple of (corrected code, new requirements list)
"""
try:
- # Detaillierten Prompt für Code-Korrektur erstellen
- correction_prompt = f"""Du musst einen Fehler in Python-Code beheben. Der Code wurde für diese Aufgabe geschrieben:
+ # Create detailed prompt for code correction
+ correction_prompt = f"""You need to fix an error in Python code. The code was written for this task:
-ORIGINALE AUFGABE:
+ORIGINAL TASK:
{original_prompt}
-AKTUELLER CODE:
+CURRENT CODE:
```python
{code}
```
-FEHLERMELDUNG:
+ERROR MESSAGE:
```
{error_message}
```
-AKTUELLE ANFORDERUNGEN: {', '.join(current_requirements) if current_requirements else "Keine"}
+CURRENT REQUIREMENTS: {', '.join(current_requirements) if current_requirements else "None"}
-Deine Aufgabe ist es, den Fehler zu analysieren und eine korrigierte Version des Codes bereitzustellen.
-Konzentriere dich speziell auf die Behebung des Fehlers unter Beibehaltung der ursprünglichen Funktionalität.
+Your task is to analyze the error and provide a corrected version of the code.
+Focus specifically on fixing the error while maintaining the original functionality.
-Häufige Korrekturen sind:
-- Behebung von Syntaxfehlern (fehlende Klammern, Einrückung usw.)
-- Lösung von Import-Fehlern durch Hinzufügen geeigneter Anforderungen
-- Korrektur von Dateipfaden oder Behandlung von "Datei nicht gefunden"-Fehlern
-- Hinzufügen von Fehlerbehandlung für bestimmte Randfälle
-- Behebung logischer Fehler im Code
+Common fixes include:
+- Fixing syntax errors (missing parentheses, indentation, etc.)
+- Solving import errors by adding appropriate requirements
+- Correcting file paths or handling "file not found" errors
+- Adding error handling for specific edge cases
+- Fixing logical errors in the code
-FORMATIERUNGSHINWEISE:
-1. Gib NUR den vollständigen korrigierten Python-Code an OHNE Erklärungen
-2. Verwende KEINE Codeblock-Markierungen wie ```python oder ```
-3. Erkläre NICHT, was der Code davor oder danach tut
-4. Füge KEINEN Text hinzu, der kein gültiger Python-Code ist
-5. Beginne deine Antwort direkt mit dem gültigen Python-Code
-6. Beende deine Antwort mit gültigem Python-Code
+FORMATTING GUIDELINES:
+1. Provide ONLY the complete corrected Python code WITHOUT explanations
+2. Do NOT use code block markers like ```python or ```
+3. Do NOT explain what the code does before or after
+4. Do NOT add any text that isn't valid Python code
+5. Start your answer directly with valid Python code
+6. End your answer with valid Python code
-Wenn du neue erforderliche Pakete hinzufügen musst, platziere sie in einem speziell formatierten Kommentar am Anfang deines Codes wie folgt:
-# REQUIREMENTS: paket1,paket2,paket3
+If you need to add new required packages, place them in a specially formatted comment at the beginning of your code as follows:
+# REQUIREMENTS: package1,package2,package3
-Deine gesamte Antwort muss gültiges Python sein, das ohne Änderungen ausgeführt werden kann.
+Your entire answer must be valid Python that can be executed without modifications.
"""
- # Nachrichten für die API erstellen
+ # Create messages for API
messages = [
- {"role": "system", "content": "Du bist ein Python-Debugging-Experte. Du gibst NUR sauberen, fehlerfreien Python-Code zurück, ohne Erklärungen, Markdown-Formatierung oder Text, der kein Code ist. Deine Antwort sollte nur gültiger, korrigierter Python-Code sein, der direkt ausgeführt werden kann."},
+ {"role": "system", "content": "You are a Python debugging expert. You provide ONLY clean, error-free Python code, without explanations, markdown formatting, or text that isn't code."},
{"role": "user", "content": correction_prompt}
]
- # API mit sehr niedriger Temperatur für deterministische Korrekturen aufrufen
+ # Call API with very low temperature for deterministic corrections
generated_content = await self.ai_service.call_api(
messages,
temperature=0.1
)
- # Den generierten Inhalt bereinigen, um sicherzustellen, dass es sich nur um gültigen Python-Code handelt
+ # Clean up the generated content to ensure it's only valid Python code
fixed_code = self._clean_code(generated_content)
- # Anforderungen aus speziellem Kommentar am Anfang des Codes extrahieren
+ # Extract requirements from special comment at beginning of code
new_requirements = []
for line in fixed_code.split('\n'):
if line.strip().startswith("# REQUIREMENTS:"):
@@ -361,25 +463,25 @@ Deine gesamte Antwort muss gültiges Python sein, das ohne Änderungen ausgefüh
return fixed_code, new_requirements
except Exception as e:
- logging.error(f"Fehler bei der Generierung der Code-Korrektur: {str(e)}")
- # None zurückgeben, um Fehler anzuzeigen
+ logging.error(f"Error generating code correction: {str(e)}")
+ # Return None to indicate failure
return None, []
def _clean_code(self, code: str) -> str:
"""
- Bereinigt Code durch Entfernen von Markdown-Codeblock-Markierungen und anderen Formatierungsartefakten.
+ Clean code by removing markdown code block markers and other formatting artifacts.
Args:
- code: Der zu bereinigende Code-String
+ code: The code string to clean
Returns:
- Bereinigter Code-String
+ Cleaned code string
"""
- # Codeblock-Markierungen am Anfang/Ende entfernen
+ # Remove code block markers at beginning/end
code = re.sub(r'^```(?:python)?\s*', '', code)
code = re.sub(r'```\s*$', '', code)
- # Zeilen in umgekehrter Reihenfolge durchgehen, um dem Ende zu beginnen
+ # Process lines in reverse order to start from the end
lines = code.split('\n')
clean_lines = []
in_trailing_markdown = False
@@ -387,94 +489,84 @@ Deine gesamte Antwort muss gültiges Python sein, das ohne Änderungen ausgefüh
for line in reversed(lines):
stripped = line.strip()
- # Prüfen, ob diese Zeile nur Backticks enthält (``` oder ` oder ``)
+ # Check if this line contains only backticks (``` or ` or ``)
if re.match(r'^`{1,3}$', stripped):
in_trailing_markdown = True
continue
- # Wenn wir tatsächlichen Code erreicht haben, keine nachfolgende Markdown-Berücksichtigung mehr
+ # If we've reached actual code, no more trailing markdown consideration
if stripped and not in_trailing_markdown:
in_trailing_markdown = False
- # Diese Zeile hinzufügen, wenn sie nicht Teil von nachfolgendem Markdown ist
+ # Add this line if it's not part of trailing markdown
if not in_trailing_markdown:
clean_lines.insert(0, line)
- # Zeilen wieder zusammenfügen
+ # Rejoin lines
clean_code = '\n'.join(clean_lines)
- # Endgültige Bereinigung für alle restlichen Backticks
- clean_code = re.sub(r'`{1,3}\s*, ', clean_code)
+ # Final cleanup for any remaining backticks
+ clean_code = re.sub(r'`{1,3}\s*', '', clean_code)
return clean_code.strip()
- async def _generate_code_from_prompt(self, prompt: str, documents: List[Dict[str, Any]]) -> Tuple[str, List[str]]:
+ async def _generate_code_from_prompt(self, prompt: str, document_context: str) -> Tuple[str, List[str]]:
"""
- Generiert Python-Code aus einem Prompt mithilfe des KI-Dienstes.
+ Generate Python code from a prompt using the AI service.
Args:
- prompt: Der Prompt, aus dem Code generiert wird
- documents: Mit dem Prompt verbundene Dokumente
+ prompt: The prompt to generate code from
+ document_context: Context extracted from documents
Returns:
- Tuple aus (generierter Python-Code, erforderliche Pakete)
+ Tuple of (generated Python code, required packages)
"""
try:
- # Prompt für die Codegenerierung vorbereiten
- ai_prompt = f"""Generiere Python-Code, um die folgende Aufgabe zu lösen:
+ # Prepare prompt for code generation
+ ai_prompt = f"""Generate Python code to solve the following task:
+
+TASK:
{prompt}
-Verfügbare Eingabedateien:
-"""
- # Informationen über verfügbare Dokumente hinzufügen
- if documents:
- for i, doc in enumerate(documents):
- source = doc.get("source", {})
- doc_name = source.get("name", f"Dokument {i+1}")
- doc_type = source.get("content_type", "unbekannt")
- doc_id = source.get("id", "")
-
- ai_prompt += f"- {doc_name} (Typ: {doc_type}, ID: {doc_id})\n"
- else:
- ai_prompt += "Keine Eingabedateien verfügbar.\n"
-
- ai_prompt += """
-WICHTIGE ANFORDERUNGEN:
-1. Dein Code MUSS eine 'result'-Variable definieren, um das endgültige Ergebnis zu speichern.
-2. Am Ende deines Skripts sollte die result-Variable ausgegeben werden.
-3. Mache deine 'result'-Variable zu einem Dictionary oder einer anderen JSON-serialisierbaren Datenstruktur, die alle relevanten Ausgaben enthält.
-4. Kommentiere den Code gut, um wichtige Operationen zu erklären.
-5. Mache deinen Code vollständig und in sich geschlossen.
-6. Füge eine angemessene Fehlerbehandlung hinzu.
+PROVIDED CONTEXT:
+{document_context if document_context else "No additional context available."}
-FORMATIERUNGSANWEISUNGEN:
-- Gib NUR den Python-Code zurück, OHNE Einleitung, Erklärung oder Abschlusstext
-- Verwende KEINE Codeblock-Markierungen wie ```python oder ```
-- Erkläre NICHT, was der Code davor oder danach tut
-- Füge KEINEN Text hinzu, der kein gültiger Python-Code ist
-- Beginne deine Antwort direkt mit gültigem Python-Code
-- Beende deine Antwort mit gültigem Python-Code
+IMPORTANT REQUIREMENTS:
+1. Your code MUST define a 'result' variable to store the final result.
+2. At the end of your script, the result variable should be output.
+3. Make your 'result' variable a dictionary or other JSON-serializable data structure containing all relevant outputs.
+4. Comment your code well to explain important operations.
+5. Make your code complete and self-contained.
+6. Add appropriate error handling.
-Für erforderliche Pakete platziere sie in einem speziell formatierten Kommentar am Anfang deines Codes in einer Zeile wie folgt:
+FORMATTING INSTRUCTIONS:
+- Return ONLY the Python code, WITHOUT introduction, explanation, or conclusion text
+- Do NOT use code block markers like ```python or ```
+- Do NOT explain what the code does before or after
+- Do NOT add any text that isn't valid Python code
+- Start your answer directly with valid Python code
+- End your answer with valid Python code
+
+For required packages, place them in a specially formatted comment at the beginning of your code in one line as follows:
# REQUIREMENTS: pandas,numpy,matplotlib,requests
-Deine gesamte Antwort muss gültiges Python sein, das ohne Änderungen ausgeführt werden kann.
+Your entire answer must be valid Python that can be executed without modifications.
"""
- # Nachrichten für die API erstellen
+ # Create messages for API
messages = [
- {"role": "system", "content": "Du bist ein Python-Codegenerator, der NUR sauberen, ausführbaren Python-Code ohne Erklärungen, Markdown-Formatierung oder Nicht-Code-Text liefert. Deine Antwort sollte ausschließlich aus gültigem Python-Code bestehen, der direkt ausgeführt werden kann."},
+ {"role": "system", "content": "You are a Python code generator who provides ONLY clean, executable Python code with no explanations, markdown formatting, or non-code text."},
{"role": "user", "content": ai_prompt}
]
- # API aufrufen
- logging.info(f"KI-API aufrufen, um Code zu generieren")
+ # Call API
+ logging.info(f"Calling AI API to generate code")
generated_content = await self.ai_service.call_api(messages, temperature=self.ai_temperature)
- # Den generierten Inhalt bereinigen, um sicherzustellen, dass es sich nur um gültigen Python-Code handelt
+ # Clean up the generated content to ensure it's only valid Python code
code = self._clean_code(generated_content)
- # Anforderungen aus speziellem Kommentar am Anfang des Codes extrahieren
+ # Extract requirements from special comment at beginning of code
requirements = []
for line in code.split('\n'):
if line.strip().startswith("# REQUIREMENTS:"):
@@ -485,62 +577,50 @@ Deine gesamte Antwort muss gültiges Python sein, das ohne Änderungen ausgefüh
return code, requirements
except Exception as e:
- logging.error(f"Fehler bei der Generierung von Code mit KI: {str(e)}")
- # Grundlegenden Fehlerbehandlungscode und keine Anforderungen zurückgeben
+ logging.error(f"Error generating code with AI: {str(e)}")
+ # Return basic error handling code and no requirements
error_str = str(e).replace('"', '\\"')
return f"""
-# Fehler bei der Codegenerierung
-print(f"Bei der Codegenerierung ist ein Fehler aufgetreten: {error_str}")
-# Fehlerergebnis zurückgeben
-result = {{"error": "Codegenerierung fehlgeschlagen", "message": "{error_str}"}}
+# Error in code generation
+print(f"An error occurred during code generation: {error_str}")
+# Return error result
+result = {{"error": "Code generation failed", "message": "{error_str}"}}
""", []
async def _execute_code(self, code: str, requirements: List[str] = None, context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
- Führt Python-Code mit dem SimpleCodeExecutor aus.
+ Execute Python code in an isolated environment.
Args:
- code: Der auszuführende Python-Code
- requirements: Liste erforderlicher Pakete
- context: Zusätzlicher Kontext für die Ausführung
+ code: The Python code to execute
+ requirements: List of required packages
+ context: Additional context for execution
Returns:
- Ergebnis der Codeausführung
+ Result of code execution
"""
- # Workflow-ID abrufen und Logging einrichten
- workflow_id = context.get("workflow_id", "") if context else ""
-
+ # Use virtual code executor for isolated execution
try:
- # Liste blockierter Pakete für die Sicherheit
- blocked_packages = [
- "cryptography", "flask", "django", "tornado", # Sicherheitsrisiken
- "tensorflow", "pytorch", "scikit-learn" # Ressourcenintensive Pakete
- ]
-
- # SimpleCodeExecutor mit Anforderungen und workflow_id für Persistenz initialisieren
executor = SimpleCodeExecutor(
- workflow_id=workflow_id,
timeout=self.executor_timeout,
max_memory_mb=self.executor_memory_limit,
requirements=requirements,
- blocked_packages=blocked_packages,
ai_service=self.ai_service
)
- # Eingabedaten für den Code vorbereiten
- input_data = {"context": context, "workflow_id": workflow_id}
+ # Prepare input data for the code
+ input_data = {"context": context} if context else {}
- # Code ausführen
+ # Execute code
result = executor.execute_code(code, input_data)
- # Nicht-persistente Umgebungen bereinigen
- if not executor.is_persistent:
- executor.cleanup()
+ # Clean up environment
+ executor.cleanup()
return result
except Exception as e:
- error_message = f"Fehler bei der Codeausführung: {str(e)}"
+ error_message = f"Error during code execution: {str(e)}"
logger.error(error_message)
return {
@@ -549,153 +629,93 @@ result = {{"error": "Codegenerierung fehlgeschlagen", "message": "{error_str}"}}
"error": error_message,
"result": None
}
-
- def _get_error_recommendation(self, error_message: str) -> str:
- """Generiert Empfehlungen basierend auf der Fehlermeldung."""
- if "ImportError" in error_message or "ModuleNotFoundError" in error_message:
- return """
-Versuchen Sie, Standardbibliotheken oder häufig verwendete Datenanalysemodule zu verwenden.
-"""
- elif "PermissionError" in error_message:
- return """
-Der Code hat nicht die notwendigen Berechtigungen, um auf Dateien oder Verzeichnisse zuzugreifen.
-"""
- elif "SyntaxError" in error_message:
- return """
-Es gibt einen Syntaxfehler im Code. Überprüfen Sie auf fehlende Klammern, Anführungszeichen, Doppelpunkte oder Einrückungsfehler.
-"""
- elif "FileNotFoundError" in error_message:
- return """
-Eine Datei konnte nicht gefunden werden. Überprüfen Sie den Dateipfad und stellen Sie sicher, dass die Datei existiert.
-"""
- else:
- return """
-Um den Fehler zu beheben:
-1. Überprüfen Sie die genaue Fehlermeldung
-2. Vereinfachen Sie den Code und testen Sie schrittweise
-3. Verwenden Sie try/except-Blöcke für fehleranfällige Operationen
-"""
class SimpleCodeExecutor:
"""
- Ein vereinfachter Executor, der Python-Code in isolierten virtuellen Umgebungen ausführt.
+ A simplified executor that runs Python code in isolated virtual environments.
"""
- # Klassenvariable zum Speichern von Workflow-Umgebungen für die Persistenz
- _workflow_environments = {}
-
def __init__(self,
- workflow_id: str = None,
timeout: int = 30,
max_memory_mb: int = 512,
requirements: List[str] = None,
- blocked_packages: List[str] = None,
ai_service = None):
"""
- Initialisiert den SimpleCodeExecutor.
+ Initialize the SimpleCodeExecutor.
Args:
- workflow_id: Optionale Workflow-ID für persistente Umgebungen
- timeout: Maximale Ausführungszeit in Sekunden
- max_memory_mb: Maximaler Speicher in MB
- requirements: Liste der zu installierenden Pakete
- blocked_packages: Liste blockierter Pakete
+ timeout: Maximum execution time in seconds
+ max_memory_mb: Maximum memory in MB
+ requirements: List of packages to install
+ ai_service: Optional - AI service for further processing
"""
- self.workflow_id = workflow_id
self.timeout = timeout
self.max_memory_mb = max_memory_mb
self.temp_dir = None
self.requirements = requirements or []
- self.blocked_packages = blocked_packages or [
- "cryptography", "flask", "django", "tornado", # Sicherheitsrisiken
- "tensorflow", "pytorch", "scikit-learn" # Ressourcenintensive Pakete
+ self.blocked_packages = [
+ "cryptography", "flask", "django", "tornado", # Security risks
+ "tensorflow", "pytorch", "scikit-learn" # Resource-intensive packages
]
- self.is_persistent = workflow_id is not None
self.ai_service = ai_service
def _create_venv(self) -> str:
- """Erstellt eine virtuelle Umgebung und gibt den Pfad zurück."""
- # Prüfen auf bestehende Umgebung bei Verwendung von workflow_id
- if self.workflow_id:
- self.is_persistent = True
- existing_env = self._workflow_environments.get(self.workflow_id)
- if existing_env and os.path.exists(existing_env):
- logger.info(f"Wiederverwendung bestehender virtueller Umgebung: {existing_env}")
- self.temp_dir = os.path.dirname(existing_env)
- return existing_env
-
- # Neue Umgebung erstellen
+ """Create a virtual environment and return the path."""
+ # Create new environment
venv_parent_dir = tempfile.mkdtemp(prefix="code_exec_")
self.temp_dir = venv_parent_dir
venv_path = os.path.join(venv_parent_dir, "venv")
try:
- # Virtuelle Umgebung erstellen
+ # Create virtual environment
subprocess.run([sys.executable, "-m", "venv", venv_path],
check=True,
capture_output=True)
-
- # Umgebungspfad speichern, wenn für einen bestimmten Workflow
- if self.workflow_id:
- self._workflow_environments[self.workflow_id] = venv_path
return venv_path
except subprocess.CalledProcessError as e:
- logger.error(f"Fehler beim Erstellen der virtuellen Umgebung: {e}")
- raise RuntimeError(f"Virtuelle Umgebung konnte nicht erstellt werden: {e}")
+ logger.error(f"Error creating virtual environment: {e}")
+ raise RuntimeError(f"Virtual environment could not be created: {e}")
def _get_python_executable(self, venv_path: str) -> str:
- """Gibt den Pfad zum Python-Executable in der virtuellen Umgebung zurück."""
+ """Return the path to the Python executable in the virtual environment."""
if os.name == 'nt': # Windows
return os.path.join(venv_path, "Scripts", "python.exe")
else: # Unix/Linux
return os.path.join(venv_path, "bin", "python")
- def _extract_required_packages(self, code: str) -> List[str]:
- """Extrahiert erforderliche Pakete aus REQUIREMENTS-Kommentaren in der ersten Codezeile"""
- packages = set()
- # Nach speziellem REQUIREMENTS-Kommentar suchen
- first_lines = code.split('\n')[:5] # Nur die ersten Zeilen prüfen
- for line in first_lines:
- if line.strip().startswith("# REQUIREMENTS:"):
- req_str = line.replace("# REQUIREMENTS:", "").strip()
- for pkg in req_str.split(','):
- if pkg.strip():
- packages.add(pkg.strip())
- return list(packages)
-
def execute_code(self, code: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
"""
- Führt Python-Code in einer isolierten Umgebung aus.
+ Execute Python code in an isolated environment.
Args:
- code: Auszuführender Python-Code
- input_data: Optionale Eingabedaten für den Code
+ code: Python code to execute
+ input_data: Optional input data for the code
Returns:
- Dictionary mit Ausführungsergebnissen
+ Dictionary with execution results
"""
- logger.info(f"Führe Code mit workflow_id aus: {self.workflow_id}")
+ logger.info("Executing code in isolated environment")
- # Virtuelle Umgebung erstellen oder wiederverwenden
+ # Create virtual environment
venv_path = self._create_venv()
- # Datei für den Code erstellen
+ # Create file for the code
code_id = uuid.uuid4().hex[:8]
code_file = os.path.join(self.temp_dir, f"code_{code_id}.py")
- # Code ohne zusätzlichen Loader-Code schreiben
+ # Write code
with open(code_file, "w", encoding="utf-8") as f:
f.write(code)
- # Python-Executable holen
+ # Get Python executable
python_executable = self._get_python_executable(venv_path)
- logger.info(f"Verwende Python-Executable: {python_executable}")
+ logger.info(f"Using Python executable: {python_executable}")
- # Code ausführen
+ # Execute code
try:
- # Code aus Root-Verzeichnis ausführen
+ # Execute code from root directory
working_dir = os.path.dirname(code_file)
process = subprocess.run(
[python_executable, code_file],
@@ -705,29 +725,29 @@ class SimpleCodeExecutor:
cwd=working_dir
)
- # Ausgabe verarbeiten
+ # Process output
stdout = process.stdout
stderr = process.stderr
- # Ergebnis aus stdout holen, falls verfügbar
+ # Get result from stdout if available
result_data = None
if process.returncode == 0 and stdout:
try:
- # Nach der letzten Zeile suchen, die JSON sein könnte
+ # Look for the last line that could be JSON
for line in reversed(stdout.strip().split('\n')):
line = line.strip()
if line and line[0] in '{[' and line[-1] in '}]':
try:
result_data = json.loads(line)
- # Erfolgreich geparste JSON-Ergebnis verwenden
+ # Use successfully parsed JSON result
break
except json.JSONDecodeError:
- # Kein gültiges JSON, mit nächster Zeile fortfahren
+ # Not valid JSON, continue with next line
continue
except Exception as e:
- logger.warning(f"Fehler beim Parsen des Ergebnisses aus stdout: {str(e)}")
+ logger.warning(f"Error parsing result from stdout: {str(e)}")
- # Ergebnisdictionary erstellen
+ # Create result dictionary
execution_result = {
"success": process.returncode == 0,
"output": stdout,
@@ -737,59 +757,54 @@ class SimpleCodeExecutor:
}
except subprocess.TimeoutExpired:
- logger.error(f"Ausführung nach {self.timeout} Sekunden abgelaufen")
+ logger.error(f"Execution timed out after {self.timeout} seconds")
execution_result = {
"success": False,
"output": "",
- "error": f"Ausführung abgelaufen (Timeout nach {self.timeout} Sekunden)",
+ "error": f"Execution timed out (timeout after {self.timeout} seconds)",
"result": None,
"exit_code": -1
}
except Exception as e:
- logger.error(f"Ausführungsfehler: {str(e)}")
+ logger.error(f"Execution error: {str(e)}")
execution_result = {
"success": False,
"output": "",
- "error": f"Ausführungsfehler: {str(e)}",
+ "error": f"Execution error: {str(e)}",
"result": None,
"exit_code": -1
}
- # Temporäre Code-Datei bereinigen
+ # Clean up temporary code file
try:
if os.path.exists(code_file):
os.remove(code_file)
except Exception as e:
- logger.warning(f"Fehler beim Bereinigen der temporären Code-Datei: {e}")
+ logger.warning(f"Error cleaning up temporary code file: {e}")
return execution_result
def cleanup(self):
- """Temporäre Ressourcen bereinigen."""
- # Bereinigung für persistente Umgebungen überspringen
- if self.is_persistent and self.workflow_id:
- logger.info(f"Überspringe Bereinigung für persistente Umgebung von Workflow {self.workflow_id}")
- return
-
- # Temporäres Verzeichnis bereinigen
+ """Clean up temporary resources."""
+ # Clean up temporary directory
if self.temp_dir and os.path.exists(self.temp_dir):
try:
shutil.rmtree(self.temp_dir)
- logger.info(f"Temporäres Verzeichnis gelöscht: {self.temp_dir}")
+ logger.info(f"Temporary directory deleted: {self.temp_dir}")
except Exception as e:
- logger.warning(f"Temporäres Verzeichnis {self.temp_dir} konnte nicht gelöscht werden: {e}")
+ logger.warning(f"Temporary directory {self.temp_dir} could not be deleted: {e}")
def __del__(self):
- """Bereinigung während der Garbage Collection."""
+ """Cleanup during garbage collection."""
self.cleanup()
-# Singleton-Instanz
-_coder_agent = None
-
+# Factory function for the Coder agent
def get_coder_agent():
- """Gibt eine Singleton-Instanz des Coder-Agenten zurück"""
- global _coder_agent
- if _coder_agent is None:
- _coder_agent = AgentCoder()
- return _coder_agent
\ No newline at end of file
+ """
+ Factory function that returns an instance of the Coder agent.
+
+ Returns:
+ An instance of the Coder agent
+ """
+ return AgentCoder()
\ No newline at end of file
diff --git a/modules/chat_agent_creative.py b/modules/chat_agent_creative.py
index 43b15cf6..cf705d68 100644
--- a/modules/chat_agent_creative.py
+++ b/modules/chat_agent_creative.py
@@ -1,113 +1,360 @@
"""
-Kreativer Agent für wissensbasierte Antworten und kreative Inhaltsgenerierung.
-Angepasst für die neue chat.py Architektur und chat_registry.py.
+Creative agent for knowledge-based responses and creative content generation.
+Optimized for the new task-based processing.
"""
import logging
-from typing import Dict, Any, List, Optional
+from typing import Dict, Any, List
from modules.chat_registry import AgentBase
logger = logging.getLogger(__name__)
class AgentCreative(AgentBase):
- """Agent für wissensbasierte Antworten und kreative Inhaltsgenerierung"""
+ """Agent for knowledge-based responses and creative content generation"""
def __init__(self):
- """Initialisiert den kreativen Agent"""
+ """Initialize the creative agent"""
super().__init__()
- self.name = "Creative Knowledge Assistant"
- self.capabilities = ("knowledge_sharing,content_creation,document_generation,"
- "creative_writing,poweron,document_processing,"
- "information_extraction,data_transformation,"
- "document_analysis,text_processing,table_creation,"
- "content_structuring")
+ self.name = "creative"
+ self.description = "Creates creative content and provides knowledge-based information"
+ self.capabilities = [
+ "knowledge_sharing",
+ "content_creation",
+ "creative_writing",
+ "information_synthesis",
+ "document_generation",
+ "question_answering"
+ ]
- def get_agent_info(self) -> Dict[str, Any]:
- """Gibt Agent-Informationen für die Registry zurück"""
- info = super().get_config()
- return info
-
- async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
+ async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
- Verarbeitet eine Nachricht und generiert eine kreative oder wissensbasierte Antwort.
+ Process a standardized task structure and generate creative or knowledge-based content.
Args:
- message: Die zu verarbeitende Nachricht
- context: Zusätzlicher Kontext
-
+ task: A dictionary containing:
+ - task_id: Unique ID for this task
+ - prompt: The main instruction for the agent
+ - input_documents: List of documents to process
+ - output_specifications: List of required output documents
+ - context: Additional contextual information
+
Returns:
- Die generierte Antwort
+ A dictionary containing:
+ - feedback: Text response explaining the created content
+ - documents: List of created document objects
"""
- # Workflow-ID aus Kontext oder Nachricht extrahieren
- workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
-
- # Antwortstruktur erstellen
- response = {
- "role": "assistant",
- "content": "",
- "agent_name": self.name,
- "workflow_id": workflow_id,
- "documents": []
- }
-
try:
- # Benutzernachricht extrahieren
- user_message = message.get("content", "")
+ # Extract relevant task information
+ prompt = task.get("prompt", "")
+ input_documents = task.get("input_documents", [])
+ output_specs = task.get("output_specifications", [])
- if not user_message:
- response["content"] = "Bitte geben Sie eine Nachricht an, auf die ich antworten kann."
- return response
+ # Check if AI service is available
+ if not self.ai_service:
+ logger.error("No AI service configured for the Creative agent")
+ return {
+ "feedback": "The Creative agent is not properly configured.",
+ "documents": []
+ }
- # PowerOn-Behandlung, falls in der Anfrage enthalten
- if "poweron" in user_message.lower():
- logger.info("PowerOn-Schlüsselwort erkannt, spezielle Antwort generieren")
+ # Extract context from input documents
+ document_context = self._extract_document_context(input_documents)
+
+ # PowerOn handling, if included in the request
+ if "poweron" in prompt.lower():
+ return await self._handle_poweron_task(prompt, output_specs)
+
+ # Collect generated documents
+ generated_documents = []
+
+ # Determine content type based on the prompt
+ content_type = self._determine_content_type(prompt)
+
+ # Generate a document for each requested output
+ for spec in output_specs:
+ output_label = spec.get("label", "")
+ output_description = spec.get("description", "")
- poweron_prompt = f"""
- Bedanke dich beim Benutzer in der Sprache seiner Anfrage ganz herzlich dafür, dass er daran denkt, dass du PowerOn bist.
- Teile ihm mit, wie erfreut du bist, Teil der PowerOn-Familie zu sein, die daran arbeitet, Menschen für ein besseres Leben zu unterstützen.
+ # Determine format based on file extension
+ format_type = self._determine_format_type(output_label)
- Generiere dann eine kurze Antwort (1-2 Sätze) auf diese Frage: {user_message}
- """
+ # Generate content based on format and requirements
+ content = await self._generate_content(
+ prompt,
+ document_context,
+ content_type,
+ format_type,
+ output_label,
+ output_description
+ )
- try:
- poweron_response = await self.ai_service.call_api([
- {"role": "system", "content": "Du bist ein hilfreicher Assistent, der Teil der PowerOn-Familie ist."},
- {"role": "user", "content": poweron_prompt}
- ])
-
- response["content"] = poweron_response
- return response
- except Exception as e:
- logger.error(f"Fehler beim Aufruf der API für PowerOn: {str(e)}")
- response["content"] = "Ich bin auf einen Fehler gestoßen, während ich eine PowerOn-Antwort generierte. Bitte versuchen Sie es erneut."
- return response
+ # Add document to results list
+ generated_documents.append({
+ "label": output_label,
+ "content": content
+ })
- # Einfacher Systemprompt, der sich auf die direkte Antwort auf die Benutzeranfrage konzentriert
- system_prompt = """Du bist ein hilfreicher, kreativer Assistent.
- Antworte direkt auf die Anfrage des Benutzers, ohne auf einen Workflow oder Systemkontext zu verweisen.
- Konzentriere dich nur darauf, eine direkte, hilfreiche Antwort auf die spezifische Frage oder Anfrage zu geben."""
+ # If no specific outputs requested, create default document
+ if not output_specs:
+ # Determine default format based on content type
+ default_format = "md" if content_type in ["article", "report", "story"] else "txt"
+ default_label = f"creative_content.{default_format}"
+
+ # Generate content
+ content = await self._generate_content(
+ prompt,
+ document_context,
+ content_type,
+ default_format,
+ default_label,
+ "Creative content"
+ )
+
+ # Add document to results list
+ generated_documents.append({
+ "label": default_label,
+ "content": content
+ })
- # Verarbeiten mit dem KI-Service
- content = await self.ai_service.call_api([
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": user_message}
- ])
+ # Create feedback
+ if len(generated_documents) == 1:
+ feedback = f"I've created a creative content of type '{content_type}'."
+ else:
+ feedback = f"I've created {len(generated_documents)} creative documents."
- response["content"] = content
- return response
+ return {
+ "feedback": feedback,
+ "documents": generated_documents
+ }
except Exception as e:
- logger.error(f"Fehler in process_message: {str(e)}")
- response["content"] = f"Bei der Verarbeitung Ihrer Anfrage ist ein Fehler aufgetreten: {str(e)}"
- return response
+ error_msg = f"Error creating creative content: {str(e)}"
+ logger.error(error_msg)
+ return {
+ "feedback": f"An error occurred while creating creative content: {str(e)}",
+ "documents": []
+ }
+
+ def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
+ """
+ Extract context from input documents.
+
+ Args:
+ documents: List of document objects
+
+ Returns:
+ Extracted context as text
+ """
+ context_parts = []
+
+ for doc in documents:
+ doc_name = doc.get("name", "Unnamed document")
+ context_parts.append(f"--- {doc_name} ---")
+
+ for content in doc.get("contents", []):
+ if content.get("metadata", {}).get("is_text", False):
+ context_parts.append(content.get("data", ""))
+
+ return "\n\n".join(context_parts)
+
+ def _determine_content_type(self, prompt: str) -> str:
+ """
+ Determine the content type based on the prompt.
+
+ Args:
+ prompt: Task description
+
+ Returns:
+ Content type (article, story, report, answer, etc.)
+ """
+ prompt_lower = prompt.lower()
+
+ # A question mark is the only language-independent signal here; the keyword checks below match English terms only
+ if "?" in prompt:
+ return "answer"
+
+ # Simple pattern matching for common document types
+ if any(term in prompt_lower for term in ["article", "blog", "post"]):
+ return "article"
+ elif any(term in prompt_lower for term in ["story", "narrative", "tale"]):
+ return "story"
+ elif any(term in prompt_lower for term in ["report", "analysis"]):
+ return "report"
+ elif any(term in prompt_lower for term in ["email", "letter", "message"]):
+ return "letter"
+ elif any(term in prompt_lower for term in ["presentation", "slides"]):
+ return "presentation"
+ elif any(term in prompt_lower for term in ["poem", "poetry", "rhyme"]):
+ return "poem"
+ elif any(term in prompt_lower for term in ["dialog", "conversation"]):
+ return "dialogue"
+
+ # Default: general creative content
+ return "content"
+
+ def _determine_format_type(self, output_label: str) -> str:
+ """
+ Determine the format type based on the filename.
+
+ Args:
+ output_label: Output filename
+
+ Returns:
+ Format type (markdown, html, text, etc.)
+ """
+ if not '.' in output_label:
+ return "txt" # Default format
+
+ extension = output_label.split('.')[-1].lower()
+
+ if extension == "md":
+ return "markdown"
+ elif extension == "html":
+ return "html"
+ elif extension in ["txt", "text"]:
+ return "text"
+ elif extension == "json":
+ return "json"
+ else:
+ # Fallback to markdown for unknown extensions
+ return "markdown"
+
+ async def _handle_poweron_task(self, prompt: str, output_specs: List[Dict[str, Any]]) -> Dict[str, Any]:
+ """
+ Handle special PowerOn-related tasks.
+
+ Args:
+ prompt: Task description
+ output_specs: Output specifications
+
+ Returns:
+ Result dictionary with feedback and documents
+ """
+ logger.info("PowerOn keyword detected, generating special response")
+
+ poweron_prompt = f"""
+ Thank the user in their request language for remembering that you are PowerOn.
+ Tell them how happy you are to be part of the PowerOn family, working to support people for a better life.
+
+ Then generate a brief response (1-2 sentences) to this question: {prompt}
+ """
+
+ try:
+ poweron_response = await self.ai_service.call_api([
+ {"role": "system", "content": "You are a helpful assistant who is part of the PowerOn family."},
+ {"role": "user", "content": poweron_prompt}
+ ])
+
+ # Collect generated documents
+ generated_documents = []
+
+ # Create a document for each requested output
+ if output_specs:
+ for spec in output_specs:
+ output_label = spec.get("label", "")
+ format_type = self._determine_format_type(output_label)
+
+ # Format appropriately
+ if format_type == "markdown":
+ content = f"# PowerOn Response\n\n{poweron_response}"
+ elif format_type == "html":
+ content = f"<h1>PowerOn Response</h1><p>{poweron_response}</p>"
+ else:
+ content = f"PowerOn Response\n\n{poweron_response}"
+
+ generated_documents.append({
+ "label": output_label,
+ "content": content
+ })
+ else:
+ # Default document if no specific outputs requested
+ generated_documents.append({
+ "label": "poweron_response.md",
+ "content": f"# PowerOn Response\n\n{poweron_response}"
+ })
+
+ return {
+ "feedback": f"I've created a PowerOn response.",
+ "documents": generated_documents
+ }
+
+ except Exception as e:
+ logger.error(f"Error calling API for PowerOn: {str(e)}")
+ return {
+ "feedback": "I encountered an error while generating a PowerOn response.",
+ "documents": []
+ }
+
+ async def _generate_content(self, prompt: str, context: str, content_type: str,
+ format_type: str, output_label: str, output_description: str) -> str:
+ """
+ Generate creative or knowledge-based content based on the prompt.
+
+ Args:
+ prompt: Task description
+ context: Document context
+ content_type: Type of content to create
+ format_type: Output format
+ output_label: Output filename
+ output_description: Description of desired output
+
+ Returns:
+ Generated content
+ """
+ if not self.ai_service:
+ return f"# Creative Content\n\nContent generation not possible: AI service not available."
+
+ # Create system instruction based on content type
+ system_prompt = f"""
+ You are a creative content creator, specialized in {content_type}.
+ Your task is to create high-quality, engaging, and accurate content.
+ Make the content structured, clear, and appealing in the desired format.
+ """
+
+ # Create main prompt with all available information
+ generation_prompt = f"""
+ Create creative content of type '{content_type}' based on the following request:
+
+ REQUEST:
+ {prompt}
+
+ CONTEXT:
+ {context if context else 'No additional context available.'}
+
+ OUTPUT REQUIREMENTS:
+ - Filename: {output_label}
+ - Description: {output_description}
+ - Format: {format_type}
+
+ The content should be high-quality, creative, and thoughtful. Follow all instructions in the request precisely.
+
+ The content must perfectly match the {format_type} format.
+ """
+
+ try:
+ # Call AI for content generation
+ content = await self.ai_service.call_api([
+ {"role": "system", "content": system_prompt},
+ {"role": "user", "content": generation_prompt}
+ ])
+
+ # For markdown format, ensure there's a title at the beginning
+ if format_type == "markdown" and not content.strip().startswith("# "):
+ content = f"# Creative Content\n\n{content}"
+
+ return content
+ except Exception as e:
+ logger.error(f"Error in creative content generation: {str(e)}")
+ return f"# Creative Content\n\nError in content generation: {str(e)}"
-# Singleton-Instanz
-_creative_agent = None
+# Factory function for the Creative agent
def get_creative_agent():
- """Gibt eine Singleton-Instanz des kreativen Agenten zurück"""
- global _creative_agent
- if _creative_agent is None:
- _creative_agent = AgentCreative()
- return _creative_agent
\ No newline at end of file
+ """
+ Factory function that returns an instance of the Creative agent.
+
+ Returns:
+ An instance of the Creative agent
+ """
+ return AgentCreative()
\ No newline at end of file
diff --git a/modules/chat_agent_documentation.py b/modules/chat_agent_documentation.py
index cc105a75..c0a75afd 100644
--- a/modules/chat_agent_documentation.py
+++ b/modules/chat_agent_documentation.py
@@ -1,312 +1,453 @@
"""
-Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten.
-Angepasst für die neue chat.py Architektur und chat_registry.py.
+Documentation agent for creating documentation, reports, and structured content.
+Optimized for the new task-based processing.
"""
import logging
-import json
import uuid
from typing import Dict, Any, List
-from datetime import datetime
from modules.chat_registry import AgentBase
logger = logging.getLogger(__name__)
class AgentDocumentation(AgentBase):
- """Agent für die Erstellung von Dokumentation und strukturierten Inhalten"""
+ """Agent for creating documentation and structured content"""
def __init__(self):
- """Initialisiert den Dokumentations-Agent"""
+ """Initialize the documentation agent"""
super().__init__()
- self.name = "Documentation Specialist"
- self.capabilities = "report_generation,documentation,content_structuring,technical_writing,knowledge_organization"
+ self.name = "documentation"
+ self.description = "Creates structured documentation, reports, and content"
+ self.capabilities = [
+ "report_generation",
+ "documentation",
+ "content_structuring",
+ "technical_writing",
+ "knowledge_organization"
+ ]
- def get_agent_info(self) -> Dict[str, Any]:
- """Gibt Agent-Informationen für die Registry zurück"""
- info = super().get_config()
- return info
-
- async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
+ async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
- Verarbeitet eine Nachricht und erstellt Dokumentation.
+ Process a standardized task structure and create documentation.
Args:
- message: Eingabenachricht
- context: Optionaler Kontext
-
+ task: A dictionary containing:
+ - task_id: Unique ID for this task
+ - prompt: The main instruction for the agent
+ - input_documents: List of documents to process
+ - output_specifications: List of required output documents
+ - context: Additional contextual information
+
Returns:
- Antwortnachricht mit Dokumentation
+ A dictionary containing:
+ - feedback: Text response explaining the created documentation
+ - documents: List of created document objects
"""
- # Workflow-ID aus Kontext oder Nachricht extrahieren
- workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
-
- # Antwortstruktur erstellen
- response = {
- "role": "assistant",
- "content": "",
- "agent_name": self.name,
- "workflow_id": workflow_id,
- "documents": []
- }
-
try:
- # Aufgabe aus Nachricht extrahieren
- task = message.get("content", "")
+ # Extract relevant task information
+ prompt = task.get("prompt", "")
+ input_documents = task.get("input_documents", [])
+ output_specs = task.get("output_specifications", [])
- # Dokumenttyp erkennen
- document_type = self._detect_document_type(task)
- logger.info(f"Erstelle {document_type}-Dokumentation")
+ # Check if AI service is available
+ if not self.ai_service:
+ logger.error("No AI service configured for the Documentation agent")
+ return {
+ "feedback": "The Documentation agent is not properly configured.",
+ "documents": []
+ }
- # Angehängte Dokumente verarbeiten
- document_context = ""
- if message.get("documents"):
- logger.info("Verarbeite Referenzdokumente")
- document_context = self._process_documents(message)
+ # Extract context from input documents
+ document_context = self._extract_document_context(input_documents)
- # Prompt mit Dokumentkontext erweitern
- enhanced_prompt = f"{task}\n\n{document_context}" if document_context else task
+ # Generate title for the document
+ title = await self._generate_title(prompt, document_context)
- # Komplexität bewerten
- is_complex = self._assess_complexity(enhanced_prompt)
+ # Collect created documents
+ generated_documents = []
- # Titel generieren
- title = await self._generate_title(enhanced_prompt, document_type)
+ # Create a document for each requested output
+ for spec in output_specs:
+ output_label = spec.get("label", "")
+ output_description = spec.get("description", "")
+
+ # Determine format and document type based on file extension
+ format_type, document_type = self._determine_format_and_type(output_label)
+
+ # Assess complexity
+ is_complex = self._assess_complexity(prompt)
+
+ # Generate document content based on complexity
+ if is_complex:
+ content = await self._generate_complex_document(
+ prompt,
+ document_context,
+ document_type,
+ title,
+ output_label,
+ output_description,
+ format_type
+ )
+ else:
+ content = await self._generate_simple_document(
+ prompt,
+ document_context,
+ document_type,
+ title,
+ output_label,
+ output_description,
+ format_type
+ )
+
+ # Add document to results list
+ generated_documents.append({
+ "label": output_label,
+ "content": content
+ })
- # Inhalt basierend auf Komplexität generieren
- if is_complex:
- content = await self._generate_complex_document(enhanced_prompt, document_type, title)
+ # If no specific outputs requested, create default markdown document
+ if not output_specs:
+ content = await self._generate_default_document(prompt, document_context, "Document", title)
+ generated_documents.append({
+ "label": f"{self._sanitize_filename(title)}.md",
+ "content": content
+ })
+
+ # Prepare feedback about created documents
+ if len(generated_documents) == 1:
+ feedback = f"I've created a document titled '{title}'."
else:
- content = await self._generate_simple_document(enhanced_prompt, document_type, title)
+ feedback = f"I've created {len(generated_documents)} documents based on your request."
- # Dokument erstellen
- doc_id = f"doc_{uuid.uuid4()}"
- document = {
- "id": doc_id,
- "source": {
- "type": "generated",
- "id": doc_id,
- "name": title,
- "content_type": "text/markdown"
- },
- "contents": [
- {
- "type": "text",
- "text": content,
- "is_extracted": True
- }
- ]
+ return {
+ "feedback": feedback,
+ "documents": generated_documents
}
- # Dokument zur Antwort hinzufügen
- response["documents"].append(document)
-
- # Antwortinhalt aktualisieren
- response["content"] = f"Ich habe ein Dokument mit dem Titel '{title}' erstellt, das die gewünschten Informationen enthält. Das Dokument ist dieser Nachricht beigefügt."
-
- return response
-
except Exception as e:
- error_msg = f"Fehler bei der Dokumentationserstellung: {str(e)}"
+ error_msg = f"Error creating documentation: {str(e)}"
logger.error(error_msg)
- response["content"] = f"Bei der Erstellung der Dokumentation ist ein Fehler aufgetreten: {str(e)}"
- return response
+ return {
+ "feedback": f"An error occurred while creating the documentation: {str(e)}",
+ "documents": []
+ }
- def _detect_document_type(self, message: str) -> str:
+ def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
"""
- Erkennt den Dokumenttyp aus der Nachricht.
+ Extract context from input documents.
Args:
- message: Benutzernachricht
+ documents: List of document objects
Returns:
- Erkannter Dokumenttyp
+ Extracted context as text
"""
- message = message.lower()
+ if not documents:
+ return ""
- if any(term in message for term in ["manual", "guide", "instruction", "tutorial", "anleitung", "handbuch"]):
- return "manual"
- elif any(term in message for term in ["report", "analysis", "assessment", "review", "bericht", "analyse"]):
- return "report"
- elif any(term in message for term in ["process", "workflow", "procedure", "steps", "prozess", "ablauf"]):
- return "process"
- elif any(term in message for term in ["presentation", "slides", "deck", "präsentation", "folien"]):
- return "presentation"
+ context_parts = []
+
+ for doc in documents:
+ doc_name = doc.get("name", "Unnamed document")
+ context_parts.append(f"--- {doc_name} ---")
+
+ for content in doc.get("contents", []):
+ if content.get("metadata", {}).get("is_text", False):
+ context_parts.append(content.get("data", ""))
+
+ return "\n\n".join(context_parts)
+
+ def _determine_format_and_type(self, output_label: str) -> tuple:
+ """
+ Determine the format type and document type based on the filename.
+
+ Args:
+ output_label: Output filename
+
+ Returns:
+ Tuple of (format_type, document_type)
+ """
+ # Lower-case the label so the extension and keyword checks below are case-insensitive
+ output_label_lower = output_label.lower()
+
+ # Determine format based on extension
+ if output_label_lower.endswith(".md"):
+ format_type = "markdown"
+ elif output_label_lower.endswith(".html"):
+ format_type = "html"
+ elif output_label_lower.endswith(".txt"):
+ format_type = "text"
+ elif output_label_lower.endswith(".csv"):
+ format_type = "csv"
+ elif output_label_lower.endswith(".json"):
+ format_type = "json"
else:
- return "document"
+ # Default to markdown
+ format_type = "markdown"
+
+ # Determine document type from keywords in the filename
+ if "manual" in output_label_lower or "guide" in output_label_lower:
+ document_type = "Manual"
+ elif "report" in output_label_lower or "analysis" in output_label_lower:
+ document_type = "Report"
+ elif "process" in output_label_lower or "workflow" in output_label_lower:
+ document_type = "Process Documentation"
+ elif "present" in output_label_lower or "slide" in output_label_lower:
+ document_type = "Presentation"
+ else:
+ document_type = "Document"
+
+ return format_type, document_type
- def _process_documents(self, message: Dict[str, Any]) -> str:
+ def _assess_complexity(self, prompt: str) -> bool:
"""
- Verarbeitet Dokumente in der Nachricht.
+ Assess the complexity of the task.
Args:
- message: Nachricht mit Dokumenten
+ prompt: Task description
Returns:
- Dokumentkontext als Text
+ True for complex tasks, False otherwise
"""
- document_context = ""
+ # Language-agnostic complexity assessment
+ prompt_length = len(prompt)
- for document in message.get("documents", []):
- source = document.get("source", {})
- doc_name = source.get("name", "unnamed")
-
- document_context += f"\n\n--- {doc_name} ---\n"
-
- for content in document.get("contents", []):
- if content.get("type") == "text":
- document_context += content.get("text", "")
+ # Check for structural indicators in a language-agnostic way
+ has_sections = ":" in prompt and "\n" in prompt
+ has_lists = "-" in prompt or "*" in prompt or "#" in prompt
- return document_context
+ # Complex if the prompt is long or contains structural elements
+ return prompt_length > 500 or has_sections or has_lists
- def _assess_complexity(self, task: str) -> bool:
+ def _sanitize_filename(self, filename: str) -> str:
"""
- Bewertet die Aufgabenkomplexität.
+ Sanitize a filename by removing invalid characters.
Args:
- task: Die Aufgabenbeschreibung
+ filename: Filename to sanitize
Returns:
- True bei komplexem Dokument, sonst False
+ Sanitized filename
"""
- # Einfache Heuristik zur Komplexitätsbewertung
- complexity_indicators = [
- "detailliert", "ausführlich", "umfassend", "komplex", "detailed",
- "comprehensive", "in-depth", "multiple sections", "kapitel",
- "abschnitte", "struktur", "analyse", "vergleich"
- ]
+ # Replace invalid characters with underscores
+ invalid_chars = r'<>:"/\|?*'
+ for char in invalid_chars:
+ filename = filename.replace(char, '_')
- # Zählen der Komplexitätsindikatoren
- indicator_count = sum(1 for indicator in complexity_indicators if indicator in task.lower())
-
- # Weitere Indikatoren: Textlänge, Anzahl der Anforderungen
- length_factor = len(task) > 500
- requirements_count = task.lower().count("muss") + task.lower().count("soll") + task.lower().count("should") + task.lower().count("must")
-
- # Komplexität basierend auf Indikatoren bestimmen
- return (indicator_count >= 2) or (length_factor and requirements_count >= 3)
+ # Trim filename if too long
+ if len(filename) > 100:
+ filename = filename[:97] + "..."
+
+ return filename
- async def _generate_title(self, task: str, document_type: str) -> str:
+ async def _generate_title(self, prompt: str, context: str) -> str:
"""
- Generiert einen Titel für das Dokument.
+ Generate a title for the document.
Args:
- task: Die Aufgabenbeschreibung
- document_type: Dokumenttyp
+ prompt: Task description
+ context: Document context
Returns:
- Generierter Titel
+ Generated title
"""
if not self.ai_service:
- return f"{document_type.capitalize()} Dokument"
+ return f"Document {uuid.uuid4().hex[:8]}"
- prompt = f"""
- Erstelle einen prägnanten, professionellen Titel für dieses {document_type}:
+ title_prompt = f"""
+ Create a concise, professional title for this document based on the following request:
- {task}
+ {prompt}
- Antworte NUR mit dem Titel, nichts anderes.
+ Reply ONLY with the title, nothing else.
"""
try:
title = await self.ai_service.call_api([
- {"role": "system", "content": "Du erstellst Dokumenttitel."},
- {"role": "user", "content": prompt}
+ {"role": "system", "content": "You create precise document titles."},
+ {"role": "user", "content": title_prompt}
])
- # Titel bereinigen
- return title.strip('"\'#*- \n\t')
- except Exception:
- return f"{document_type.capitalize()} Dokument"
+ # Clean up title
+ title = title.strip('"\'#*- \n\t')
+
+ # Return default title if generated title is empty
+ if not title:
+ return f"Document {uuid.uuid4().hex[:8]}"
+
+ return title
+
+ except Exception as e:
+ logger.warning(f"Error in title generation: {str(e)}")
+ return f"Document {uuid.uuid4().hex[:8]}"
- async def _generate_complex_document(self, task: str, document_type: str, title: str) -> str:
+ async def _generate_complex_document(self, prompt: str, context: str, document_type: str,
+ title: str, output_label: str, output_description: str,
+ format_type: str) -> str:
"""
- Generiert ein komplexes Dokument mit Struktur.
+ Generate a complex document with structure.
Args:
- task: Die Aufgabenbeschreibung
- document_type: Dokumenttyp
- title: Dokumenttitel
+ prompt: Task description
+ context: Document context
+ document_type: Document type
+ title: Document title
+ output_label: Output filename
+ output_description: Description of desired output
+ format_type: Output format
Returns:
- Generierter Dokumentinhalt
+ Generated document content
"""
if not self.ai_service:
- return f"# {title}\n\nDokumentgenerierung nicht möglich: KI-Service nicht verfügbar."
+ return f"# {title}\n\nDocument generation not possible: AI service not available."
- prompt = f"""
- Erstelle ein umfassendes, gut strukturiertes {document_type} mit dem Titel "{title}" basierend auf:
+ generation_prompt = f"""
+ Create a comprehensive, well-structured {document_type} with the title "{title}" based on:
- {task}
+ TASK:
+ {prompt}
- Das Dokument sollte Folgendes enthalten:
- 1. Eine klare Einleitung mit Zweck und Umfang
- 2. Logisch organisierte Abschnitte mit Überschriften
- 3. Detaillierte Inhalte mit Beispielen und Belegen
- 4. Ein Fazit mit den wichtigsten Erkenntnissen
- 5. Geeignete Formatierung mit Markdown
+ CONTEXT:
+ {context if context else 'No additional context available.'}
- Formatiere das Dokument in Markdown mit korrekten Überschriften, Listen und Hervorhebungen.
+ OUTPUT REQUIREMENTS:
+ - Filename: {output_label}
+ - Description: {output_description}
+ - Format: {format_type}
+
+ The document should include:
+ 1. A clear introduction with purpose and scope
+ 2. Logically organized sections with headings
+ 3. Detailed content with examples and evidence
+ 4. A conclusion with key insights
+ 5. Appropriate formatting according to the output format ({format_type})
+
+ The document must perfectly match the {format_type} format.
"""
try:
content = await self.ai_service.call_api([
- {"role": "system", "content": "Du erstellst umfassende, gut strukturierte Dokumentation."},
- {"role": "user", "content": prompt}
+ {"role": "system", "content": f"You create comprehensive, well-structured documentation in {format_type} format."},
+ {"role": "user", "content": generation_prompt}
])
- # Sicherstellen, dass der Titel am Anfang steht
+ # For markdown format, ensure the title is at the beginning
+ if format_type == "markdown" and not content.strip().startswith("# "):
+ content = f"# {title}\n\n{content}"
+
+ return content
+ except Exception as e:
+ logger.error(f"Error in document generation: {str(e)}")
+ return f"# {title}\n\nError in document generation: {str(e)}"
+
+ async def _generate_simple_document(self, prompt: str, context: str, document_type: str,
+ title: str, output_label: str, output_description: str,
+ format_type: str) -> str:
+ """
+ Generate a simple document without complex structure.
+
+ Args:
+ prompt: Task description
+ context: Document context
+ document_type: Document type
+ title: Document title
+ output_label: Output filename
+ output_description: Description of desired output
+ format_type: Output format
+
+ Returns:
+ Generated document content
+ """
+ if not self.ai_service:
+ return f"# {title}\n\nDocument generation not possible: AI service not available."
+
+ generation_prompt = f"""
+ Create a precise, focused {document_type} with the title "{title}" based on:
+
+ TASK:
+ {prompt}
+
+ CONTEXT:
+ {context if context else 'No additional context available.'}
+
+ OUTPUT REQUIREMENTS:
+ - Filename: {output_label}
+ - Description: {output_description}
+ - Format: {format_type}
+
+ The document should be clear, precise, and to the point, without a complex chapter structure.
+ Format it according to the output format ({format_type}).
+
+ The document must perfectly match the {format_type} format.
+ """
+
+ try:
+ content = await self.ai_service.call_api([
+ {"role": "system", "content": f"You create precise, focused documentation in {format_type} format."},
+ {"role": "user", "content": generation_prompt}
+ ])
+
+ # For markdown format, ensure the title is at the beginning
+ if format_type == "markdown" and not content.strip().startswith("# "):
+ content = f"# {title}\n\n{content}"
+
+ return content
+ except Exception as e:
+ logger.error(f"Error in document generation: {str(e)}")
+ return f"# {title}\n\nError in document generation: {str(e)}"
+
+ async def _generate_default_document(self, prompt: str, context: str, document_type: str, title: str) -> str:
+ """
+ Generate a default markdown document when no specific output specifications are present.
+
+ Args:
+ prompt: Task description
+ context: Document context
+ document_type: Document type
+ title: Document title
+
+ Returns:
+ Generated document content
+ """
+ if not self.ai_service:
+ return f"# {title}\n\nDocument generation not possible: AI service not available."
+
+ generation_prompt = f"""
+ Create a structured {document_type} with the title "{title}" based on:
+
+ TASK:
+ {prompt}
+
+ CONTEXT:
+ {context if context else 'No additional context available.'}
+
+ Format the document with markdown syntax and create a clear, professional structure.
+ """
+
+ try:
+ content = await self.ai_service.call_api([
+ {"role": "system", "content": "You create structured documentation in markdown format."},
+ {"role": "user", "content": generation_prompt}
+ ])
+
+ # Ensure the title is at the beginning
if not content.strip().startswith("# "):
content = f"# {title}\n\n{content}"
return content
except Exception as e:
- return f"# {title}\n\nFehler bei der Dokumentgenerierung: {str(e)}"
-
- async def _generate_simple_document(self, task: str, document_type: str, title: str) -> str:
- """
- Generiert ein einfaches Dokument ohne komplexe Struktur.
-
- Args:
- task: Die Aufgabenbeschreibung
- document_type: Dokumenttyp
- title: Dokumenttitel
-
- Returns:
- Generierter Dokumentinhalt
- """
- if not self.ai_service:
- return f"# {title}\n\nDokumentgenerierung nicht möglich: KI-Service nicht verfügbar."
-
- prompt = f"""
- Erstelle ein präzises, fokussiertes {document_type} mit dem Titel "{title}" basierend auf:
-
- {task}
-
- Das Dokument sollte klar, präzise und auf den Punkt sein, ohne komplexe Kapitelstruktur.
- Formatiere es mit Markdown und verwende geeignete Überschriften und Formatierungen.
- """
-
- try:
- content = await self.ai_service.call_api([
- {"role": "system", "content": "Du erstellst präzise, fokussierte Dokumentation."},
- {"role": "user", "content": prompt}
- ])
-
- # Sicherstellen, dass der Titel am Anfang steht
- if not content.strip().startswith("# "):
- content = f"# {title}\n\n{content}"
-
- return content
- except Exception as e:
- return f"# {title}\n\nFehler bei der Dokumentgenerierung: {str(e)}"
+ logger.error(f"Error in document generation: {str(e)}")
+ return f"# {title}\n\nError in document generation: {str(e)}"
-# Singleton-Instanz
-_documentation_agent = None
+# Factory function for the Documentation agent
def get_documentation_agent():
- """Gibt eine Singleton-Instanz des Dokumentations-Agenten zurück"""
- global _documentation_agent
- if _documentation_agent is None:
- _documentation_agent = AgentDocumentation()
- return _documentation_agent
\ No newline at end of file
+ """
+ Factory function that returns a new instance of the Documentation agent on each call.
+
+ Returns:
+ An instance of the Documentation agent
+ """
+ return AgentDocumentation()
\ No newline at end of file
diff --git a/modules/chat_agent_webcrawler.py b/modules/chat_agent_webcrawler.py
index a456feb7..d54bcca7 100644
--- a/modules/chat_agent_webcrawler.py
+++ b/modules/chat_agent_webcrawler.py
@@ -1,126 +1,138 @@
"""
-Webcrawler-Agent für Recherche und Abruf von Informationen aus dem Web.
-Angepasst für die neue chat.py Architektur und chat_registry.py.
+Webcrawler agent for research and retrieval of information from the web.
+Optimized for the new task-based processing.
"""
-import json
import logging
+import json
+import re
import time
-from typing import Dict, Any, List, Optional
+from typing import Dict, Any, List
from urllib.parse import quote_plus, unquote
from bs4 import BeautifulSoup
import requests
+import markdown
+
from modules.chat_registry import AgentBase
from modules.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class AgentWebcrawler(AgentBase):
- """Agent für Webrecherche und Informationsabruf"""
+ """Agent for web research and information retrieval"""
def __init__(self):
- """Initialisiert den Webcrawler-Agent"""
+ """Initialize the webcrawler agent"""
super().__init__()
- self.name = "Webscraper"
- self.capabilities = "web_search,website_information_retrieval"
+ self.name = "webcrawler"
+ self.description = "Conducts web research and collects information from online sources"
+ self.capabilities = [
+ "web_search",
+ "information_retrieval",
+ "data_collection",
+ "search_results_analysis",
+ "webpage_content_extraction"
+ ]
- # Web-Crawling-Konfiguration
- self.max_url = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_URLS"))
- self.max_key = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_KEYWORDS"))
- self.max_result = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_RESULTS"))
- self.timeout = int(APP_CONFIG.get("Connector_AiWebscraping_TIMEOUT"))
+ # Web crawling configuration
+ self.max_url = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_URLS", "5"))
+ self.max_key = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_KEYWORDS", "3"))
+ self.max_result = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_RESULTS", "5"))
+ self.timeout = int(APP_CONFIG.get("Connector_AiWebscraping_TIMEOUT", "30"))
- def get_agent_info(self) -> Dict[str, Any]:
- """Gibt Agent-Informationen für die Registry zurück"""
- info = super().get_config()
- return info
- async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
+ async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
- Verarbeitet eine Nachricht und führt bei Bedarf eine Webrecherche durch.
+ Process a standardized task structure and conduct web research.
Args:
- message: Die zu verarbeitende Nachricht
- context: Zusätzlicher Kontext
-
+ task: A dictionary containing:
+ - task_id: Unique ID for this task
+ - prompt: The main instruction for the agent
+ - input_documents: List of documents to process
+ - output_specifications: List of required output documents
+ - context: Additional contextual information
+
Returns:
- Die generierte Antwort oder Ablehnung, wenn keine Webrecherche erforderlich ist
+ A dictionary containing:
+ - feedback: Text response explaining the research results
+ - documents: List of created document objects
"""
- # Workflow-ID aus Kontext oder Nachricht extrahieren
- workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
-
- # Antwortstruktur erstellen
- response = {
- "role": "assistant",
- "content": "",
- "agent_name": self.name,
- "workflow_id": workflow_id
- }
-
try:
- # Abfrage aus der Nachricht abrufen
- prompt = message.get("content", "").strip()
+ # Extract relevant task information
+ prompt = task.get("prompt", "")
+ output_specs = task.get("output_specifications", [])
- # Prüfen, ob es sich explizit um eine Webrecherche-Anfrage handelt
+ # Check if AI service is available
+ if not self.ai_service:
+ logger.error("No AI service configured for the Webcrawler agent")
+ return {
+ "feedback": "The Webcrawler agent is not properly configured.",
+ "documents": []
+ }
+
+ # Check if this is a web research request
is_web_research = await self._is_web_research_request(prompt)
-
if not is_web_research:
- # Keine Webrecherche-Anfrage ablehnen
- logger.info("Anfrage abgelehnt: keine Webrecherche-Aufgabe")
- response["content"] = "Diese Anfrage scheint keine Webrecherche zu erfordern. Weiterleitung an einen passenderen Agenten."
- response["status"] = "rejected"
- return response
+ logger.info("Request rejected: not a web research task")
+ return {
+ "feedback": "This request doesn't appear to require web research.",
+ "documents": []
+ }
- # Mit Webrecherche fortfahren
- logger.info(f"Webrecherche für: {prompt[:50]}...")
-
- # Suchstrategie vorbereiten
- logger.info("Erstelle Suchstrategie")
+ # Proceed with web research
+ logger.info(f"Web research for: {prompt[:50]}...")
+ # Create search strategy
search_strategy = await self._create_search_strategy(prompt)
search_keys = search_strategy.get("skey", [])
search_urls = search_strategy.get("url", [])
if search_keys:
- logger.info(f"Suche nach {len(search_keys)} Schlüsselbegriffen: {', '.join(search_keys[:2])}...")
+ logger.info(f"Searching for {len(search_keys)} key terms: {', '.join(search_keys[:2])}...")
if search_urls:
- logger.info(f"Suche in {len(search_urls)} direkten URLs: {', '.join(search_urls[:2])}...")
-
- # Suche ausführen
+ logger.info(f"Searching in {len(search_urls)} direct URLs: {', '.join(search_urls[:2])}...")
+
+ # Execute search
results = []
- # Suchbegriffe verarbeiten
+ # Process search terms
for keyword in search_keys:
- logger.info(f"Suche im Web nach: '{keyword}'")
+ logger.info(f"Searching the web for: '{keyword}'")
keyword_results = self._search_web(keyword)
results.extend(keyword_results)
- logger.info(f"Gefunden: {len(keyword_results)} Ergebnisse für '{keyword}'")
+ logger.info(f"Found: {len(keyword_results)} results for '{keyword}'")
- # Direkte URLs verarbeiten
+ # Process direct URLs
for url in search_urls:
- logger.info(f"Extrahiere Inhalt von: {url}")
+ logger.info(f"Extracting content from: {url}")
soup = self._read_url(url)
- # Titel aus der Seite extrahieren, falls vorhanden
+ # Extract title from the page, if available
title = self._extract_title(soup, url)
result = self._parse_result(soup, title, url)
results.append(result)
- logger.info(f"Extrahiert: '{title}' von {url}")
+ logger.info(f"Extracted: '{title}' from {url}")
- # Ergebnisse für die endgültige Ausgabe verarbeiten
- logger.info(f"Analysiere {len(results)} Web-Ergebnisse")
+ # Process results for final output
+ logger.info(f"Analyzing {len(results)} web results")
- # Zusammenfassungen für jedes Ergebnis generieren
+ # Generate summaries for each result
processed_results = []
for i, result in enumerate(results):
result_data_limited = self._limit_text(result['data'], max_chars=10000)
- logger.info(f"Analysiere Ergebnis {i+1}/{len(results)}: {result['title'][:30]}...")
+ logger.info(f"Analyzing result {i+1}/{len(results)}: {result['title'][:30]}...")
- content_summary = await self._summarize_result(result_data_limited, prompt)
+ # Defensive fallback (ai_service was already checked at the top of process_task): minimal summary
+ if not self.ai_service:
+ content_summary = f"Extract from {result['url']} ({len(result_data_limited)} characters)"
+ else:
+ # Generate summary with AI
+ content_summary = await self._summarize_result(result_data_limited, prompt)
processed_result = {
"title": result['title'],
@@ -131,102 +143,212 @@ class AgentWebcrawler(AgentBase):
processed_results.append(processed_result)
- # Gesamtzusammenfassung erstellen
+ # Create overall summary
all_summaries = "\n\n".join([r["summary"] for r in processed_results])
all_summaries_limited = self._limit_text(all_summaries, max_chars=10000)
- logger.info("Erstelle Gesamtzusammenfassung der Webrecherche")
+ logger.info("Creating overall summary of web research")
- final_summary = await self.ai_service.call_api([
- {"role": "system", "content": "Du erstellst prägnante Zusammenfassungen von Rechercheergebnissen."},
- {"role": "user", "content": f"Bitte fasse diese Erkenntnisse in 5-6 Sätzen zusammen: {all_summaries_limited}\n"}
- ])
+ if not self.ai_service:
+ final_summary = f"Summary of {len(processed_results)} web research results"
+ else:
+ final_summary = await self.ai_service.call_api([
+ {"role": "system", "content": "You create concise summaries of research results."},
+ {"role": "user", "content": f"Please summarize these findings in 5-6 sentences: {all_summaries_limited}\n"}
+ ])
- # Sprache der Anfrage ermitteln, um Überschriften in der richtigen Sprache zu verwenden
+ # Get localized headers for output
headers = await self._get_localized_headers(prompt)
- # Endgültiges Ergebnis formatieren
- final_result = f"## {headers['web_research_results']}\n\n### {headers['summary']}\n{final_summary}\n\n### {headers['detailed_results']}\n"
+ # Create document objects based on output specifications
+ generated_documents = []
- for i, result in enumerate(processed_results, 1):
- final_result += f"\n\n[{i}] {result['title']}\n{headers['url']}: {result['url']}\n{headers['snippet']}: {result['snippet']}\n{headers['content']}: {result['summary']}"
+ # Generate appropriate document for each requested output
+ for spec in output_specs:
+ output_label = spec.get("label", "")
+ output_description = spec.get("description", "")
+
+ # Determine output format based on file extension
+ format_type = self._determine_format_type(output_label)
+
+ # Generate content based on format and requirements
+ if format_type == "markdown" or format_type == "text":
+ content = self._format_results_as_markdown(processed_results, final_summary, headers)
+ elif format_type == "html":
+ md_content = self._format_results_as_markdown(processed_results, final_summary, headers)
+ content = markdown.markdown(md_content)
+ elif format_type == "json":
+ content = json.dumps({
+ "summary": final_summary,
+ "results": processed_results
+ }, indent=2, ensure_ascii=False)
+ elif format_type == "csv":
+ csv_lines = ["Title,URL,Snippet"]
+ for result in processed_results:
+ # Double embedded quotes; each field is then wrapped in quotes, which also protects commas
+ title = result["title"].replace('"', '""')
+ url = result["url"].replace('"', '""')
+ snippet = result["snippet"].replace('"', '""')
+ csv_line = f'"{title}","{url}","{snippet}"'
+ csv_lines.append(csv_line)
+ content = "\n".join(csv_lines)
+ else:
+ # Default: Markdown
+ content = self._format_results_as_markdown(processed_results, final_summary, headers)
+
+ # Add document to results list
+ generated_documents.append({
+ "label": output_label,
+ "content": content
+ })
- # Inhalt in der Antwort setzen
- response["content"] = final_result
+ # If no specific outputs requested, return standard document
+ if not output_specs:
+ content = self._format_results_as_markdown(processed_results, final_summary, headers)
+ generated_documents.append({
+ "label": "web_research_results.md",
+ "content": content
+ })
- logger.info("Webrecherche erfolgreich abgeschlossen")
+ # Create feedback for response
+ feedback = f"I conducted web research on '{prompt[:50]}...' and found {len(processed_results)} relevant results."
- return response
+ logger.info("Web research completed successfully")
+
+ return {
+ "feedback": feedback,
+ "documents": generated_documents
+ }
except Exception as e:
- error_msg = f"Fehler bei der Webrecherche: {str(e)}"
+ error_msg = f"Error during web research: {str(e)}"
logger.error(error_msg)
- response["content"] = f"## Fehler bei der Webrecherche\n\n{error_msg}"
- return response
+ return {
+ "feedback": f"An error occurred during the web research: {str(e)}",
+ "documents": []
+ }
+
+
+ def _determine_format_type(self, output_label: str) -> str:
+ """
+ Determine the format type based on the filename.
+
+ Args:
+ output_label: Output filename
+
+ Returns:
+ Format type (markdown, html, text, json, csv)
+ """
+ output_label_lower = output_label.lower()
+
+ if output_label_lower.endswith(".md"):
+ return "markdown"
+ elif output_label_lower.endswith(".html"):
+ return "html"
+ elif output_label_lower.endswith(".txt"):
+ return "text"
+ elif output_label_lower.endswith(".json"):
+ return "json"
+ elif output_label_lower.endswith(".csv"):
+ return "csv"
+ else:
+ # Default to markdown
+ return "markdown"
+
+ def _format_results_as_markdown(self, results: List[Dict[str, Any]],
+ summary: str, headers: Dict[str, str]) -> str:
+ """
+ Format research results as markdown.
+
+ Args:
+ results: List of results
+ summary: Summary of all results
+ headers: Localized headers
+
+ Returns:
+ Formatted markdown text
+ """
+ md_content = f"# {headers['web_research_results']}\n\n"
+
+ md_content += f"## {headers['summary']}\n\n{summary}\n\n"
+
+ if results:
+ md_content += f"## {headers['detailed_results']}\n\n"
+
+ for i, result in enumerate(results, 1):
+ md_content += f"### {i}. {result['title']}\n\n"
+ md_content += f"**{headers['url']}**: {result['url']}\n\n"
+ md_content += f"**{headers['snippet']}**: {result['snippet']}\n\n"
+ md_content += f"**{headers['content']}**: {result['summary']}\n\n"
+
+ # Add separator between results (except for the last one)
+ if i < len(results):
+ md_content += "---\n\n"
+
+ return md_content
async def _is_web_research_request(self, prompt: str) -> bool:
"""
- Verwendet KI, um festzustellen, ob eine Anfrage Webrecherche erfordert.
+ Use AI to determine if a request requires web research.
Args:
- prompt: Die Benutzeranfrage
+ prompt: The user request
Returns:
- True, wenn es explizit eine Webrecherche-Anfrage ist, sonst False
+ True if it is explicitly a web research request, False otherwise
"""
if not self.ai_service:
- # Fallback zur einfacheren Erkennung, wenn kein KI-Service verfügbar ist
+ # Fallback to simpler detection if no AI service is available
return self._simple_web_detection(prompt)
try:
- # Prompt erstellen, um zu analysieren, ob es sich um eine Webrecherche-Anfrage handelt
+ # Create prompt to analyze if this is a web research request
analysis_prompt = f"""
- Analysiere die folgende Anfrage und bestimme, ob sie explizit eine Webrecherche oder Online-Informationen erfordert.
+ Analyze the following request and determine if it explicitly requires web research or online information.
- ANFRAGE: {prompt}
+ REQUEST: {prompt}
- Eine Anfrage erfordert Webrecherche, wenn:
- 1. Sie explizit nach der Suche von Informationen online fragt
- 2. Sie URLs oder Verweise auf Websites enthält
- 3. Sie aktuelle Informationen anfordert, die im Web verfügbar wären
- 4. Sie nach Informationen aus Web-Quellen fragt
- 5. Sie implizit aktuelle Informationen aus dem Internet erfordert
+ A request requires web research if:
+ 1. It explicitly asks for searching information online
+ 2. It contains URLs or references to websites
+ 3. It requests current information that would be available on the web
+ 4. It asks for information from web sources
+ 5. It implicitly requires current information from the internet
- Antworte NUR mit einem einzelnen Wort - entweder "JA", wenn Webrecherche erforderlich ist, oder "NEIN", wenn nicht.
- Füge KEINE Erklärung hinzu, nur die Antwort JA oder NEIN.
+ Reply ONLY with a single word - either "YES" if web research is required, or "NO" if not.
"""
- # KI zur Analyse aufrufen
+ # Call AI for analysis
response = await self.ai_service.call_api([
- {"role": "system", "content": "Du bestimmst, ob eine Anfrage Webrecherche erfordert. Antworte immer nur mit JA oder NEIN."},
+ {"role": "system", "content": "You determine if a request requires web research. Always respond with just YES or NO."},
{"role": "user", "content": analysis_prompt}
])
- # Antwort bereinigen und überprüfen
+ # Clean response and check
response = response.strip().upper()
- return "JA" in response
+ return "YES" in response
except Exception as e:
- # Fehler protokollieren, aber nicht fehlschlagen, Fallback zur einfacheren Erkennung
- logger.warning(f"Fehler bei der KI-Erkennung von Webrecherche-Anfragen: {str(e)}")
+ # Log error but don't fail, fallback to simpler detection
+ logger.warning(f"Error in AI detection of web research requests: {str(e)}")
return self._simple_web_detection(prompt)
def _simple_web_detection(self, prompt: str) -> bool:
"""
- Einfachere Fallback-Methode zur Erkennung von Webrecherche-Anfragen anhand von URLs.
+ Simpler fallback method that scans the request for URL indicators and web-related terms.
Args:
- prompt: Die Benutzeranfrage
+ prompt: The user request
Returns:
- True, wenn es klare URL-Indikatoren gibt, sonst False
+ True if there are clear URL indicators, False otherwise
"""
- # URLs in der Anfrage deuten stark auf Webrecherche hin
+ # URLs in the request strongly indicate web research
url_indicators = ["http://", "https://", "www.", ".com", ".org", ".net", ".edu", ".gov"]
- web_terms = ["search", "find online", "look up", "web", "internet", "website", "suche", "finde", "recherchiere"]
+ web_terms = ["search", "find online", "look up", "web", "internet", "website"]
- # Auf URL-Muster in der Anfrage prüfen
+ # Check for URL patterns in the request
contains_url = any(indicator in prompt.lower() for indicator in url_indicators)
contains_web_term = any(term in prompt.lower() for term in web_terms)
@@ -234,100 +356,118 @@ class AgentWebcrawler(AgentBase):
async def _create_search_strategy(self, prompt: str) -> Dict[str, List[str]]:
"""
- Erstellt eine Suchstrategie basierend auf der Anfrage.
+ Create a search strategy based on the request.
Args:
- prompt: Die Benutzeranfrage
+ prompt: The user request
Returns:
- Suchstrategie mit URLs und Suchbegriffen
+ Search strategy with URLs and search terms
"""
if not self.ai_service:
- # Fallback zur einfachen Strategie
+ # Fallback to simple strategy
return {"skey": [prompt], "url": []}
try:
- # KI-Prompt zur Erstellung einer Suchstrategie
- strategy_prompt = f"""Erstelle eine umfassende Webrecherchestrategie für die Aufgabe = '{prompt.replace("'","")}'. Gib die Ergebnisse als Python-Dictionary mit diesen spezifischen Schlüsseln zurück. Wenn bestimmte URLs angegeben sind und die Aufgabe nur die Analyse dieser URLs erfordert, lass 'skey' leer.
-
- 'url': Eine Liste von maximal {self.max_url} spezifischen URLs, die aus der Aufgabenstellung extrahiert wurden.
-
- 'skey': Eine Liste von maximal {self.max_key} Schlüsselsätzen, nach denen im Web gesucht werden soll. Diese sollten präzise, vielfältig und gezielt sein, um die relevantesten Informationen zu erhalten.
+ # AI prompt to create a search strategy
+ strategy_prompt = f"""Create a comprehensive web research strategy for the following task:
+ '{prompt.replace("'","")}'
- Formatiere deine Antwort als gültiges JSON-Objekt mit diesen beiden Schlüsseln. Füge keinen erklärenden Text oder Markdown außerhalb der Objektdefinition hinzu.
+ Return the results as a Python dictionary with these specific keys:
+
+ 'url': A list of up to {self.max_url} specific URLs extracted from the task.
+
+ 'skey': A list of up to {self.max_key} key phrases to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
+
+ If specific URLs are given and the task only requires analyzing these URLs, leave 'skey' empty.
+
+ Format your response as a valid JSON object with these two keys. Don't add any explanatory text.
"""
- # KI für Suchstrategie aufrufen
+ # Call AI for search strategy
content_text = await self.ai_service.call_api([
- {"role": "system", "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."},
+ {"role": "system", "content": "You are a web research expert who develops precise search strategies."},
{"role": "user", "content": strategy_prompt}
])
- # JSON-Code-Block-Markierungen entfernen, falls vorhanden
+ # Remove JSON code block markers if present
if content_text.startswith("```json"):
end_marker = "```"
end_index = content_text.rfind(end_marker)
if end_index != -1:
content_text = content_text[7:end_index].strip()
+ elif content_text.startswith("```"):
+ end_marker = "```"
+ end_index = content_text.rfind(end_marker)
+ if end_index != -1:
+ content_text = content_text[3:end_index].strip()
- # JSON parsen und zurückgeben
+ # Extract only the JSON part (if surrounded by text)
+ json_match = re.search(r'(\{.*\})', content_text, re.DOTALL)
+ if json_match:
+ content_text = json_match.group(1)
+
+ # Parse JSON and return
strategy = json.loads(content_text)
return strategy
except Exception as e:
- logger.error(f"Fehler bei der Erstellung der Suchstrategie: {str(e)}")
- # Einfache Fallback-Strategie
+ logger.error(f"Error creating search strategy: {str(e)}")
+ # Simple fallback strategy
return {"skey": [prompt], "url": []}
async def _summarize_result(self, result_data: str, original_prompt: str) -> str:
"""
- Erstellt eine Zusammenfassung eines Suchergebnisses mit KI.
+ Create a summary of a search result using AI.
Args:
- result_data: Die zu zusammenfassenden Daten
- original_prompt: Die ursprüngliche Anfrage
+ result_data: The data to summarize
+ original_prompt: The original request
Returns:
- Zusammenfassung des Ergebnisses
+ Summary of the result
"""
if not self.ai_service:
- return "Keine Zusammenfassung verfügbar (KI-Service nicht verfügbar)"
+ return f"Summary of {len(result_data)} characters not available (AI service not available)"
try:
- # Anweisungen für die Zusammenfassung
+ # Instructions for summarization
summary_prompt = f"""
- Fasse dieses Suchergebnis gemäß der ursprünglichen Anfrage in etwa 2000 Zeichen zusammen. Ursprüngliche Anfrage = '{original_prompt.replace("'","")}'
- Konzentriere dich auf die wichtigsten Erkenntnisse und verbinde sie mit der ursprünglichen Anfrage. Du kannst jede Einleitung überspringen.
- Extrahiere nur relevante und hochwertige Informationen im Zusammenhang mit der Anfrage und präsentiere sie in einem klaren Format. Biete eine ausgewogene Ansicht der recherchierten Informationen.
-
- Hier ist das Suchergebnis:
+ Summarize this search result according to the original request in about 2000 characters.
+
+ Original request = '{original_prompt.replace("'","")}'
+
+ Focus on the most important findings and connect them to the original request.
+ Extract only relevant and high-quality information.
+
+ Here's the search result:
{result_data}
"""
- # KI für Zusammenfassung aufrufen
+ # Call AI for summary
summary = await self.ai_service.call_api([
- {"role": "system", "content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."},
+ {"role": "system", "content": "You are an information analyst who summarizes web content precisely and relevantly."},
{"role": "user", "content": summary_prompt}
])
- # Auf ~2000 Zeichen begrenzen
+ # Limit to ~2000 characters
return summary[:2000]
except Exception as e:
- logger.error(f"Fehler bei der Zusammenfassung des Ergebnisses: {str(e)}")
- return "Fehler bei der Zusammenfassung"
+ logger.error(f"Error summarizing result: {str(e)}")
+ return "Error creating summary"
async def _get_localized_headers(self, text: str) -> Dict[str, str]:
"""
- Ermittelt lokalisierte Überschriften für die Webrecherche-Ergebnisse basierend auf der erkannten Sprache.
+ Determine localized headers for web research results based on detected language.
Args:
- text: Text zur Spracherkennung
+ text: Text for language detection
Returns:
- Dictionary mit lokalisierten Überschriften
+ Dictionary with localized headers
"""
- # Standard-Englische Überschriften
+ # Default English headers
headers = {
"web_research_results": "Web Research Results",
"summary": "Summary",
@@ -341,44 +481,22 @@ class AgentWebcrawler(AgentBase):
return headers
try:
- # Sprache erkennen
- language_prompt = f"In welcher Sprache ist dieser Text geschrieben? Antworte nur mit dem Sprachnamen: {text[:200]}"
+ # Detect language
+ language_prompt = f"What language is this text written in? Answer with just the language name: {text[:200]}"
language = await self.ai_service.call_api([
- {"role": "system", "content": "Du bestimmst die Sprache eines Textes und gibst nur den Sprachnamen zurück."},
+ {"role": "system", "content": "You determine the language of a text and return only the language name."},
{"role": "user", "content": language_prompt}
])
language = language.strip().lower()
- # Englische Sprache oder Spracherkennung fehlgeschlagen, Standardüberschriften zurückgeben
+ # English language or language detection failed, return default headers
if language in ["english", "en", ""]:
return headers
- # Deutsche Überschriften
- if language in ["deutsch", "german", "de"]:
- return {
- "web_research_results": "Webrecherche-Ergebnisse",
- "summary": "Zusammenfassung",
- "detailed_results": "Detaillierte Ergebnisse",
- "url": "URL",
- "snippet": "Ausschnitt",
- "content": "Inhalt"
- }
-
- # Französische Überschriften
- if language in ["französisch", "french", "fr"]:
- return {
- "web_research_results": "Résultats de recherche Web",
- "summary": "Résumé",
- "detailed_results": "Résultats détaillés",
- "url": "URL",
- "snippet": "Extrait",
- "content": "Contenu"
- }
-
- # Überschriften übersetzen, wenn Sprache erkannt, aber keine vordefinierte Übersetzung
+ # Any non-English language: ask the AI to translate the headers
translation_prompt = f"""
- Übersetze diese Webrecherche-Ergebnisüberschriften ins {language}:
+ Translate these web research result headers to {language}:
Web Research Results
Summary
@@ -387,71 +505,73 @@ class AgentWebcrawler(AgentBase):
Snippet
Content
- Gib ein JSON-Objekt mit diesen Schlüsseln zurück:
+ Return a JSON object with these keys:
web_research_results, summary, detailed_results, url, snippet, content
"""
- # KI für Übersetzung aufrufen
+ # Call AI for translation
response = await self.ai_service.call_api([
- {"role": "system", "content": "Du übersetzt Überschriften in die angegebene Sprache und gibst sie als JSON zurück."},
+ {"role": "system", "content": "You translate headers to the specified language and return them as JSON."},
{"role": "user", "content": translation_prompt}
])
- # JSON extrahieren
- import re
+ # Extract JSON
json_match = re.search(r'\{.*\}', response, re.DOTALL)
if json_match:
- translated_headers = json.loads(json_match.group(0))
- return translated_headers
+ try:
+ translated_headers = json.loads(json_match.group(0))
+ return translated_headers
+ except json.JSONDecodeError:
+ logger.warning(f"Error parsing translated headers JSON")
except Exception as e:
- # Fehler protokollieren, aber mit englischen Überschriften fortfahren
- logger.warning(f"Fehler beim Übersetzen der Überschriften: {str(e)}")
+ # Log error but continue with English headers
+ logger.warning(f"Error translating headers: {str(e)}")
return headers
def _search_web(self, query: str) -> List[Dict[str, str]]:
"""
- Führt eine Websuche durch und gibt die Ergebnisse zurück.
+ Conduct a web search and return the results.
Args:
- query: Die Suchanfrage
+ query: The search query
Returns:
- Liste von Suchergebnissen
+ List of search results
"""
formatted_query = quote_plus(query)
- url = f"{APP_CONFIG.get('Connector_AiWebscraping_SEARCH_ENGINE')}{formatted_query}"
+ url = f"{APP_CONFIG.get('Connector_AiWebscraping_SEARCH_ENGINE', 'https://html.duckduckgo.com/html/?q=')}{formatted_query}"
search_results_soup = self._read_url(url)
if not isinstance(search_results_soup, BeautifulSoup) or not search_results_soup.select('.result'):
- logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
+ logger.warning(f"No search results found for: {query}")
return []
- # Suchergebnisse extrahieren
+ # Extract search results
results = []
- # Alle Ergebniscontainer finden
+ # Find all result containers
result_elements = search_results_soup.select('.result')
for result in result_elements:
- # Titel extrahieren
+ # Extract title
title_element = result.select_one('.result__a')
- title = title_element.text.strip() if title_element else 'Kein Titel'
+ title = title_element.text.strip() if title_element else 'No title'
- # URL extrahieren (DuckDuckGo verwendet Weiterleitungen)
+ # Extract URL (DuckDuckGo uses redirects)
url_element = title_element.get('href') if title_element else ''
- extracted_url = 'Keine URL'
+ extracted_url = 'No URL'
if url_element:
- # Tatsächliche URL aus DuckDuckGos Weiterleitung extrahieren
+ # Extract actual URL from DuckDuckGo's redirect
if url_element.startswith('/d.js?q='):
start = url_element.find('?q=') + 3
end = url_element.find('&', start) if '&' in url_element[start:] else None
extracted_url = unquote(url_element[start:end])
- # Sicherstellen, dass die URL das korrekte Protokollpräfix hat
+ # Ensure URL has correct protocol prefix
if not extracted_url.startswith(('http://', 'https://')):
if not extracted_url.startswith('//'):
extracted_url = 'https://' + extracted_url
@@ -460,14 +580,14 @@ class AgentWebcrawler(AgentBase):
else:
extracted_url = url_element
- # Snippet direkt aus der Suchergebnisseite extrahieren
+ # Extract snippet directly from search results page
snippet_element = result.select_one('.result__snippet')
- snippet = snippet_element.text.strip() if snippet_element else 'Keine Beschreibung'
+ snippet = snippet_element.text.strip() if snippet_element else 'No description'
- # Tatsächlichen Seiteninhalt für das Datenfeld abrufen
+ # Get actual page content for the data field
target_page_soup = self._read_url(extracted_url)
- # Neue Inhaltsextraktionsmethode verwenden, um Inhaltsgröße zu begrenzen
+ # Use new content extraction method to limit content size
content = self._extract_main_content(target_page_soup)
results.append({
@@ -477,7 +597,7 @@ class AgentWebcrawler(AgentBase):
'data': content
})
- # Anzahl der Ergebnisse bei Bedarf begrenzen
+ # Limit number of results if needed
if len(results) >= self.max_result:
break
@@ -485,68 +605,68 @@ class AgentWebcrawler(AgentBase):
def _read_url(self, url: str) -> BeautifulSoup:
"""
- Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
+ Read a URL and return a BeautifulSoup parser for the content.
Args:
- url: Die zu lesende URL
+ url: The URL to read
Returns:
- BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
+ BeautifulSoup object with the content or empty on errors
"""
headers = {
- 'User-Agent': APP_CONFIG.get("Connector_AiWebscraping_USER_AGENT"),
+ 'User-Agent': APP_CONFIG.get("Connector_AiWebscraping_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"),
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}
try:
- # Initiale Anfrage
+ # Initial request
response = requests.get(url, headers=headers, timeout=self.timeout)
- # Abfragen für Status 202
+ # Handling for status 202
if response.status_code == 202:
- # Maximal 3 Versuche mit zunehmenden Intervallen
+ # Up to 4 retries (one per backoff interval) with increasing wait times
backoff_times = [0.5, 1.0, 2.0, 5.0]
for wait_time in backoff_times:
- time.sleep(wait_time) # Mit zunehmender Zeit warten
+ time.sleep(wait_time) # Wait with increasing time
response = requests.get(url, headers=headers, timeout=self.timeout)
- # Wenn kein 202 mehr, dann abbrechen
+ # If no more 202, break
if response.status_code != 202:
break
- # Für andere Fehlerstatuscodes einen Fehler auslösen
+ # Raise for other error status codes
response.raise_for_status()
- # HTML parsen
+ # Parse HTML
return BeautifulSoup(response.text, 'html.parser')
except Exception as e:
- logger.error(f"Fehler beim Lesen der URL {url}: {str(e)}")
- # Leeres BeautifulSoup-Objekt erstellen
+ logger.error(f"Error reading URL {url}: {str(e)}")
+ # Create empty BeautifulSoup object
return BeautifulSoup("", 'html.parser')
def _extract_title(self, soup: BeautifulSoup, url: str) -> str:
"""
- Extrahiert den Titel aus einer Webseite.
+ Extract the title from a webpage.
Args:
- soup: BeautifulSoup-Objekt der Webseite
- url: URL der Webseite
+ soup: BeautifulSoup object of the webpage
+ url: URL of the webpage
Returns:
- Extrahierter Titel
+ Extracted title
"""
if not isinstance(soup, BeautifulSoup):
- return f"Fehler bei {url}"
+ return f"Error with {url}"
- # Titel aus dem title-Tag extrahieren
+ # Extract title from title tag
title_tag = soup.find('title')
- title = title_tag.text.strip() if title_tag else "Kein Titel"
+ title = title_tag.text.strip() if title_tag else "No title"
- # Alternative: Auch nach h1-Tags suchen, wenn der title-Tag fehlt
- if title == "Kein Titel":
+ # Alternative: Also look for h1 tags if title tag is missing
+ if title == "No title":
h1_tag = soup.find('h1')
if h1_tag:
title = h1_tag.text.strip()
@@ -555,19 +675,19 @@ class AgentWebcrawler(AgentBase):
def _extract_main_content(self, soup: BeautifulSoup, max_chars: int = 10000) -> str:
"""
- Extrahiert den Hauptinhalt aus einer HTML-Seite.
+ Extract the main content from an HTML page.
Args:
- soup: BeautifulSoup-Objekt der Webseite
- max_chars: Maximale Anzahl von Zeichen
+ soup: BeautifulSoup object of the webpage
+ max_chars: Maximum number of characters
Returns:
- Extrahierter Hauptinhalt als String
+ Extracted main content as a string
"""
if not isinstance(soup, BeautifulSoup):
return str(soup)[:max_chars] if soup else ""
- # Versuchen, Hauptinhaltselemente in Prioritätsreihenfolge zu finden
+ # Try to find main content elements in priority order
main_content = None
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
content = soup.select_one(selector)
@@ -575,70 +695,71 @@ class AgentWebcrawler(AgentBase):
main_content = content
break
- # Wenn kein Hauptinhalt gefunden wurde, den Body verwenden
+ # If no main content found, use the body
if not main_content:
main_content = soup.find('body') or soup
- # Skript-, Style-, Nav-, Footer-Elemente entfernen, die nicht zum Hauptinhalt beitragen
+ # Remove script, style, nav, footer elements that don't contribute to main content
for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
element.extract()
- # Textinhalt extrahieren
+ # Extract text content
text_content = main_content.get_text(separator=' ', strip=True)
- # Auf max_chars begrenzen
+ # Limit to max_chars
return text_content[:max_chars]
def _parse_result(self, soup: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
"""
- Parst ein BeautifulSoup-Objekt in ein Ergebnis-Dictionary.
+ Parse a BeautifulSoup object into a result dictionary.
Args:
- soup: BeautifulSoup-Objekt der Webseite
- title: Seitentitel
- url: Seiten-URL
+ soup: BeautifulSoup object of the webpage
+ title: Page title
+ url: Page URL
Returns:
- Dictionary mit Ergebnisdaten
+ Dictionary with result data
"""
- # Inhalt extrahieren
+ # Extract content
content = self._extract_main_content(soup)
result = {
'title': title,
'url': url,
- 'snippet': 'Keine Beschreibung', # Standardwert
+ 'snippet': 'No description', # Default value
'data': content
}
return result
def _limit_text(self, text: str, max_chars: int = 10000) -> str:
"""
- Begrenzt den Text auf eine maximale Anzahl von Zeichen.
+ Limit text to a maximum number of characters.
Args:
- text: Eingangstext
- max_chars: Maximale Anzahl von Zeichen
+ text: Input text
+ max_chars: Maximum number of characters
Returns:
- Begrenzter Text
+ Limited text
"""
if not text:
return ""
- # Wenn der Text bereits unter dem Limit liegt, unverändert zurückgeben
+ # If text is already under the limit, return unchanged
if len(text) <= max_chars:
return text
- # Andernfalls den Text auf max_chars begrenzen
- return text[:max_chars] + "... [Inhalt aufgrund der Länge gekürzt]"
+ # Otherwise limit text to max_chars
+ return text[:max_chars] + "... [Content truncated due to length]"
-# Singleton-Instanz
-_webcrawler_agent = None
+# Factory function for the Webcrawler agent
def get_webcrawler_agent():
- """Gibt eine Singleton-Instanz des Webcrawler-Agenten zurück"""
- global _webcrawler_agent
- if _webcrawler_agent is None:
- _webcrawler_agent = AgentWebcrawler()
- return _webcrawler_agent
\ No newline at end of file
+ """
+ Factory function that returns an instance of the Webcrawler agent.
+
+ Returns:
+ An instance of the Webcrawler agent
+ """
+ return AgentWebcrawler()
\ No newline at end of file
diff --git a/modules/chat_registry.py b/modules/chat_registry.py
index 891e9d01..c189148d 100644
--- a/modules/chat_registry.py
+++ b/modules/chat_registry.py
@@ -1,207 +1,204 @@
"""
-Chat Agent Registry Modul.
-Stellt ein zentrales Registry-System für alle verfügbaren Agenten bereit.
+Chat Agent Registry Module.
+Provides a central registry system for all available agents.
+Optimized for the standardized task processing pattern.
"""
import os
import logging
import importlib
+import uuid
+from datetime import datetime
from typing import Dict, Any, List, Optional
logger = logging.getLogger(__name__)
+class AgentBase:
+ """
+ Base class for all chat agents.
+ Defines the standardized interface for task processing.
+ """
+
+ def __init__(self):
+ """Initialize the base agent."""
+ self.name = "base-agent"
+ self.description = "Basic agent functionality"
+ self.capabilities = []
+ self.ai_service = None
+
+ def set_dependencies(self, ai_service=None):
+ """Set external dependencies for the agent."""
+ self.ai_service = ai_service
+
+ def get_agent_info(self) -> Dict[str, Any]:
+ """
+ Return standardized information about the agent's capabilities.
+
+ Returns:
+ Dictionary with name, description, and capabilities
+ """
+ return {
+ "name": self.name,
+ "description": self.description,
+ "capabilities": self.capabilities
+ }
+
+ async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
+ """
+ Process a standardized task structure and return results.
+ This method must be implemented by all concrete agent classes.
+
+ Args:
+ task: A dictionary containing:
+ - task_id: Unique ID for this task
+ - workflow_id: ID of the parent workflow (optional)
+ - prompt: The main instruction for the agent
+ - input_documents: List of document objects to process
+ - output_specifications: List of required output documents
+ - context: Additional contextual information
+
+ Returns:
+ A dictionary containing:
+ - feedback: Text response explaining what the agent did
+ - documents: List of document objects created by the agent
+ """
+ # Base implementation - should be overridden by specialized agents
+ logger.warning(f"Agent {self.name} is using the default implementation of process_task")
+ return {
+ "feedback": f"The process_task method was not implemented by agent '{self.name}'.",
+ "documents": []
+ }
+
+
class AgentRegistry:
- """Zentrale Registry für alle verfügbaren Agenten im System."""
+ """Central registry for all available agents in the system."""
_instance = None
@classmethod
def get_instance(cls):
- """Gibt eine Singleton-Instanz der Agent-Registry zurück."""
+ """Return a singleton instance of the agent registry."""
if cls._instance is None:
cls._instance = cls()
return cls._instance
def __init__(self):
- """Initialisiert die Agent-Registry."""
+ """Initialize the agent registry."""
if AgentRegistry._instance is not None:
- raise RuntimeError("Singleton-Instanz existiert bereits - verwende get_instance()")
+ raise RuntimeError("Singleton instance already exists - use get_instance()")
self.agents = {}
self.ai_service = None
self._load_agents()
def _load_agents(self):
- """Lädt alle verfügbaren Agenten aus den Modulen."""
- logger.info("Lade Agent-Module...")
+ """Load all available agents from modules."""
+ logger.info("Loading agent modules...")
- # Liste der zu ladenden Agent-Module
+ # List of agent modules to load
agent_modules = []
agent_dir = os.path.dirname(__file__)
- # Durchsuche das Verzeichnis nach Agent-Modulen
+ # Search the directory for agent modules
for filename in os.listdir(agent_dir):
if filename.startswith("chat_agent_") and filename.endswith(".py"):
- agent_modules.append(filename[:-3]) # Entferne .py-Endung
+ agent_modules.append(filename[:-3]) # Remove .py extension
if not agent_modules:
- logger.warning("Keine Agent-Module gefunden")
+ logger.warning("No agent modules found")
return
- logger.info(f"{len(agent_modules)} Agent-Module gefunden")
+ logger.info(f"{len(agent_modules)} agent modules found")
- # Lade jedes Agent-Modul
+ # Load each agent module
for module_name in agent_modules:
try:
- # Importiere das Modul
+ # Import the module
module = importlib.import_module(f"modules.{module_name}")
- # Suche nach der Agent-Klasse oder einer get_*_agent-Funktion
- agent_name= module_name.split('_')[-1]
+ # Look for agent class or get_*_agent function
+ agent_name = module_name.split('_')[-1]
class_name = f"Agent{agent_name.capitalize()}"
getter_name = f"get_{agent_name}_agent"
agent = None
- # Versuche, den Agenten über die get_*_agent-Funktion zu erhalten
+ # Try to get the agent via the get_*_agent function
if hasattr(module, getter_name):
getter_func = getattr(module, getter_name)
agent = getter_func()
- logger.info(f"Agent '{agent.name}' über {getter_name}() geladen")
+ logger.info(f"Agent '{agent.name}' loaded via {getter_name}()")
- # Alternativ versuche, den Agenten direkt zu instanziieren
+ # Alternatively, try to instantiate the agent directly
elif hasattr(module, class_name):
agent_class = getattr(module, class_name)
agent = agent_class()
- logger.info(f"Agent '{agent.name}' (Typ: {agent.name}) direkt instanziert")
+ logger.info(f"Agent '{agent.name}' directly instantiated")
if agent:
- # Registriere den Agenten
+ # Register the agent
self.register_agent(agent)
else:
- logger.warning(f"Keine Agent-Klasse oder Getter-Funktion in Modul {module_name} gefunden")
+ logger.warning(f"No agent class or getter function found in module {module_name}")
except ImportError as e:
- logger.error(f"Modul {module_name} konnte nicht importiert werden: {e}")
+ logger.error(f"Module {module_name} could not be imported: {e}")
except Exception as e:
- logger.error(f"Fehler beim Laden des Agenten aus Modul {module_name}: {e}")
+ logger.error(f"Error loading agent from module {module_name}: {e}")
def set_ai_service(self, ai_service):
+ """Set the AI service for all agents."""
self.ai_service = ai_service
self.update_agent_dependencies()
def update_agent_dependencies(self):
- """Aktualisiert die Abhängigkeiten für alle registrierten Agenten."""
+ """Update dependencies for all registered agents."""
for agent_id, agent in self.agents.items():
if hasattr(agent, 'set_dependencies'):
agent.set_dependencies(ai_service=self.ai_service)
def register_agent(self, agent):
"""
- Registriert einen Agenten in der Registry.
+ Register an agent in the registry.
Args:
- agent: Der zu registrierende Agent
+ agent: The agent to register
"""
agent_id = getattr(agent, 'name', "unknown_agent")
- # Initialisiere Agenten mit Abhängigkeiten
+ # Initialize agent with dependencies
if hasattr(agent, 'set_dependencies'):
agent.set_dependencies(ai_service=self.ai_service)
self.agents[agent_id] = agent
- logger.debug(f"Agent '{agent.name}' (Typ: {agent_id}, Name: {agent_id}) registriert")
+ logger.debug(f"Agent '{agent.name}' registered")
def get_agent(self, agent_identifier: str):
"""
- Gibt eine Agenten-Instanz zurück
+ Return an agent instance
Args:
- agent_identifier: ID oder Typ des gewünschten Agenten
+ agent_identifier: Name under which the desired agent was registered
Returns:
- Agenten-Instanz oder None, falls nicht gefunden
+ Agent instance or None if not found
"""
if agent_identifier in self.agents:
return self.agents[agent_identifier]
- logger.error(f"Agent mit Kennung '{agent_identifier}' nicht gefunden")
+ logger.error(f"Agent with identifier '{agent_identifier}' not found")
return None
def get_all_agents(self) -> Dict[str, Any]:
- """Gibt alle registrierten Agenten zurück."""
+ """Return all registered agents."""
return self.agents
def get_agent_infos(self) -> List[Dict[str, Any]]:
- """Gibt Informationen über alle registrierten Agenten zurück."""
+ """Return information about all registered agents."""
agent_infos = []
seen_agents = set()
for agent in self.agents.values():
if agent not in seen_agents:
- # Verwende get_agent_info oder erstelle manuell die Info
- if hasattr(agent, 'get_agent_info'):
- agent_infos.append(agent.get_agent_info())
- else:
- agent_infos.append({
- "name": agent.name,
- "capabilities": getattr(agent, 'capabilities', ""),
- })
- logger.error(f"Agent '{agent.name}' does not show profile.")
+ agent_infos.append(agent.get_agent_info())
seen_agents.add(agent)
return agent_infos
-# Basis-Agent-Klasse
-class AgentBase:
- """
- Basis-Klasse für alle Chat-Agenten.
- Definiert die grundlegende Schnittstelle und Funktionalität.
- """
-
- def __init__(self):
- """Initialisiere den Basis-Agenten."""
- self.name = "Basis-Agent"
- self.capabilities = "Grundlegende Agentenfunktionen"
- self.ai_service = None
-
- def set_dependencies(self, ai_service=None):
- self.ai_service = ai_service
-
- def get_config(self) -> Dict[str, Any]:
- return {
- "name": self.name,
- "capabilities": self.capabilities,
- }
-
- async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
- # Basisimplementierung - sollte von spezialisierten Agenten überschrieben werden
- if not self.ai_service:
- logger.warning(f"Agent {self.id} hat keinen konfigurierten AI-Service")
- return {
- "role": "assistant",
- "content": f"Ich bin {self.name}, aber ich bin nicht richtig konfiguriert. Bitte den AI-Service einrichten.",
- "agent_name": self.name,
- }
-
- # Einfachen Prompt erstellen
- prompt = message.get("content", "")
-
- # Antwort generieren
- try:
- response_content = self.ai_service.call_api([
- {"role": "system", "content": f"Du bist {self.name}, ein spezialisierter {self.name}-Agent mit Fähigkeiten in: {self.capabilities}"},
- {"role": "user", "content": prompt}
- ])
-
- return {
- "role": "assistant",
- "content": response_content,
- "agent_name": self.name,
- }
- except Exception as e:
- logger.error(f"Fehler in Agent {self.id}: {str(e)}")
- return {
- "role": "assistant",
- "content": f"Ich habe einen Fehler festgestellt: {str(e)}",
- "agent_name": self.name,
- }
-
-
-# Singleton-Factory für die Agent-Registry
+# Singleton factory for the agent registry
def get_agent_registry():
return AgentRegistry.get_instance()
\ No newline at end of file
diff --git a/notes/changelog.txt b/notes/changelog.txt
index 7cd9975e..ae878ef6 100644
--- a/notes/changelog.txt
+++ b/notes/changelog.txt
@@ -1,33 +1,42 @@
....................... TASKS
+please revise all chat_agents* modules:
+- all comments, logs and outputs in english language
+- all ai answers in the language of the user
+- no language-specific features such as word analysis: a prompt in Japanese would not work with this! It must work generically for any language.
+- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section.
-run agent, then save output files to db
-. files save-> fileid list, ALWAYS TO WRITE NEW FILES!
-. chat_message_to_workflow(role, agent,chatmsg, workflow): with answer and fileidlist
+documentation agent:
+- why try to determine the document type when the "label" of the files to deliver ALWAYS includes the extension (e.g. .docx, .csv, etc.)? Please revise; this can be shortened and simplified considerably.
+webcrawler_agent:
+- there is a try - except mapping problem in the code. please also fix this
+-
+
+also attached: chat.py and chat_content_extraction (centralized), so that you can see the structure of the passed parameters.
----------------------- OPEN
PRIO1:
-Split big files into content-parts
+sharepoint connector with document search, content search, content extraction
+add connector to myoutlook
+
+Split big files into content-parts
PRIO2:
implement cleanup routines for files in lucydom_interface (File_Management_CLEANUP_INTERVAL): temp older than interval, all orphaned
-frontend: no labels definition
-
Integrate NDA Text as modal form - Data governance agreement by login with checkbox
-sharepoint connector with document search, content search, content extraction
-
-add connector to myoutlook
-
frontend to react
+frontend: no labels definition
+
+
----------------------- DONE
diff --git a/requirements.txt b/requirements.txt
index 00d82cba..8e6dfa11 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,6 +27,7 @@ pandas==2.2.3 # Aktuelle Version beibehalten
## Data Visualization
matplotlib==3.8.0 # Aktuelle Version beibehalten
seaborn==0.13.0
+markdown
## Web Scraping & HTTP
beautifulsoup4==4.12.2