This commit is contained in:
ValueOn AG 2025-04-20 23:53:37 +02:00
parent 9247de4346
commit 8b234a9a30
9 changed files with 2240 additions and 1725 deletions

View file

@ -7,7 +7,9 @@ von Benutzeranfragen, Agentenausführung und Ergebnisformatierung.
import os
import logging
import json
import re
import uuid
import base64
from datetime import datetime
from typing import Dict, Any, List, Optional, Union
@ -69,7 +71,7 @@ class ChatManager:
# 4. Speichere die Antwort als Message im Workflow und füge Log-Einträge hinzu
response_message = {
"role": "assistant",
"agent_type": "project_manager",
"agent_name": "project_manager",
"content": obj_user_response
}
self.message_add(workflow, response_message)
@ -213,7 +215,7 @@ JSON_OUTPUT = {{
# Parsen der JSON-Antwort
return self.parse_json_response(project_manager_output)
def chat_message_to_workflow(self, role: str, agent_type: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
"""
Integriert Benutzereingaben in ein Message-Objekt inklusive Dateien mit vollständigen Inhalten.
@ -223,7 +225,7 @@ JSON_OUTPUT = {{
Returns:
Message-Objekt mit Inhalt und Dokumenten samt Inhalten
"""
logger.info(f"Message from {role} {agent_type} sent with {len(chat_message.get('list_file_id', []))} documents")
logger.info(f"Message from {role} {agent_name} sent with {len(chat_message.get('list_file_id', []))} documents")
logger.debug(f"message = {self.parse_json2text(chat_message)}.")
# Nachrichteninhalt überprüfen
@ -243,7 +245,7 @@ JSON_OUTPUT = {{
# Nachrichtenobjekt erstellen
message_object = {
"role": role,
"agent_type": agent_type,
"agent_name": agent_name,
"content": message_content,
"documents": additional_files
}
@ -268,7 +270,7 @@ JSON_OUTPUT = {{
# Create basic message structure
final_message = {
"role": "assistant",
"agent_type": "project_manager",
"agent_name": "project_manager",
"content": obj_user_response,
"documents": []
}
@ -521,6 +523,7 @@ JSON_OUTPUT = {{
async def agent_processing(self, task: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Process a single agent task from the workflow.
Optimized for the task-based approach where all agents implement process_task.
Args:
task: The task definition containing agent name, prompt, and document specifications
@ -534,7 +537,8 @@ JSON_OUTPUT = {{
agent_prompt = task.get("prompt", "")
# Log the current step
step_info = f"Agent '{agent_name}' to create {', '.join([d.get('label') for d in task.get('output_documents', [])])}."
output_labels = [d.get("label", "unknown") for d in task.get("output_documents", [])]
step_info = f"Agent '{agent_name}' to create {', '.join(output_labels)}."
self.log_add(workflow, step_info)
# Check if prompt is empty
@ -542,63 +546,48 @@ JSON_OUTPUT = {{
logger.warning("Empty prompt, no task to do")
return []
# Prepare input documents for the agent
input_documents = self.agent_input_documents(task.get('input_documents', []), workflow)
# Prepare output documents for the agent
output_documents = []
for doc in task.get("output_documents",[]):
output_document={
"label":doc.get("label"),
"descripton_file_content":doc.get("prompt")
}
output_documents.append(output_document)
# Create AI prompt
ai_prompt = f"""
# Please deliver documents according to this instruction:
<instruction>
{agent_prompt}
</instruction>
# Input documents:
{self.parse_json2text(input_documents)}
# Output documents to provide:
{self.parse_json2text(output_documents)}
Your output must be strictly in the following JSON_OUTPUT format, with no additions before or after the JSON object:
JSON_OUTPUT = {{
"feedback":"your feedback for the delivered result",
"documents": [
{{
"label":"label of output document",
"content": "the produced content; if text format, then as text, otherwise in base64 format"
}},
# each output document a separate item
]
}}
"""
# Get agent from registry
agent = self.agent_registry.get_agent(agent_name)
if not agent:
logger.error(f"Agent '{agent_name}' not found")
return []
# Execute the agent
# Prepare input documents for the agent
input_documents = self.agent_input_documents(task.get('input_documents', []), workflow)
# Prepare output document specifications
output_specs = []
for doc in task.get("output_documents", []):
output_spec = {
"label": doc.get("label"),
"description": doc.get("prompt", "")
}
output_specs.append(output_spec)
# Create a standardized task object for the agent
agent_task = {
"task_id": str(uuid.uuid4()),
"workflow_id": workflow.get("id"),
"prompt": agent_prompt,
"input_documents": input_documents,
"output_specifications": output_specs,
"context": {
"workflow_round": workflow.get("current_round", 1),
"agent_type": agent_name,
"timestamp": datetime.now().isoformat()
}
}
# Execute the agent with the standardized task
try:
agent_results = await agent.process_message(ai_prompt)
except Exception as e:
logger.error(f"Error executing agent '{agent_name}': {str(e)}")
return []
# Process the task using the agent's standardized interface
agent_results = await agent.process_task(agent_task)
# Log the agent response
self.log_add(
workflow,
f"Agent '{agent_name}' completed task. Feedback: {agent_results.get('feedback', 'No feedback provided')}"
)
# Store produced files and prepare input object for message
agent_inputs = {
@ -606,13 +595,23 @@ JSON_OUTPUT = {{
"list_file_id": self.agent_save_documents(agent_results)
}
# Create a message in the workflow with the agent's response
agent_message = self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow)
logger.debug(f"agent result = {self.parse_json2text(agent_message)}.")
return agent_message.get("documents")
logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.")
return agent_message.get("documents", [])
except Exception as e:
error_msg = f"Error executing agent '{agent_name}': {str(e)}"
logger.error(error_msg)
self.log_add(workflow, error_msg, level="error")
return []
def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]:
"""
Saves all documents from agent results as files and returns a list of file IDs.
Enhanced to handle the standardized document format from agents.
Args:
agent_results: Dictionary containing agent feedback and documents
@ -632,32 +631,45 @@ JSON_OUTPUT = {{
content = doc.get("content", "")
# Split label into name and extension
import os
name, ext = os.path.splitext(label)
if ext.startswith('.'):
ext = ext[1:] # Remove leading dot
elif not ext:
# If no extension is provided, default to .txt for text content
ext = "txt"
label = f"{label}.{ext}"
# Determine if content is base64 encoded
is_base64 = False
import base64
if not isinstance(content, bytes):
# Check if content looks like base64
# Check if content might be base64 encoded
try:
if content and isinstance(content, str):
# Check for base64 pattern (simplified)
if (len(content) % 4 == 0 and
re.match(r'^[A-Za-z0-9+/]+={0,2}$', content)):
# Try to decode a small sample
if content and isinstance(content, str) and len(content) > 0:
sample = content[:100] if len(content) > 100 else content
base64.b64decode(sample)
# If no error, assume it's base64
is_base64 = True
except Exception:
# Not base64, treat as regular text
is_base64 = False
# If content has metadata flag indicating it's base64
if isinstance(content, dict) and content.get("_is_base64", False):
is_base64 = True
content = content.get("data", "")
# Convert content to bytes
if isinstance(content, str):
if is_base64:
# Decode base64 to bytes
try:
file_content = base64.b64decode(content)
except Exception as e:
logger.warning(f"Failed to decode base64 content: {str(e)}")
file_content = content.encode('utf-8')
else:
# Convert text to bytes
file_content = content.encode('utf-8')
@ -682,6 +694,7 @@ JSON_OUTPUT = {{
return file_ids
### Messages
def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]:
@ -740,7 +753,7 @@ JSON_OUTPUT = {{
Zusammenfassung der Nachricht
"""
role = message.get("role", "undefined")
agent_type = message.get("agent_type", "")
agent_name = message.get("agent_name", "")
content = message.get("content", "")
try:
@ -762,7 +775,7 @@ JSON_OUTPUT = {{
if docs_list:
docs_summary = f"\nDocuments:\n{'- '.join(docs_list)}"
return f"[{role} {agent_type}]: {content_summary}{docs_summary}"
return f"[{role} {agent_name}]: {content_summary}{docs_summary}"
async def message_summarize_content(self, content: Dict[str, Any]) -> str:
"""

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,113 +1,360 @@
"""
Kreativer Agent für wissensbasierte Antworten und kreative Inhaltsgenerierung.
Angepasst für die neue chat.py Architektur und chat_registry.py.
Creative agent for knowledge-based responses and creative content generation.
Optimized for the new task-based processing.
"""
import logging
from typing import Dict, Any, List, Optional
from typing import Dict, Any, List
from modules.chat_registry import AgentBase
logger = logging.getLogger(__name__)
class AgentCreative(AgentBase):
"""Agent für wissensbasierte Antworten und kreative Inhaltsgenerierung"""
"""Agent for knowledge-based responses and creative content generation"""
def __init__(self):
"""Initialisiert den kreativen Agent"""
"""Initialize the creative agent"""
super().__init__()
self.name = "Creative Knowledge Assistant"
self.capabilities = ("knowledge_sharing,content_creation,document_generation,"
"creative_writing,poweron,document_processing,"
"information_extraction,data_transformation,"
"document_analysis,text_processing,table_creation,"
"content_structuring")
self.name = "creative"
self.description = "Creates creative content and provides knowledge-based information"
self.capabilities = [
"knowledge_sharing",
"content_creation",
"creative_writing",
"information_synthesis",
"document_generation",
"question_answering"
]
def get_agent_info(self) -> Dict[str, Any]:
"""Gibt Agent-Informationen für die Registry zurück"""
info = super().get_config()
return info
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und generiert eine kreative oder wissensbasierte Antwort.
Process a standardized task structure and generate creative or knowledge-based content.
Args:
message: Die zu verarbeitende Nachricht
context: Zusätzlicher Kontext
task: A dictionary containing:
- task_id: Unique ID for this task
- prompt: The main instruction for the agent
- input_documents: List of documents to process
- output_specifications: List of required output documents
- context: Additional contextual information
Returns:
Die generierte Antwort
A dictionary containing:
- feedback: Text response explaining the created content
- documents: List of created document objects
"""
# Workflow-ID aus Kontext oder Nachricht extrahieren
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
try:
# Extract relevant task information
prompt = task.get("prompt", "")
input_documents = task.get("input_documents", [])
output_specs = task.get("output_specifications", [])
# Antwortstruktur erstellen
response = {
"role": "assistant",
"content": "",
"agent_name": self.name,
"workflow_id": workflow_id,
# Check if AI service is available
if not self.ai_service:
logger.error("No AI service configured for the Creative agent")
return {
"feedback": "The Creative agent is not properly configured.",
"documents": []
}
try:
# Benutzernachricht extrahieren
user_message = message.get("content", "")
# Extract context from input documents
document_context = self._extract_document_context(input_documents)
if not user_message:
response["content"] = "Bitte geben Sie eine Nachricht an, auf die ich antworten kann."
return response
# PowerOn handling, if included in the request
if "poweron" in prompt.lower():
return await self._handle_poweron_task(prompt, output_specs)
# PowerOn-Behandlung, falls in der Anfrage enthalten
if "poweron" in user_message.lower():
logger.info("PowerOn-Schlüsselwort erkannt, spezielle Antwort generieren")
# Collect generated documents
generated_documents = []
# Determine content type based on the prompt
content_type = self._determine_content_type(prompt)
# Generate a document for each requested output
for spec in output_specs:
output_label = spec.get("label", "")
output_description = spec.get("description", "")
# Determine format based on file extension
format_type = self._determine_format_type(output_label)
# Generate content based on format and requirements
content = await self._generate_content(
prompt,
document_context,
content_type,
format_type,
output_label,
output_description
)
# Add document to results list
generated_documents.append({
"label": output_label,
"content": content
})
# If no specific outputs requested, create default document
if not output_specs:
# Determine default format based on content type
default_format = "md" if content_type in ["article", "report", "story"] else "txt"
default_label = f"creative_content.{default_format}"
# Generate content
content = await self._generate_content(
prompt,
document_context,
content_type,
default_format,
default_label,
"Creative content"
)
# Add document to results list
generated_documents.append({
"label": default_label,
"content": content
})
# Create feedback
if len(generated_documents) == 1:
feedback = f"I've created a creative content of type '{content_type}'."
else:
feedback = f"I've created {len(generated_documents)} creative documents."
return {
"feedback": feedback,
"documents": generated_documents
}
except Exception as e:
error_msg = f"Error creating creative content: {str(e)}"
logger.error(error_msg)
return {
"feedback": f"An error occurred while creating creative content: {str(e)}",
"documents": []
}
def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
    """
    Collect the textual payloads of the given documents into one context string.

    Each document contributes a ``--- <name> ---`` header followed by the
    ``data`` of every content entry whose metadata marks it as text
    (``metadata.is_text``).

    Args:
        documents: Document objects carrying ``name`` and ``contents`` keys.

    Returns:
        Headers and text bodies joined by blank lines; an empty string when
        no documents are supplied.
    """
    parts: List[str] = []
    for document in documents:
        parts.append(f"--- {document.get('name', 'Unnamed document')} ---")
        parts.extend(
            entry.get("data", "")
            for entry in document.get("contents", [])
            if entry.get("metadata", {}).get("is_text", False)
        )
    return "\n\n".join(parts)
def _determine_content_type(self, prompt: str) -> str:
    """
    Determine the content type based on the prompt.

    Detection relies on universal patterns rather than language-specific
    keywords: a question mark anywhere in the prompt wins first, then the
    keyword groups below are tried in order.

    Args:
        prompt: Task description

    Returns:
        Content type (article, story, report, answer, etc.); falls back to
        the generic "content" when nothing matches.
    """
    # Questions short-circuit every keyword group.
    if "?" in prompt:
        return "answer"

    lowered = prompt.lower()

    # Ordered keyword groups for common document types; order matters so
    # earlier groups take precedence, matching the original elif chain.
    keyword_groups = (
        (("article", "blog", "post"), "article"),
        (("story", "narrative", "tale"), "story"),
        (("report", "analysis"), "report"),
        (("email", "letter", "message"), "letter"),
        (("presentation", "slides"), "presentation"),
        (("poem", "poetry", "rhyme"), "poem"),
        (("dialog", "conversation"), "dialogue"),
    )
    for keywords, content_type in keyword_groups:
        if any(word in lowered for word in keywords):
            return content_type

    # Default: general creative content
    return "content"
def _determine_format_type(self, output_label: str) -> str:
    """
    Determine the format type based on the filename.

    Args:
        output_label: Output filename

    Returns:
        Format type (markdown, html, text, etc.); "txt" when the label has
        no extension, and markdown for any unknown extension.
    """
    if '.' not in output_label:
        return "txt"  # Default format when no extension is present

    extension = output_label.rsplit('.', 1)[-1].lower()

    # Known extension -> format mapping; anything else falls back to
    # markdown, mirroring the original elif chain.
    known_formats = {
        "md": "markdown",
        "html": "html",
        "txt": "text",
        "text": "text",
        "json": "json",
    }
    return known_formats.get(extension, "markdown")
async def _handle_poweron_task(self, prompt: str, output_specs: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Handle special PowerOn-related tasks.
Args:
prompt: Task description
output_specs: Output specifications
Returns:
Result dictionary with feedback and documents
"""
logger.info("PowerOn keyword detected, generating special response")
poweron_prompt = f"""
Bedanke dich beim Benutzer in der Sprache seiner Anfrage ganz herzlich dafür, dass er daran denkt, dass du PowerOn bist.
Teile ihm mit, wie erfreut du bist, Teil der PowerOn-Familie zu sein, die daran arbeitet, Menschen für ein besseres Leben zu unterstützen.
Thank the user in their request language for remembering that you are PowerOn.
Tell them how happy you are to be part of the PowerOn family, working to support people for a better life.
Generiere dann eine kurze Antwort (1-2 Sätze) auf diese Frage: {user_message}
Then generate a brief response (1-2 sentences) to this question: {prompt}
"""
try:
poweron_response = await self.ai_service.call_api([
{"role": "system", "content": "Du bist ein hilfreicher Assistent, der Teil der PowerOn-Familie ist."},
{"role": "system", "content": "You are a helpful assistant who is part of the PowerOn family."},
{"role": "user", "content": poweron_prompt}
])
response["content"] = poweron_response
return response
# Collect generated documents
generated_documents = []
# Create a document for each requested output
if output_specs:
for spec in output_specs:
output_label = spec.get("label", "")
format_type = self._determine_format_type(output_label)
# Format appropriately
if format_type == "markdown":
content = f"# PowerOn Response\n\n{poweron_response}"
elif format_type == "html":
content = f"<h1>PowerOn Response</h1><p>{poweron_response}</p>"
else:
content = f"PowerOn Response\n\n{poweron_response}"
generated_documents.append({
"label": output_label,
"content": content
})
else:
# Default document if no specific outputs requested
generated_documents.append({
"label": "poweron_response.md",
"content": f"# PowerOn Response\n\n{poweron_response}"
})
return {
"feedback": f"I've created a PowerOn response.",
"documents": generated_documents
}
except Exception as e:
logger.error(f"Fehler beim Aufruf der API für PowerOn: {str(e)}")
response["content"] = "Ich bin auf einen Fehler gestoßen, während ich eine PowerOn-Antwort generierte. Bitte versuchen Sie es erneut."
return response
logger.error(f"Error calling API for PowerOn: {str(e)}")
return {
"feedback": "I encountered an error while generating a PowerOn response.",
"documents": []
}
# Einfacher Systemprompt, der sich auf die direkte Antwort auf die Benutzeranfrage konzentriert
system_prompt = """Du bist ein hilfreicher, kreativer Assistent.
Antworte direkt auf die Anfrage des Benutzers, ohne auf einen Workflow oder Systemkontext zu verweisen.
Konzentriere dich nur darauf, eine direkte, hilfreiche Antwort auf die spezifische Frage oder Anfrage zu geben."""
async def _generate_content(self, prompt: str, context: str, content_type: str,
format_type: str, output_label: str, output_description: str) -> str:
"""
Generate creative or knowledge-based content based on the prompt.
# Verarbeiten mit dem KI-Service
Args:
prompt: Task description
context: Document context
content_type: Type of content to create
format_type: Output format
output_label: Output filename
output_description: Description of desired output
Returns:
Generated content
"""
if not self.ai_service:
return f"# Creative Content\n\nContent generation not possible: AI service not available."
# Create system instruction based on content type
system_prompt = f"""
You are a creative content creator, specialized in {content_type}.
Your task is to create high-quality, engaging, and accurate content.
Make the content structured, clear, and appealing in the desired format.
"""
# Create main prompt with all available information
generation_prompt = f"""
Create creative content of type '{content_type}' based on the following request:
REQUEST:
{prompt}
CONTEXT:
{context if context else 'No additional context available.'}
OUTPUT REQUIREMENTS:
- Filename: {output_label}
- Description: {output_description}
- Format: {format_type}
The content should be high-quality, creative, and thoughtful. Follow all instructions in the request precisely.
The content must perfectly match the {format_type} format.
"""
try:
# Call AI for content generation
content = await self.ai_service.call_api([
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_message}
{"role": "user", "content": generation_prompt}
])
response["content"] = content
return response
# For markdown format, ensure there's a title at the beginning
if format_type == "markdown" and not content.strip().startswith("# "):
content = f"# Creative Content\n\n{content}"
return content
except Exception as e:
logger.error(f"Fehler in process_message: {str(e)}")
response["content"] = f"Bei der Verarbeitung Ihrer Anfrage ist ein Fehler aufgetreten: {str(e)}"
return response
logger.error(f"Error in creative content generation: {str(e)}")
return f"# Creative Content\n\nError in content generation: {str(e)}"
# Singleton-Instanz
_creative_agent = None
# Factory function for the Creative agent
def get_creative_agent():
"""Gibt eine Singleton-Instanz des kreativen Agenten zurück"""
global _creative_agent
if _creative_agent is None:
_creative_agent = AgentCreative()
return _creative_agent
"""
Factory function that returns an instance of the Creative agent.
Returns:
An instance of the Creative agent
"""
return AgentCreative()

View file

@ -1,312 +1,453 @@
"""
Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten.
Angepasst für die neue chat.py Architektur und chat_registry.py.
Documentation agent for creating documentation, reports, and structured content.
Optimized for the new task-based processing.
"""
import logging
import json
import uuid
from typing import Dict, Any, List
from datetime import datetime
from modules.chat_registry import AgentBase
logger = logging.getLogger(__name__)
class AgentDocumentation(AgentBase):
"""Agent für die Erstellung von Dokumentation und strukturierten Inhalten"""
"""Agent for creating documentation and structured content"""
def __init__(self):
"""Initialisiert den Dokumentations-Agent"""
"""Initialize the documentation agent"""
super().__init__()
self.name = "Documentation Specialist"
self.capabilities = "report_generation,documentation,content_structuring,technical_writing,knowledge_organization"
self.name = "documentation"
self.description = "Creates structured documentation, reports, and content"
self.capabilities = [
"report_generation",
"documentation",
"content_structuring",
"technical_writing",
"knowledge_organization"
]
def get_agent_info(self) -> Dict[str, Any]:
"""Gibt Agent-Informationen für die Registry zurück"""
info = super().get_config()
return info
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und erstellt Dokumentation.
Process a standardized task structure and create documentation.
Args:
message: Eingabenachricht
context: Optionaler Kontext
task: A dictionary containing:
- task_id: Unique ID for this task
- prompt: The main instruction for the agent
- input_documents: List of documents to process
- output_specifications: List of required output documents
- context: Additional contextual information
Returns:
Antwortnachricht mit Dokumentation
A dictionary containing:
- feedback: Text response explaining the created documentation
- documents: List of created document objects
"""
# Workflow-ID aus Kontext oder Nachricht extrahieren
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
try:
# Extract relevant task information
prompt = task.get("prompt", "")
input_documents = task.get("input_documents", [])
output_specs = task.get("output_specifications", [])
# Antwortstruktur erstellen
response = {
"role": "assistant",
"content": "",
"agent_name": self.name,
"workflow_id": workflow_id,
# Check if AI service is available
if not self.ai_service:
logger.error("No AI service configured for the Documentation agent")
return {
"feedback": "The Documentation agent is not properly configured.",
"documents": []
}
try:
# Aufgabe aus Nachricht extrahieren
task = message.get("content", "")
# Extract context from input documents
document_context = self._extract_document_context(input_documents)
# Dokumenttyp erkennen
document_type = self._detect_document_type(task)
logger.info(f"Erstelle {document_type}-Dokumentation")
# Generate title for the document
title = await self._generate_title(prompt, document_context)
# Angehängte Dokumente verarbeiten
document_context = ""
if message.get("documents"):
logger.info("Verarbeite Referenzdokumente")
document_context = self._process_documents(message)
# Collect created documents
generated_documents = []
# Prompt mit Dokumentkontext erweitern
enhanced_prompt = f"{task}\n\n{document_context}" if document_context else task
# Create a document for each requested output
for spec in output_specs:
output_label = spec.get("label", "")
output_description = spec.get("description", "")
# Komplexität bewerten
is_complex = self._assess_complexity(enhanced_prompt)
# Determine format and document type based on file extension
format_type, document_type = self._determine_format_and_type(output_label)
# Titel generieren
title = await self._generate_title(enhanced_prompt, document_type)
# Assess complexity
is_complex = self._assess_complexity(prompt)
# Inhalt basierend auf Komplexität generieren
# Generate document content based on complexity
if is_complex:
content = await self._generate_complex_document(enhanced_prompt, document_type, title)
content = await self._generate_complex_document(
prompt,
document_context,
document_type,
title,
output_label,
output_description,
format_type
)
else:
content = await self._generate_simple_document(enhanced_prompt, document_type, title)
content = await self._generate_simple_document(
prompt,
document_context,
document_type,
title,
output_label,
output_description,
format_type
)
# Dokument erstellen
doc_id = f"doc_{uuid.uuid4()}"
document = {
"id": doc_id,
"source": {
"type": "generated",
"id": doc_id,
"name": title,
"content_type": "text/markdown"
},
"contents": [
{
"type": "text",
"text": content,
"is_extracted": True
# Add document to results list
generated_documents.append({
"label": output_label,
"content": content
})
# If no specific outputs requested, create default markdown document
if not output_specs:
content = await self._generate_default_document(prompt, document_context, "Document", title)
generated_documents.append({
"label": f"{self._sanitize_filename(title)}.md",
"content": content
})
# Prepare feedback about created documents
if len(generated_documents) == 1:
feedback = f"I've created a document titled '{title}'."
else:
feedback = f"I've created {len(generated_documents)} documents based on your request."
return {
"feedback": feedback,
"documents": generated_documents
}
]
}
# Dokument zur Antwort hinzufügen
response["documents"].append(document)
# Antwortinhalt aktualisieren
response["content"] = f"Ich habe ein Dokument mit dem Titel '{title}' erstellt, das die gewünschten Informationen enthält. Das Dokument ist dieser Nachricht beigefügt."
return response
except Exception as e:
error_msg = f"Fehler bei der Dokumentationserstellung: {str(e)}"
error_msg = f"Error creating documentation: {str(e)}"
logger.error(error_msg)
response["content"] = f"Bei der Erstellung der Dokumentation ist ein Fehler aufgetreten: {str(e)}"
return response
return {
"feedback": f"An error occurred while creating the documentation: {str(e)}",
"documents": []
}
def _detect_document_type(self, message: str) -> str:
def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
"""
Erkennt den Dokumenttyp aus der Nachricht.
Extract context from input documents.
Args:
message: Benutzernachricht
documents: List of document objects
Returns:
Erkannter Dokumenttyp
Extracted context as text
"""
message = message.lower()
if not documents:
return ""
if any(term in message for term in ["manual", "guide", "instruction", "tutorial", "anleitung", "handbuch"]):
return "manual"
elif any(term in message for term in ["report", "analysis", "assessment", "review", "bericht", "analyse"]):
return "report"
elif any(term in message for term in ["process", "workflow", "procedure", "steps", "prozess", "ablauf"]):
return "process"
elif any(term in message for term in ["presentation", "slides", "deck", "präsentation", "folien"]):
return "presentation"
context_parts = []
for doc in documents:
doc_name = doc.get("name", "Unnamed document")
context_parts.append(f"--- {doc_name} ---")
for content in doc.get("contents", []):
if content.get("metadata", {}).get("is_text", False):
context_parts.append(content.get("data", ""))
return "\n\n".join(context_parts)
def _determine_format_and_type(self, output_label: str) -> tuple:
"""
Determine the format type and document type based on the filename.
Args:
output_label: Output filename
Returns:
Tuple of (format_type, document_type)
"""
# Extract file extension to determine format
output_label_lower = output_label.lower()
# Determine format based on extension
if output_label_lower.endswith(".md"):
format_type = "markdown"
elif output_label_lower.endswith(".html"):
format_type = "html"
elif output_label_lower.endswith(".txt"):
format_type = "text"
elif output_label_lower.endswith(".csv"):
format_type = "csv"
elif output_label_lower.endswith(".json"):
format_type = "json"
else:
return "document"
# Default to markdown
format_type = "markdown"
def _process_documents(self, message: Dict[str, Any]) -> str:
# Determine document type based on filename or format
if "manual" in output_label_lower or "guide" in output_label_lower:
document_type = "Manual"
elif "report" in output_label_lower or "analysis" in output_label_lower:
document_type = "Report"
elif "process" in output_label_lower or "workflow" in output_label_lower:
document_type = "Process Documentation"
elif "present" in output_label_lower or "slide" in output_label_lower:
document_type = "Presentation"
else:
document_type = "Document"
return format_type, document_type
def _assess_complexity(self, prompt: str) -> bool:
"""
Verarbeitet Dokumente in der Nachricht.
Assess the complexity of the task.
Args:
message: Nachricht mit Dokumenten
prompt: Task description
Returns:
Dokumentkontext als Text
True for complex tasks, False otherwise
"""
document_context = ""
# Language-agnostic complexity assessment
prompt_length = len(prompt)
for document in message.get("documents", []):
source = document.get("source", {})
doc_name = source.get("name", "unnamed")
# Check for structural indicators in a language-agnostic way
has_sections = ":" in prompt and "\n" in prompt
has_lists = "-" in prompt or "*" in prompt or "#" in prompt
document_context += f"\n\n--- {doc_name} ---\n"
# Complex if the prompt is long or contains structural elements
return prompt_length > 500 or has_sections or has_lists
for content in document.get("contents", []):
if content.get("type") == "text":
document_context += content.get("text", "")
return document_context
def _assess_complexity(self, task: str) -> bool:
def _sanitize_filename(self, filename: str) -> str:
"""
Bewertet die Aufgabenkomplexität.
Sanitize a filename by removing invalid characters.
Args:
task: Die Aufgabenbeschreibung
filename: Filename to sanitize
Returns:
True bei komplexem Dokument, sonst False
Sanitized filename
"""
# Einfache Heuristik zur Komplexitätsbewertung
complexity_indicators = [
"detailliert", "ausführlich", "umfassend", "komplex", "detailed",
"comprehensive", "in-depth", "multiple sections", "kapitel",
"abschnitte", "struktur", "analyse", "vergleich"
]
# Replace invalid characters with underscores
invalid_chars = r'<>:"/\|?*'
for char in invalid_chars:
filename = filename.replace(char, '_')
# Zählen der Komplexitätsindikatoren
indicator_count = sum(1 for indicator in complexity_indicators if indicator in task.lower())
# Trim filename if too long
if len(filename) > 100:
filename = filename[:97] + "..."
# Weitere Indikatoren: Textlänge, Anzahl der Anforderungen
length_factor = len(task) > 500
requirements_count = task.lower().count("muss") + task.lower().count("soll") + task.lower().count("should") + task.lower().count("must")
return filename
# Komplexität basierend auf Indikatoren bestimmen
return (indicator_count >= 2) or (length_factor and requirements_count >= 3)
async def _generate_title(self, task: str, document_type: str) -> str:
async def _generate_title(self, prompt: str, context: str) -> str:
"""
Generiert einen Titel für das Dokument.
Generate a title for the document.
Args:
task: Die Aufgabenbeschreibung
document_type: Dokumenttyp
prompt: Task description
context: Document context
Returns:
Generierter Titel
Generated title
"""
if not self.ai_service:
return f"{document_type.capitalize()} Dokument"
return f"Document {uuid.uuid4().hex[:8]}"
prompt = f"""
Erstelle einen prägnanten, professionellen Titel für dieses {document_type}:
title_prompt = f"""
Create a concise, professional title for this document based on the following request:
{task}
{prompt}
Antworte NUR mit dem Titel, nichts anderes.
Reply ONLY with the title, nothing else.
"""
try:
title = await self.ai_service.call_api([
{"role": "system", "content": "Du erstellst Dokumenttitel."},
{"role": "user", "content": prompt}
{"role": "system", "content": "You create precise document titles."},
{"role": "user", "content": title_prompt}
])
# Titel bereinigen
return title.strip('"\'#*- \n\t')
except Exception:
return f"{document_type.capitalize()} Dokument"
# Clean up title
title = title.strip('"\'#*- \n\t')
async def _generate_complex_document(self, task: str, document_type: str, title: str) -> str:
# Return default title if generated title is empty
if not title:
return f"Document {uuid.uuid4().hex[:8]}"
return title
except Exception as e:
logger.warning(f"Error in title generation: {str(e)}")
return f"Document {uuid.uuid4().hex[:8]}"
async def _generate_complex_document(self, prompt: str, context: str, document_type: str,
title: str, output_label: str, output_description: str,
format_type: str) -> str:
"""
Generiert ein komplexes Dokument mit Struktur.
Generate a complex document with structure.
Args:
task: Die Aufgabenbeschreibung
document_type: Dokumenttyp
title: Dokumenttitel
prompt: Task description
context: Document context
document_type: Document type
title: Document title
output_label: Output filename
output_description: Description of desired output
format_type: Output format
Returns:
Generierter Dokumentinhalt
Generated document content
"""
if not self.ai_service:
return f"# {title}\n\nDokumentgenerierung nicht möglich: KI-Service nicht verfügbar."
return f"# {title}\n\nDocument generation not possible: AI service not available."
prompt = f"""
Erstelle ein umfassendes, gut strukturiertes {document_type} mit dem Titel "{title}" basierend auf:
generation_prompt = f"""
Create a comprehensive, well-structured {document_type} with the title "{title}" based on:
{task}
TASK:
{prompt}
Das Dokument sollte Folgendes enthalten:
1. Eine klare Einleitung mit Zweck und Umfang
2. Logisch organisierte Abschnitte mit Überschriften
3. Detaillierte Inhalte mit Beispielen und Belegen
4. Ein Fazit mit den wichtigsten Erkenntnissen
5. Geeignete Formatierung mit Markdown
CONTEXT:
{context if context else 'No additional context available.'}
Formatiere das Dokument in Markdown mit korrekten Überschriften, Listen und Hervorhebungen.
OUTPUT REQUIREMENTS:
- Filename: {output_label}
- Description: {output_description}
- Format: {format_type}
The document should include:
1. A clear introduction with purpose and scope
2. Logically organized sections with headings
3. Detailed content with examples and evidence
4. A conclusion with key insights
5. Appropriate formatting according to the output format ({format_type})
The document must perfectly match the {format_type} format.
"""
try:
content = await self.ai_service.call_api([
{"role": "system", "content": "Du erstellst umfassende, gut strukturierte Dokumentation."},
{"role": "user", "content": prompt}
{"role": "system", "content": f"You create comprehensive, well-structured documentation in {format_type} format."},
{"role": "user", "content": generation_prompt}
])
# Sicherstellen, dass der Titel am Anfang steht
# For markdown format, ensure the title is at the beginning
if format_type == "markdown" and not content.strip().startswith("# "):
content = f"# {title}\n\n{content}"
return content
except Exception as e:
logger.error(f"Error in document generation: {str(e)}")
return f"# {title}\n\nError in document generation: {str(e)}"
async def _generate_simple_document(self, prompt: str, context: str, document_type: str,
title: str, output_label: str, output_description: str,
format_type: str) -> str:
"""
Generate a simple document without complex structure.
Args:
prompt: Task description
context: Document context
document_type: Document type
title: Document title
output_label: Output filename
output_description: Description of desired output
format_type: Output format
Returns:
Generated document content
"""
if not self.ai_service:
return f"# {title}\n\nDocument generation not possible: AI service not available."
generation_prompt = f"""
Create a precise, focused {document_type} with the title "{title}" based on:
TASK:
{prompt}
CONTEXT:
{context if context else 'No additional context available.'}
OUTPUT REQUIREMENTS:
- Filename: {output_label}
- Description: {output_description}
- Format: {format_type}
The document should be clear, precise, and to the point, without a complex chapter structure.
Format it according to the output format ({format_type}).
The document must perfectly match the {format_type} format.
"""
try:
content = await self.ai_service.call_api([
{"role": "system", "content": f"You create precise, focused documentation in {format_type} format."},
{"role": "user", "content": generation_prompt}
])
# For markdown format, ensure the title is at the beginning
if format_type == "markdown" and not content.strip().startswith("# "):
content = f"# {title}\n\n{content}"
return content
except Exception as e:
logger.error(f"Error in document generation: {str(e)}")
return f"# {title}\n\nError in document generation: {str(e)}"
async def _generate_default_document(self, prompt: str, context: str, document_type: str, title: str) -> str:
"""
Generate a default markdown document when no specific output specifications are present.
Args:
prompt: Task description
context: Document context
document_type: Document type
title: Document title
Returns:
Generated document content
"""
if not self.ai_service:
return f"# {title}\n\nDocument generation not possible: AI service not available."
generation_prompt = f"""
Create a structured {document_type} with the title "{title}" based on:
TASK:
{prompt}
CONTEXT:
{context if context else 'No additional context available.'}
Format the document with markdown syntax and create a clear, professional structure.
"""
try:
content = await self.ai_service.call_api([
{"role": "system", "content": "You create structured documentation in markdown format."},
{"role": "user", "content": generation_prompt}
])
# Ensure the title is at the beginning
if not content.strip().startswith("# "):
content = f"# {title}\n\n{content}"
return content
except Exception as e:
return f"# {title}\n\nFehler bei der Dokumentgenerierung: {str(e)}"
logger.error(f"Error in document generation: {str(e)}")
return f"# {title}\n\nError in document generation: {str(e)}"
async def _generate_simple_document(self, task: str, document_type: str, title: str) -> str:
"""
Generiert ein einfaches Dokument ohne komplexe Struktur.
Args:
task: Die Aufgabenbeschreibung
document_type: Dokumenttyp
title: Dokumenttitel
Returns:
Generierter Dokumentinhalt
"""
if not self.ai_service:
return f"# {title}\n\nDokumentgenerierung nicht möglich: KI-Service nicht verfügbar."
prompt = f"""
Erstelle ein präzises, fokussiertes {document_type} mit dem Titel "{title}" basierend auf:
{task}
Das Dokument sollte klar, präzise und auf den Punkt sein, ohne komplexe Kapitelstruktur.
Formatiere es mit Markdown und verwende geeignete Überschriften und Formatierungen.
"""
try:
content = await self.ai_service.call_api([
{"role": "system", "content": "Du erstellst präzise, fokussierte Dokumentation."},
{"role": "user", "content": prompt}
])
# Sicherstellen, dass der Titel am Anfang steht
if not content.strip().startswith("# "):
content = f"# {title}\n\n{content}"
return content
except Exception as e:
return f"# {title}\n\nFehler bei der Dokumentgenerierung: {str(e)}"
# Factory function for the Documentation agent
def get_documentation_agent():
    """
    Factory function that returns an instance of the Documentation agent.

    Returns:
        An instance of the Documentation agent
    """
    # NOTE(review): a fresh instance is created per call; callers must not
    # assume singleton behavior.
    return AgentDocumentation()

View file

@ -1,125 +1,137 @@
"""
Webcrawler-Agent für Recherche und Abruf von Informationen aus dem Web.
Angepasst für die neue chat.py Architektur und chat_registry.py.
Webcrawler agent for research and retrieval of information from the web.
Optimized for the new task-based processing.
"""
import json
import logging
import json
import re
import time
from typing import Dict, Any, List, Optional
from typing import Dict, Any, List
from urllib.parse import quote_plus, unquote
from bs4 import BeautifulSoup
import requests
import markdown
from modules.chat_registry import AgentBase
from modules.configuration import APP_CONFIG
logger = logging.getLogger(__name__)
class AgentWebcrawler(AgentBase):
"""Agent für Webrecherche und Informationsabruf"""
"""Agent for web research and information retrieval"""
def __init__(self):
    """Initialize the webcrawler agent with its identity, capabilities and crawl limits."""
    super().__init__()
    self.name = "webcrawler"
    self.description = "Conducts web research and collects information from online sources"
    self.capabilities = [
        "web_search",
        "information_retrieval",
        "data_collection",
        "search_results_analysis",
        "webpage_content_extraction"
    ]

    # Web crawling configuration; string defaults guard against missing config keys.
    self.max_url = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_URLS", "5"))
    self.max_key = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_KEYWORDS", "3"))
    self.max_result = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_RESULTS", "5"))
    self.timeout = int(APP_CONFIG.get("Connector_AiWebscraping_TIMEOUT", "30"))
def get_agent_info(self) -> Dict[str, Any]:
    """Return agent information for the registry."""
    # Delegate entirely to the base-class configuration.
    return super().get_config()
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und führt bei Bedarf eine Webrecherche durch.
Process a standardized task structure and conduct web research.
Args:
message: Die zu verarbeitende Nachricht
context: Zusätzlicher Kontext
task: A dictionary containing:
- task_id: Unique ID for this task
- prompt: The main instruction for the agent
- input_documents: List of documents to process
- output_specifications: List of required output documents
- context: Additional contextual information
Returns:
Die generierte Antwort oder Ablehnung, wenn keine Webrecherche erforderlich ist
A dictionary containing:
- feedback: Text response explaining the research results
- documents: List of created document objects
"""
# Workflow-ID aus Kontext oder Nachricht extrahieren
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
try:
# Extract relevant task information
prompt = task.get("prompt", "")
output_specs = task.get("output_specifications", [])
# Antwortstruktur erstellen
response = {
"role": "assistant",
"content": "",
"agent_name": self.name,
"workflow_id": workflow_id
# Check if AI service is available
if not self.ai_service:
logger.error("No AI service configured for the Webcrawler agent")
return {
"feedback": "The Webcrawler agent is not properly configured.",
"documents": []
}
try:
# Abfrage aus der Nachricht abrufen
prompt = message.get("content", "").strip()
# Prüfen, ob es sich explizit um eine Webrecherche-Anfrage handelt
# Check if this is a web research request
is_web_research = await self._is_web_research_request(prompt)
if not is_web_research:
# Keine Webrecherche-Anfrage ablehnen
logger.info("Anfrage abgelehnt: keine Webrecherche-Aufgabe")
response["content"] = "Diese Anfrage scheint keine Webrecherche zu erfordern. Weiterleitung an einen passenderen Agenten."
response["status"] = "rejected"
return response
logger.info("Request rejected: not a web research task")
return {
"feedback": "This request doesn't appear to require web research.",
"documents": []
}
# Mit Webrecherche fortfahren
logger.info(f"Webrecherche für: {prompt[:50]}...")
# Suchstrategie vorbereiten
logger.info("Erstelle Suchstrategie")
# Proceed with web research
logger.info(f"Web research for: {prompt[:50]}...")
# Create search strategy
search_strategy = await self._create_search_strategy(prompt)
search_keys = search_strategy.get("skey", [])
search_urls = search_strategy.get("url", [])
if search_keys:
logger.info(f"Suche nach {len(search_keys)} Schlüsselbegriffen: {', '.join(search_keys[:2])}...")
logger.info(f"Searching for {len(search_keys)} key terms: {', '.join(search_keys[:2])}...")
if search_urls:
logger.info(f"Suche in {len(search_urls)} direkten URLs: {', '.join(search_urls[:2])}...")
logger.info(f"Searching in {len(search_urls)} direct URLs: {', '.join(search_urls[:2])}...")
# Suche ausführen
# Execute search
results = []
# Suchbegriffe verarbeiten
# Process search terms
for keyword in search_keys:
logger.info(f"Suche im Web nach: '{keyword}'")
logger.info(f"Searching the web for: '{keyword}'")
keyword_results = self._search_web(keyword)
results.extend(keyword_results)
logger.info(f"Gefunden: {len(keyword_results)} Ergebnisse für '{keyword}'")
logger.info(f"Found: {len(keyword_results)} results for '{keyword}'")
# Direkte URLs verarbeiten
# Process direct URLs
for url in search_urls:
logger.info(f"Extrahiere Inhalt von: {url}")
logger.info(f"Extracting content from: {url}")
soup = self._read_url(url)
# Titel aus der Seite extrahieren, falls vorhanden
# Extract title from the page, if available
title = self._extract_title(soup, url)
result = self._parse_result(soup, title, url)
results.append(result)
logger.info(f"Extrahiert: '{title}' von {url}")
logger.info(f"Extracted: '{title}' from {url}")
# Ergebnisse für die endgültige Ausgabe verarbeiten
logger.info(f"Analysiere {len(results)} Web-Ergebnisse")
# Process results for final output
logger.info(f"Analyzing {len(results)} web results")
# Zusammenfassungen für jedes Ergebnis generieren
# Generate summaries for each result
processed_results = []
for i, result in enumerate(results):
result_data_limited = self._limit_text(result['data'], max_chars=10000)
logger.info(f"Analysiere Ergebnis {i+1}/{len(results)}: {result['title'][:30]}...")
logger.info(f"Analyzing result {i+1}/{len(results)}: {result['title'][:30]}...")
# No AI service available, create minimal summary
if not self.ai_service:
content_summary = f"Extract from {result['url']} ({len(result_data_limited)} characters)"
else:
# Generate summary with AI
content_summary = await self._summarize_result(result_data_limited, prompt)
processed_result = {
@ -131,102 +143,212 @@ class AgentWebcrawler(AgentBase):
processed_results.append(processed_result)
# Gesamtzusammenfassung erstellen
# Create overall summary
all_summaries = "\n\n".join([r["summary"] for r in processed_results])
all_summaries_limited = self._limit_text(all_summaries, max_chars=10000)
logger.info("Erstelle Gesamtzusammenfassung der Webrecherche")
logger.info("Creating overall summary of web research")
if not self.ai_service:
final_summary = f"Summary of {len(processed_results)} web research results"
else:
final_summary = await self.ai_service.call_api([
{"role": "system", "content": "Du erstellst prägnante Zusammenfassungen von Rechercheergebnissen."},
{"role": "user", "content": f"Bitte fasse diese Erkenntnisse in 5-6 Sätzen zusammen: {all_summaries_limited}\n"}
{"role": "system", "content": "You create concise summaries of research results."},
{"role": "user", "content": f"Please summarize these findings in 5-6 sentences: {all_summaries_limited}\n"}
])
# Sprache der Anfrage ermitteln, um Überschriften in der richtigen Sprache zu verwenden
# Get localized headers for output
headers = await self._get_localized_headers(prompt)
# Endgültiges Ergebnis formatieren
final_result = f"## {headers['web_research_results']}\n\n### {headers['summary']}\n{final_summary}\n\n### {headers['detailed_results']}\n"
# Create document objects based on output specifications
generated_documents = []
for i, result in enumerate(processed_results, 1):
final_result += f"\n\n[{i}] {result['title']}\n{headers['url']}: {result['url']}\n{headers['snippet']}: {result['snippet']}\n{headers['content']}: {result['summary']}"
# Generate appropriate document for each requested output
for spec in output_specs:
output_label = spec.get("label", "")
output_description = spec.get("description", "")
# Inhalt in der Antwort setzen
response["content"] = final_result
# Determine output format based on file extension
format_type = self._determine_format_type(output_label)
logger.info("Webrecherche erfolgreich abgeschlossen")
# Generate content based on format and requirements
if format_type == "markdown" or format_type == "text":
content = self._format_results_as_markdown(processed_results, final_summary, headers)
elif format_type == "html":
md_content = self._format_results_as_markdown(processed_results, final_summary, headers)
content = markdown.markdown(md_content)
elif format_type == "json":
content = json.dumps({
"summary": final_summary,
"results": processed_results
}, indent=2, ensure_ascii=False)
elif format_type == "csv":
csv_lines = ["Title,URL,Snippet"]
for result in processed_results:
# Escape commas and quotes in fields
title = result["title"].replace('"', '""')
url = result["url"].replace('"', '""')
snippet = result["snippet"].replace('"', '""')
csv_line = f'"{title}","{url}","{snippet}"'
csv_lines.append(csv_line)
content = "\n".join(csv_lines)
else:
# Default: Markdown
content = self._format_results_as_markdown(processed_results, final_summary, headers)
return response
# Add document to results list
generated_documents.append({
"label": output_label,
"content": content
})
# If no specific outputs requested, return standard document
if not output_specs:
content = self._format_results_as_markdown(processed_results, final_summary, headers)
generated_documents.append({
"label": "web_research_results.md",
"content": content
})
# Create feedback for response
feedback = f"I conducted web research on '{prompt[:50]}...' and found {len(processed_results)} relevant results."
logger.info("Web research completed successfully")
return {
"feedback": feedback,
"documents": generated_documents
}
except Exception as e:
error_msg = f"Fehler bei der Webrecherche: {str(e)}"
error_msg = f"Error during web research: {str(e)}"
logger.error(error_msg)
response["content"] = f"## Fehler bei der Webrecherche\n\n{error_msg}"
return response
return {
"feedback": f"An error occurred during the web research: {str(e)}",
"documents": []
}
def _determine_format_type(self, output_label: str) -> str:
"""
Determine the format type based on the filename.
Args:
output_label: Output filename
Returns:
Format type (markdown, html, text, json, csv)
"""
output_label_lower = output_label.lower()
if output_label_lower.endswith(".md"):
return "markdown"
elif output_label_lower.endswith(".html"):
return "html"
elif output_label_lower.endswith(".txt"):
return "text"
elif output_label_lower.endswith(".json"):
return "json"
elif output_label_lower.endswith(".csv"):
return "csv"
else:
# Default to markdown
return "markdown"
def _format_results_as_markdown(self, results: List[Dict[str, Any]],
summary: str, headers: Dict[str, str]) -> str:
"""
Format research results as markdown.
Args:
results: List of results
summary: Summary of all results
headers: Localized headers
Returns:
Formatted markdown text
"""
md_content = f"# {headers['web_research_results']}\n\n"
md_content += f"## {headers['summary']}\n\n{summary}\n\n"
if results:
md_content += f"## {headers['detailed_results']}\n\n"
for i, result in enumerate(results, 1):
md_content += f"### {i}. {result['title']}\n\n"
md_content += f"**{headers['url']}**: {result['url']}\n\n"
md_content += f"**{headers['snippet']}**: {result['snippet']}\n\n"
md_content += f"**{headers['content']}**: {result['summary']}\n\n"
# Add separator between results (except for the last one)
if i < len(results):
md_content += "---\n\n"
return md_content
async def _is_web_research_request(self, prompt: str) -> bool:
    """
    Use AI to determine if a request requires web research.

    Args:
        prompt: The user request

    Returns:
        True if it is explicitly a web research request, False otherwise
    """
    if not self.ai_service:
        # Fallback to simpler detection if no AI service is available
        return self._simple_web_detection(prompt)

    try:
        # Create prompt to analyze if this is a web research request
        analysis_prompt = f"""
Analyze the following request and determine if it explicitly requires web research or online information.

REQUEST: {prompt}

A request requires web research if:
1. It explicitly asks for searching information online
2. It contains URLs or references to websites
3. It requests current information that would be available on the web
4. It asks for information from web sources
5. It implicitly requires current information from the internet

Reply ONLY with a single word - either "YES" if web research is required, or "NO" if not.
"""
        # Call AI for analysis
        response = await self.ai_service.call_api([
            {"role": "system", "content": "You determine if a request requires web research. Always respond with just YES or NO."},
            {"role": "user", "content": analysis_prompt}
        ])

        # Clean response and check; substring match tolerates extra model text.
        response = response.strip().upper()
        return "YES" in response
    except Exception as e:
        # Log error but don't fail, fallback to simpler detection
        logger.warning(f"Error in AI detection of web research requests: {str(e)}")
        return self._simple_web_detection(prompt)
def _simple_web_detection(self, prompt: str) -> bool:
"""
Einfachere Fallback-Methode zur Erkennung von Webrecherche-Anfragen anhand von URLs.
Simpler fallback method for detecting web research requests based on URLs.
Args:
prompt: Die Benutzeranfrage
prompt: The user request
Returns:
True, wenn es klare URL-Indikatoren gibt, sonst False
True if there are clear URL indicators, False otherwise
"""
# URLs in der Anfrage deuten stark auf Webrecherche hin
# URLs in the request strongly indicate web research
url_indicators = ["http://", "https://", "www.", ".com", ".org", ".net", ".edu", ".gov"]
web_terms = ["search", "find online", "look up", "web", "internet", "website", "suche", "finde", "recherchiere"]
web_terms = ["search", "find online", "look up", "web", "internet", "website"]
# Auf URL-Muster in der Anfrage prüfen
# Check for URL patterns in the request
contains_url = any(indicator in prompt.lower() for indicator in url_indicators)
contains_web_term = any(term in prompt.lower() for term in web_terms)
@ -234,100 +356,118 @@ class AgentWebcrawler(AgentBase):
async def _create_search_strategy(self, prompt: str) -> Dict[str, List[str]]:
"""
Erstellt eine Suchstrategie basierend auf der Anfrage.
Create a search strategy based on the request.
Args:
prompt: Die Benutzeranfrage
prompt: The user request
Returns:
Suchstrategie mit URLs und Suchbegriffen
Search strategy with URLs and search terms
"""
if not self.ai_service:
# Fallback zur einfachen Strategie
# Fallback to simple strategy
return {"skey": [prompt], "url": []}
try:
# KI-Prompt zur Erstellung einer Suchstrategie
strategy_prompt = f"""Erstelle eine umfassende Webrecherchestrategie für die Aufgabe = '{prompt.replace("'","")}'. Gib die Ergebnisse als Python-Dictionary mit diesen spezifischen Schlüsseln zurück. Wenn bestimmte URLs angegeben sind und die Aufgabe nur die Analyse dieser URLs erfordert, lass 'skey' leer.
# AI prompt to create a search strategy
strategy_prompt = f"""Create a comprehensive web research strategy for the following task:
'{prompt.replace("'","")}'
'url': Eine Liste von maximal {self.max_url} spezifischen URLs, die aus der Aufgabenstellung extrahiert wurden.
Return the results as a Python dictionary with these specific keys:
'skey': Eine Liste von maximal {self.max_key} Schlüsselsätzen, nach denen im Web gesucht werden soll. Diese sollten präzise, vielfältig und gezielt sein, um die relevantesten Informationen zu erhalten.
'url': A list of up to {self.max_url} specific URLs extracted from the task.
Formatiere deine Antwort als gültiges JSON-Objekt mit diesen beiden Schlüsseln. Füge keinen erklärenden Text oder Markdown außerhalb der Objektdefinition hinzu.
'skey': A list of up to {self.max_key} key phrases to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
If specific URLs are given and the task only requires analyzing these URLs, leave 'skey' empty.
Format your response as a valid JSON object with these two keys. Don't add any explanatory text.
"""
# KI für Suchstrategie aufrufen
# Call AI for search strategy
content_text = await self.ai_service.call_api([
{"role": "system", "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."},
{"role": "system", "content": "You are a web research expert who develops precise search strategies."},
{"role": "user", "content": strategy_prompt}
])
# JSON-Code-Block-Markierungen entfernen, falls vorhanden
# Remove JSON code block markers if present
if content_text.startswith("```json"):
end_marker = "```"
end_index = content_text.rfind(end_marker)
if end_index != -1:
content_text = content_text[7:end_index].strip()
elif content_text.startswith("```"):
end_marker = "```"
end_index = content_text.rfind(end_marker)
if end_index != -1:
content_text = content_text[3:end_index].strip()
# JSON parsen und zurückgeben
# Extract only the JSON part (if surrounded by text)
json_match = re.search(r'(\{.*\})', content_text, re.DOTALL)
if json_match:
content_text = json_match.group(1)
# Parse JSON and return
strategy = json.loads(content_text)
return strategy
except Exception as e:
logger.error(f"Fehler bei der Erstellung der Suchstrategie: {str(e)}")
# Einfache Fallback-Strategie
logger.error(f"Error creating search strategy: {str(e)}")
# Simple fallback strategy
return {"skey": [prompt], "url": []}
async def _summarize_result(self, result_data: str, original_prompt: str) -> str:
"""
Erstellt eine Zusammenfassung eines Suchergebnisses mit KI.
Create a summary of a search result using AI.
Args:
result_data: Die zu zusammenfassenden Daten
original_prompt: Die ursprüngliche Anfrage
result_data: The data to summarize
original_prompt: The original request
Returns:
Zusammenfassung des Ergebnisses
Summary of the result
"""
if not self.ai_service:
return "Keine Zusammenfassung verfügbar (KI-Service nicht verfügbar)"
return f"Summary of {len(result_data)} characters not available (AI service not available)"
try:
# Anweisungen für die Zusammenfassung
# Instructions for summarization
summary_prompt = f"""
Fasse dieses Suchergebnis gemäß der ursprünglichen Anfrage in etwa 2000 Zeichen zusammen. Ursprüngliche Anfrage = '{original_prompt.replace("'","")}'
Konzentriere dich auf die wichtigsten Erkenntnisse und verbinde sie mit der ursprünglichen Anfrage. Du kannst jede Einleitung überspringen.
Extrahiere nur relevante und hochwertige Informationen im Zusammenhang mit der Anfrage und präsentiere sie in einem klaren Format. Biete eine ausgewogene Ansicht der recherchierten Informationen.
Summarize this search result according to the original request in about 2000 characters.
Hier ist das Suchergebnis:
Original request = '{original_prompt.replace("'","")}'
Focus on the most important findings and connect them to the original request.
Extract only relevant and high-quality information.
Here's the search result:
{result_data}
"""
# KI für Zusammenfassung aufrufen
# Call AI for summary
summary = await self.ai_service.call_api([
{"role": "system", "content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."},
{"role": "system", "content": "You are an information analyst who summarizes web content precisely and relevantly."},
{"role": "user", "content": summary_prompt}
])
# Auf ~2000 Zeichen begrenzen
# Limit to ~2000 characters
return summary[:2000]
except Exception as e:
logger.error(f"Fehler bei der Zusammenfassung des Ergebnisses: {str(e)}")
return "Fehler bei der Zusammenfassung"
logger.error(f"Error summarizing result: {str(e)}")
return "Error creating summary"
async def _get_localized_headers(self, text: str) -> Dict[str, str]:
"""
Ermittelt lokalisierte Überschriften für die Webrecherche-Ergebnisse basierend auf der erkannten Sprache.
Determine localized headers for web research results based on detected language.
Args:
text: Text zur Spracherkennung
text: Text for language detection
Returns:
Dictionary mit lokalisierten Überschriften
Dictionary with localized headers
"""
# Standard-Englische Überschriften
# Default English headers
headers = {
"web_research_results": "Web Research Results",
"summary": "Summary",
@ -341,44 +481,22 @@ class AgentWebcrawler(AgentBase):
return headers
try:
# Sprache erkennen
language_prompt = f"In welcher Sprache ist dieser Text geschrieben? Antworte nur mit dem Sprachnamen: {text[:200]}"
# Detect language
language_prompt = f"What language is this text written in? Answer with just the language name: {text[:200]}"
language = await self.ai_service.call_api([
{"role": "system", "content": "Du bestimmst die Sprache eines Textes und gibst nur den Sprachnamen zurück."},
{"role": "system", "content": "You determine the language of a text and return only the language name."},
{"role": "user", "content": language_prompt}
])
language = language.strip().lower()
# Englische Sprache oder Spracherkennung fehlgeschlagen, Standardüberschriften zurückgeben
# English language or language detection failed, return default headers
if language in ["english", "en", ""]:
return headers
# Deutsche Überschriften
if language in ["deutsch", "german", "de"]:
return {
"web_research_results": "Webrecherche-Ergebnisse",
"summary": "Zusammenfassung",
"detailed_results": "Detaillierte Ergebnisse",
"url": "URL",
"snippet": "Ausschnitt",
"content": "Inhalt"
}
# Französische Überschriften
if language in ["französisch", "french", "fr"]:
return {
"web_research_results": "Résultats de recherche Web",
"summary": "Résumé",
"detailed_results": "Résultats détaillés",
"url": "URL",
"snippet": "Extrait",
"content": "Contenu"
}
# Überschriften übersetzen, wenn Sprache erkannt, aber keine vordefinierte Übersetzung
# Translate headers if language recognized but no predefined translation
translation_prompt = f"""
Übersetze diese Webrecherche-Ergebnisüberschriften ins {language}:
Translate these web research result headers to {language}:
Web Research Results
Summary
@ -387,71 +505,73 @@ class AgentWebcrawler(AgentBase):
Snippet
Content
Gib ein JSON-Objekt mit diesen Schlüsseln zurück:
Return a JSON object with these keys:
web_research_results, summary, detailed_results, url, snippet, content
"""
# KI für Übersetzung aufrufen
# Call AI for translation
response = await self.ai_service.call_api([
{"role": "system", "content": "Du übersetzt Überschriften in die angegebene Sprache und gibst sie als JSON zurück."},
{"role": "system", "content": "You translate headers to the specified language and return them as JSON."},
{"role": "user", "content": translation_prompt}
])
# JSON extrahieren
import re
# Extract JSON
json_match = re.search(r'\{.*\}', response, re.DOTALL)
if json_match:
try:
translated_headers = json.loads(json_match.group(0))
return translated_headers
except json.JSONDecodeError:
logger.warning(f"Error parsing translated headers JSON")
except Exception as e:
# Fehler protokollieren, aber mit englischen Überschriften fortfahren
logger.warning(f"Fehler beim Übersetzen der Überschriften: {str(e)}")
# Log error but continue with English headers
logger.warning(f"Error translating headers: {str(e)}")
return headers
def _search_web(self, query: str) -> List[Dict[str, str]]:
"""
Führt eine Websuche durch und gibt die Ergebnisse zurück.
Conduct a web search and return the results.
Args:
query: Die Suchanfrage
query: The search query
Returns:
Liste von Suchergebnissen
List of search results
"""
formatted_query = quote_plus(query)
url = f"{APP_CONFIG.get('Connector_AiWebscraping_SEARCH_ENGINE')}{formatted_query}"
url = f"{APP_CONFIG.get('Connector_AiWebscraping_SEARCH_ENGINE', 'https://html.duckduckgo.com/html/?q=')}{formatted_query}"
search_results_soup = self._read_url(url)
if not isinstance(search_results_soup, BeautifulSoup) or not search_results_soup.select('.result'):
logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
logger.warning(f"No search results found for: {query}")
return []
# Suchergebnisse extrahieren
# Extract search results
results = []
# Alle Ergebniscontainer finden
# Find all result containers
result_elements = search_results_soup.select('.result')
for result in result_elements:
# Titel extrahieren
# Extract title
title_element = result.select_one('.result__a')
title = title_element.text.strip() if title_element else 'Kein Titel'
title = title_element.text.strip() if title_element else 'No title'
# URL extrahieren (DuckDuckGo verwendet Weiterleitungen)
# Extract URL (DuckDuckGo uses redirects)
url_element = title_element.get('href') if title_element else ''
extracted_url = 'Keine URL'
extracted_url = 'No URL'
if url_element:
# Tatsächliche URL aus DuckDuckGos Weiterleitung extrahieren
# Extract actual URL from DuckDuckGo's redirect
if url_element.startswith('/d.js?q='):
start = url_element.find('?q=') + 3
end = url_element.find('&', start) if '&' in url_element[start:] else None
extracted_url = unquote(url_element[start:end])
# Sicherstellen, dass die URL das korrekte Protokollpräfix hat
# Ensure URL has correct protocol prefix
if not extracted_url.startswith(('http://', 'https://')):
if not extracted_url.startswith('//'):
extracted_url = 'https://' + extracted_url
@ -460,14 +580,14 @@ class AgentWebcrawler(AgentBase):
else:
extracted_url = url_element
# Snippet direkt aus der Suchergebnisseite extrahieren
# Extract snippet directly from search results page
snippet_element = result.select_one('.result__snippet')
snippet = snippet_element.text.strip() if snippet_element else 'Keine Beschreibung'
snippet = snippet_element.text.strip() if snippet_element else 'No description'
# Tatsächlichen Seiteninhalt für das Datenfeld abrufen
# Get actual page content for the data field
target_page_soup = self._read_url(extracted_url)
# Neue Inhaltsextraktionsmethode verwenden, um Inhaltsgröße zu begrenzen
# Use new content extraction method to limit content size
content = self._extract_main_content(target_page_soup)
results.append({
@ -477,7 +597,7 @@ class AgentWebcrawler(AgentBase):
'data': content
})
# Anzahl der Ergebnisse bei Bedarf begrenzen
# Limit number of results if needed
if len(results) >= self.max_result:
break
@ -485,68 +605,68 @@ class AgentWebcrawler(AgentBase):
def _read_url(self, url: str) -> BeautifulSoup:
"""
Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
Read a URL and return a BeautifulSoup parser for the content.
Args:
url: Die zu lesende URL
url: The URL to read
Returns:
BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
BeautifulSoup object with the content or empty on errors
"""
headers = {
'User-Agent': APP_CONFIG.get("Connector_AiWebscraping_USER_AGENT"),
'User-Agent': APP_CONFIG.get("Connector_AiWebscraping_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"),
'Accept': 'text/html,application/xhtml+xml,application/xml',
'Accept-Language': 'en-US,en;q=0.9',
}
try:
# Initiale Anfrage
# Initial request
response = requests.get(url, headers=headers, timeout=self.timeout)
# Abfragen für Status 202
# Handling for status 202
if response.status_code == 202:
# Maximal 3 Versuche mit zunehmenden Intervallen
# Max 3 retries with increasing intervals
backoff_times = [0.5, 1.0, 2.0, 5.0]
for wait_time in backoff_times:
time.sleep(wait_time) # Mit zunehmender Zeit warten
time.sleep(wait_time) # Wait with increasing time
response = requests.get(url, headers=headers, timeout=self.timeout)
# Wenn kein 202 mehr, dann abbrechen
# If no more 202, break
if response.status_code != 202:
break
# Für andere Fehlerstatuscodes einen Fehler auslösen
# Raise for other error status codes
response.raise_for_status()
# HTML parsen
# Parse HTML
return BeautifulSoup(response.text, 'html.parser')
except Exception as e:
logger.error(f"Fehler beim Lesen der URL {url}: {str(e)}")
# Leeres BeautifulSoup-Objekt erstellen
logger.error(f"Error reading URL {url}: {str(e)}")
# Create empty BeautifulSoup object
return BeautifulSoup("<html><body></body></html>", 'html.parser')
def _extract_title(self, soup: BeautifulSoup, url: str) -> str:
"""
Extrahiert den Titel aus einer Webseite.
Extract the title from a webpage.
Args:
soup: BeautifulSoup-Objekt der Webseite
url: URL der Webseite
soup: BeautifulSoup object of the webpage
url: URL of the webpage
Returns:
Extrahierter Titel
Extracted title
"""
if not isinstance(soup, BeautifulSoup):
return f"Fehler bei {url}"
return f"Error with {url}"
# Titel aus dem title-Tag extrahieren
# Extract title from title tag
title_tag = soup.find('title')
title = title_tag.text.strip() if title_tag else "Kein Titel"
title = title_tag.text.strip() if title_tag else "No title"
# Alternative: Auch nach h1-Tags suchen, wenn der title-Tag fehlt
if title == "Kein Titel":
# Alternative: Also look for h1 tags if title tag is missing
if title == "No title":
h1_tag = soup.find('h1')
if h1_tag:
title = h1_tag.text.strip()
@ -555,19 +675,19 @@ class AgentWebcrawler(AgentBase):
def _extract_main_content(self, soup: BeautifulSoup, max_chars: int = 10000) -> str:
"""
Extrahiert den Hauptinhalt aus einer HTML-Seite.
Extract the main content from an HTML page.
Args:
soup: BeautifulSoup-Objekt der Webseite
max_chars: Maximale Anzahl von Zeichen
soup: BeautifulSoup object of the webpage
max_chars: Maximum number of characters
Returns:
Extrahierter Hauptinhalt als String
Extracted main content as a string
"""
if not isinstance(soup, BeautifulSoup):
return str(soup)[:max_chars] if soup else ""
# Versuchen, Hauptinhaltselemente in Prioritätsreihenfolge zu finden
# Try to find main content elements in priority order
main_content = None
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
content = soup.select_one(selector)
@ -575,70 +695,71 @@ class AgentWebcrawler(AgentBase):
main_content = content
break
# Wenn kein Hauptinhalt gefunden wurde, den Body verwenden
# If no main content found, use the body
if not main_content:
main_content = soup.find('body') or soup
# Skript-, Style-, Nav-, Footer-Elemente entfernen, die nicht zum Hauptinhalt beitragen
# Remove script, style, nav, footer elements that don't contribute to main content
for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
element.extract()
# Textinhalt extrahieren
# Extract text content
text_content = main_content.get_text(separator=' ', strip=True)
# Auf max_chars begrenzen
# Limit to max_chars
return text_content[:max_chars]
def _parse_result(self, soup: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
"""
Parst ein BeautifulSoup-Objekt in ein Ergebnis-Dictionary.
Parse a BeautifulSoup object into a result dictionary.
Args:
soup: BeautifulSoup-Objekt der Webseite
title: Seitentitel
url: Seiten-URL
soup: BeautifulSoup object of the webpage
title: Page title
url: Page URL
Returns:
Dictionary mit Ergebnisdaten
Dictionary with result data
"""
# Inhalt extrahieren
# Extract content
content = self._extract_main_content(soup)
result = {
'title': title,
'url': url,
'snippet': 'Keine Beschreibung', # Standardwert
'snippet': 'No description', # Default value
'data': content
}
return result
def _limit_text(self, text: str, max_chars: int = 10000) -> str:
"""
Begrenzt den Text auf eine maximale Anzahl von Zeichen.
Limit text to a maximum number of characters.
Args:
text: Eingangstext
max_chars: Maximale Anzahl von Zeichen
text: Input text
max_chars: Maximum number of characters
Returns:
Begrenzter Text
Limited text
"""
if not text:
return ""
# Wenn der Text bereits unter dem Limit liegt, unverändert zurückgeben
# If text is already under the limit, return unchanged
if len(text) <= max_chars:
return text
# Andernfalls den Text auf max_chars begrenzen
return text[:max_chars] + "... [Inhalt aufgrund der Länge gekürzt]"
# Otherwise limit text to max_chars
return text[:max_chars] + "... [Content truncated due to length]"
# Factory function for the Webcrawler agent
def get_webcrawler_agent():
    """
    Factory function that returns an instance of the Webcrawler agent.

    Returns:
        A new AgentWebcrawler instance on every call
        (the previous module-level singleton caching was removed)
    """
    return AgentWebcrawler()

View file

@ -1,207 +1,204 @@
"""
Chat Agent Registry Modul.
Stellt ein zentrales Registry-System für alle verfügbaren Agenten bereit.
Chat Agent Registry Module.
Provides a central registry system for all available agents.
Optimized for the standardized task processing pattern.
"""
import os
import logging
import importlib
import uuid
from datetime import datetime
from typing import Dict, Any, List, Optional
logger = logging.getLogger(__name__)
class AgentBase:
    """
    Base class for all chat agents.
    Defines the standardized interface for task processing.
    """

    def __init__(self):
        """Initialize the base agent with default metadata and no AI service."""
        self.name = "base-agent"
        self.description = "Basic agent functionality"
        self.capabilities = []
        self.ai_service = None

    def set_dependencies(self, ai_service=None):
        """Set external dependencies for the agent."""
        self.ai_service = ai_service

    def get_agent_info(self) -> Dict[str, Any]:
        """
        Return standardized information about the agent's capabilities.

        Returns:
            Dictionary with name, description, and capabilities
        """
        info = {}
        info["name"] = self.name
        info["description"] = self.description
        info["capabilities"] = self.capabilities
        return info

    async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process a standardized task structure and return results.
        Every concrete agent class must override this method.

        Args:
            task: Dictionary containing task_id, workflow_id (optional),
                prompt, input_documents, output_specifications and context

        Returns:
            Dictionary containing 'feedback' (text response explaining what
            the agent did) and 'documents' (list of created document objects)
        """
        # Default fallback so a misconfigured agent fails loudly but gracefully
        logger.warning(f"Agent {self.name} is using the default implementation of process_task")
        return {
            "feedback": f"The process_task method was not implemented by agent '{self.name}'.",
            "documents": []
        }
class AgentRegistry:
    """Central registry for all available agents in the system."""

    _instance = None

    @classmethod
    def get_instance(cls):
        """Return the singleton instance of the agent registry, creating it on first use."""
        if cls._instance is None:
            cls._instance = cls()
        return cls._instance

    def __init__(self):
        """Initialize the agent registry. Use get_instance() instead of calling this directly."""
        if AgentRegistry._instance is not None:
            raise RuntimeError("Singleton instance already exists - use get_instance()")
        self.agents = {}
        self.ai_service = None
        self._load_agents()

    def _load_agents(self):
        """Load all available agents from chat_agent_* modules in this directory.

        Fixed: the diff-duplicated append/log lines were removed — each module
        was appended (and therefore loaded) twice.
        """
        logger.info("Loading agent modules...")

        # List of agent modules to load
        agent_modules = []
        agent_dir = os.path.dirname(__file__)

        # Search the directory for agent modules
        for filename in os.listdir(agent_dir):
            if filename.startswith("chat_agent_") and filename.endswith(".py"):
                agent_modules.append(filename[:-3])  # Remove .py extension

        if not agent_modules:
            logger.warning("No agent modules found")
            return

        logger.info(f"{len(agent_modules)} agent modules found")

        # Load each agent module
        for module_name in agent_modules:
            try:
                module = importlib.import_module(f"modules.{module_name}")

                # Look for an agent class or a get_*_agent factory function
                agent_name = module_name.split('_')[-1]
                class_name = f"Agent{agent_name.capitalize()}"
                getter_name = f"get_{agent_name}_agent"
                agent = None

                # Prefer the get_*_agent factory function
                if hasattr(module, getter_name):
                    agent = getattr(module, getter_name)()
                    logger.info(f"Agent '{agent.name}' loaded via {getter_name}()")
                # Alternatively, instantiate the agent class directly
                elif hasattr(module, class_name):
                    agent = getattr(module, class_name)()
                    logger.info(f"Agent '{agent.name}' directly instantiated")

                if agent:
                    # Register the agent
                    self.register_agent(agent)
                else:
                    logger.warning(f"No agent class or getter function found in module {module_name}")
            except ImportError as e:
                logger.error(f"Module {module_name} could not be imported: {e}")
            except Exception as e:
                logger.error(f"Error loading agent from module {module_name}: {e}")

    def set_ai_service(self, ai_service):
        """Set the AI service and propagate it to all registered agents."""
        self.ai_service = ai_service
        self.update_agent_dependencies()

    def update_agent_dependencies(self):
        """Update dependencies for all registered agents."""
        # Iterate values directly; the dict key was unused
        for agent in self.agents.values():
            if hasattr(agent, 'set_dependencies'):
                agent.set_dependencies(ai_service=self.ai_service)

    def register_agent(self, agent):
        """
        Register an agent in the registry, keyed by its 'name' attribute.

        Args:
            agent: The agent to register
        """
        agent_id = getattr(agent, 'name', "unknown_agent")
        # Initialize agent with the registry's dependencies
        if hasattr(agent, 'set_dependencies'):
            agent.set_dependencies(ai_service=self.ai_service)
        self.agents[agent_id] = agent
        logger.debug(f"Agent '{agent.name}' registered")

    def get_agent(self, agent_identifier: str):
        """
        Return an agent instance.

        Args:
            agent_identifier: ID or type of the desired agent

        Returns:
            Agent instance, or None if not found
        """
        if agent_identifier in self.agents:
            return self.agents[agent_identifier]
        logger.error(f"Agent with identifier '{agent_identifier}' not found")
        return None

    def get_all_agents(self) -> Dict[str, Any]:
        """Return all registered agents."""
        return self.agents

    def get_agent_infos(self) -> List[Dict[str, Any]]:
        """Return information about all registered agents (each agent listed once)."""
        agent_infos = []
        seen_agents = set()
        for agent in self.agents.values():
            if agent not in seen_agents:
                # Use get_agent_info when available, otherwise build a minimal info dict
                if hasattr(agent, 'get_agent_info'):
                    agent_infos.append(agent.get_agent_info())
                else:
                    agent_infos.append({
                        "name": agent.name,
                        "capabilities": getattr(agent, 'capabilities', ""),
                    })
                    logger.error(f"Agent '{agent.name}' does not show profile.")
                seen_agents.add(agent)
        return agent_infos
# Legacy base agent class
# NOTE(review): this duplicates — and, because it is defined later in the
# module, shadows — the task-based AgentBase defined above. Presumably a
# leftover from the refactoring; confirm whether it can be removed.
class AgentBase:
    """
    Legacy base class for chat agents using the process_message interface.
    """

    def __init__(self):
        """Initialize the legacy base agent."""
        self.name = "Basis-Agent"
        self.capabilities = "Grundlegende Agentenfunktionen"
        self.ai_service = None

    def set_dependencies(self, ai_service=None):
        """Set external dependencies for the agent."""
        self.ai_service = ai_service

    def get_config(self) -> Dict[str, Any]:
        """Return the agent's configuration (name and capabilities)."""
        return {
            "name": self.name,
            "capabilities": self.capabilities,
        }

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process a chat message and return a response message.
        Base implementation — should be overridden by specialized agents.

        Args:
            message: Message dict; only the 'content' key is read here
            context: Optional additional context (unused in this base implementation)

        Returns:
            Response message dict with role, content and agent_name
        """
        if not self.ai_service:
            # BUG FIX: was `self.id`, which is never defined anywhere in this
            # class and raised AttributeError; log message translated to English
            logger.warning(f"Agent {self.name} has no configured AI service")
            return {
                "role": "assistant",
                "content": f"Ich bin {self.name}, aber ich bin nicht richtig konfiguriert. Bitte den AI-Service einrichten.",
                "agent_name": self.name,
            }

        # Build a simple prompt from the message content
        prompt = message.get("content", "")

        # Generate a response
        try:
            # NOTE(review): call_api is awaited elsewhere in this codebase but
            # not here — confirm whether this legacy path expects a sync service
            response_content = self.ai_service.call_api([
                {"role": "system", "content": f"Du bist {self.name}, ein spezialisierter {self.name}-Agent mit Fähigkeiten in: {self.capabilities}"},
                {"role": "user", "content": prompt}
            ])
            return {
                "role": "assistant",
                "content": response_content,
                "agent_name": self.name,
            }
        except Exception as e:
            # BUG FIX: was `self.id` here as well; log message translated to English
            logger.error(f"Error in agent {self.name}: {str(e)}")
            return {
                "role": "assistant",
                "content": f"Ich habe einen Fehler festgestellt: {str(e)}",
                "agent_name": self.name,
            }
# Singleton factory for the agent registry
def get_agent_registry():
    """Return the singleton AgentRegistry instance."""
    return AgentRegistry.get_instance()

View file

@ -1,33 +1,42 @@
....................... TASKS
please revise all chat_agents* modules:
- all comments, logs and outputs in english language
- all ai answers in the language of the user
- no language specific features like analysis of words. a prompt in japanese would not work with this! i need it generically.
- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section.
run agent, then save output files to db
. files save-> fileid list, ALWAYS TO WRITE NEW FILES!
. chat_message_to_workflow(role, agent,chatmsg, workflow): with answer and fileidlist
documentation agent:
- why try to find out the document type, when the "label" of the files to deliver ALWAYS includes the extension (e.g. .docx, .csv, etc.)? Please revise; this can be shortened and simplified considerably
webcrawler_agent:
- there is a try - except mapping problem in the code. please also fix this
-
also attached chat.py and chat_content_extraction (centralized), so that you can see the structure of the passed parameters.
----------------------- OPEN
PRIO1:
Split big files into content-parts
sharepoint connector with document search, content search, content extraction
add connector to myoutlook
PRIO2:
implement cleanup routines for files in lucydom_interface (File_Management_CLEANUP_INTERVAL): temp older than interval, all orphaned
Integrate NDA Text as modal form - Data governance agreement by login with checkbox
frontend to react
frontend: no labels definition
----------------------- DONE

View file

@ -27,6 +27,7 @@ pandas==2.2.3 # Aktuelle Version beibehalten
## Data Visualization
matplotlib==3.8.0 # Aktuelle Version beibehalten
seaborn==0.13.0
markdown
## Web Scraping & HTTP
beautifulsoup4==4.12.2