testbase
This commit is contained in:
parent
9247de4346
commit
8b234a9a30
9 changed files with 2240 additions and 1725 deletions
145
modules/chat.py
145
modules/chat.py
|
|
@ -7,7 +7,9 @@ von Benutzeranfragen, Agentenausführung und Ergebnisformatierung.
|
|||
import os
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
import uuid
|
||||
import base64
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional, Union
|
||||
|
||||
|
|
@ -69,7 +71,7 @@ class ChatManager:
|
|||
# 4. Speichere die Antwort als Message im Workflow und füge Log-Einträge hinzu
|
||||
response_message = {
|
||||
"role": "assistant",
|
||||
"agent_type": "project_manager",
|
||||
"agent_name": "project_manager",
|
||||
"content": obj_user_response
|
||||
}
|
||||
self.message_add(workflow, response_message)
|
||||
|
|
@ -213,7 +215,7 @@ JSON_OUTPUT = {{
|
|||
# Parsen der JSON-Antwort
|
||||
return self.parse_json_response(project_manager_output)
|
||||
|
||||
def chat_message_to_workflow(self, role: str, agent_type: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
|
||||
def chat_message_to_workflow(self, role: str, agent_name: str, chat_message: Dict[str, Any], workflow: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Integriert Benutzereingaben in ein Message-Objekt inklusive Dateien mit vollständigen Inhalten.
|
||||
|
||||
|
|
@ -223,7 +225,7 @@ JSON_OUTPUT = {{
|
|||
Returns:
|
||||
Message-Objekt mit Inhalt und Dokumenten samt Inhalten
|
||||
"""
|
||||
logger.info(f"Message from {role} {agent_type} sent with {len(chat_message.get('list_file_id', []))} documents")
|
||||
logger.info(f"Message from {role} {agent_name} sent with {len(chat_message.get('list_file_id', []))} documents")
|
||||
logger.debug(f"message = {self.parse_json2text(chat_message)}.")
|
||||
|
||||
# Nachrichteninhalt überprüfen
|
||||
|
|
@ -243,7 +245,7 @@ JSON_OUTPUT = {{
|
|||
# Nachrichtenobjekt erstellen
|
||||
message_object = {
|
||||
"role": role,
|
||||
"agent_type": agent_type,
|
||||
"agent_name": agent_name,
|
||||
"content": message_content,
|
||||
"documents": additional_files
|
||||
}
|
||||
|
|
@ -268,7 +270,7 @@ JSON_OUTPUT = {{
|
|||
# Create basic message structure
|
||||
final_message = {
|
||||
"role": "assistant",
|
||||
"agent_type": "project_manager",
|
||||
"agent_name": "project_manager",
|
||||
"content": obj_user_response,
|
||||
"documents": []
|
||||
}
|
||||
|
|
@ -521,6 +523,7 @@ JSON_OUTPUT = {{
|
|||
async def agent_processing(self, task: Dict[str, Any], workflow: Dict[str, Any]) -> List[Dict[str, Any]]:
|
||||
"""
|
||||
Process a single agent task from the workflow.
|
||||
Optimized for the task-based approach where all agents implement process_task.
|
||||
|
||||
Args:
|
||||
task: The task definition containing agent name, prompt, and document specifications
|
||||
|
|
@ -534,7 +537,8 @@ JSON_OUTPUT = {{
|
|||
agent_prompt = task.get("prompt", "")
|
||||
|
||||
# Log the current step
|
||||
step_info = f"Agent '{agent_name}' to create {', '.join([d.get('label') for d in task.get('output_documents', [])])}."
|
||||
output_labels = [d.get("label", "unknown") for d in task.get("output_documents", [])]
|
||||
step_info = f"Agent '{agent_name}' to create {', '.join(output_labels)}."
|
||||
self.log_add(workflow, step_info)
|
||||
|
||||
# Check if prompt is empty
|
||||
|
|
@ -542,77 +546,72 @@ JSON_OUTPUT = {{
|
|||
logger.warning("Empty prompt, no task to do")
|
||||
return []
|
||||
|
||||
# Prepare input documents for the agent
|
||||
input_documents = self.agent_input_documents(task.get('input_documents', []), workflow)
|
||||
|
||||
# Prepare output documents for the agent
|
||||
output_documents = []
|
||||
for doc in task.get("output_documents",[]):
|
||||
output_document={
|
||||
"label":doc.get("label"),
|
||||
"descripton_file_content":doc.get("prompt")
|
||||
}
|
||||
output_documents.append(output_document)
|
||||
|
||||
# Create AI prompt
|
||||
ai_prompt = f"""
|
||||
# Please deliver documents according to this instruction:
|
||||
|
||||
<instruction>
|
||||
{agent_prompt}
|
||||
</instruction>
|
||||
|
||||
|
||||
# Input documents:
|
||||
|
||||
{self.parse_json2text(input_documents)}
|
||||
|
||||
|
||||
# Output documents to provide:
|
||||
|
||||
{self.parse_json2text(output_documents)}
|
||||
|
||||
|
||||
Your output must be strictly in the following JSON_OUTPUT format, with no additions before or after the JSON object:
|
||||
|
||||
JSON_OUTPUT = {{
|
||||
"feedback":"your feedback for the delivered result",
|
||||
"documents": [
|
||||
{{
|
||||
"label":"label of output document",
|
||||
"content": "the produced content; if text format, then as text, otherwise in base64 format"
|
||||
}},
|
||||
# each output document a separate item
|
||||
]
|
||||
}}
|
||||
"""
|
||||
|
||||
# Get agent from registry
|
||||
agent = self.agent_registry.get_agent(agent_name)
|
||||
if not agent:
|
||||
logger.error(f"Agent '{agent_name}' not found")
|
||||
return []
|
||||
|
||||
# Execute the agent
|
||||
# Prepare input documents for the agent
|
||||
input_documents = self.agent_input_documents(task.get('input_documents', []), workflow)
|
||||
|
||||
# Prepare output document specifications
|
||||
output_specs = []
|
||||
for doc in task.get("output_documents", []):
|
||||
output_spec = {
|
||||
"label": doc.get("label"),
|
||||
"description": doc.get("prompt", "")
|
||||
}
|
||||
output_specs.append(output_spec)
|
||||
|
||||
# Create a standardized task object for the agent
|
||||
agent_task = {
|
||||
"task_id": str(uuid.uuid4()),
|
||||
"workflow_id": workflow.get("id"),
|
||||
"prompt": agent_prompt,
|
||||
"input_documents": input_documents,
|
||||
"output_specifications": output_specs,
|
||||
"context": {
|
||||
"workflow_round": workflow.get("current_round", 1),
|
||||
"agent_type": agent_name,
|
||||
"timestamp": datetime.now().isoformat()
|
||||
}
|
||||
}
|
||||
|
||||
# Execute the agent with the standardized task
|
||||
try:
|
||||
agent_results = await agent.process_message(ai_prompt)
|
||||
except Exception as e:
|
||||
logger.error(f"Error executing agent '{agent_name}': {str(e)}")
|
||||
return []
|
||||
# Process the task using the agent's standardized interface
|
||||
agent_results = await agent.process_task(agent_task)
|
||||
|
||||
# Log the agent response
|
||||
self.log_add(
|
||||
workflow,
|
||||
f"Agent '{agent_name}' completed task. Feedback: {agent_results.get('feedback', 'No feedback provided')}"
|
||||
)
|
||||
|
||||
# Store produced files and prepare input object for message
|
||||
agent_inputs = {
|
||||
"prompt": agent_results.get("feedback",""),
|
||||
"prompt": agent_results.get("feedback", ""),
|
||||
"list_file_id": self.agent_save_documents(agent_results)
|
||||
}
|
||||
|
||||
# Create a message in the workflow with the agent's response
|
||||
agent_message = self.chat_message_to_workflow("assistant", agent_name, agent_inputs, workflow)
|
||||
logger.debug(f"agent result = {self.parse_json2text(agent_message)}.")
|
||||
return agent_message.get("documents")
|
||||
logger.debug(f"Agent result = {self.parse_json2text(agent_message)}.")
|
||||
|
||||
return agent_message.get("documents", [])
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error executing agent '{agent_name}': {str(e)}"
|
||||
logger.error(error_msg)
|
||||
self.log_add(workflow, error_msg, level="error")
|
||||
return []
|
||||
|
||||
|
||||
def agent_save_documents(self, agent_results: Dict[str, Any]) -> List[int]:
|
||||
"""
|
||||
Saves all documents from agent results as files and returns a list of file IDs.
|
||||
Enhanced to handle the standardized document format from agents.
|
||||
|
||||
Args:
|
||||
agent_results: Dictionary containing agent feedback and documents
|
||||
|
|
@ -632,32 +631,45 @@ JSON_OUTPUT = {{
|
|||
content = doc.get("content", "")
|
||||
|
||||
# Split label into name and extension
|
||||
import os
|
||||
name, ext = os.path.splitext(label)
|
||||
if ext.startswith('.'):
|
||||
ext = ext[1:] # Remove leading dot
|
||||
elif not ext:
|
||||
# If no extension is provided, default to .txt for text content
|
||||
ext = "txt"
|
||||
label = f"{label}.{ext}"
|
||||
|
||||
# Determine if content is base64 encoded
|
||||
is_base64 = False
|
||||
import base64
|
||||
if not isinstance(content, bytes):
|
||||
# Check if content looks like base64
|
||||
# Check if content might be base64 encoded
|
||||
try:
|
||||
if content and isinstance(content, str):
|
||||
# Check for base64 pattern (simplified)
|
||||
if (len(content) % 4 == 0 and
|
||||
re.match(r'^[A-Za-z0-9+/]+={0,2}$', content)):
|
||||
# Try to decode a small sample
|
||||
if content and isinstance(content, str) and len(content) > 0:
|
||||
sample = content[:100] if len(content) > 100 else content
|
||||
base64.b64decode(sample)
|
||||
# If no error, assume it's base64
|
||||
is_base64 = True
|
||||
except Exception:
|
||||
# Not base64, treat as regular text
|
||||
is_base64 = False
|
||||
|
||||
# If content has metadata flag indicating it's base64
|
||||
if isinstance(content, dict) and content.get("_is_base64", False):
|
||||
is_base64 = True
|
||||
content = content.get("data", "")
|
||||
|
||||
# Convert content to bytes
|
||||
if isinstance(content, str):
|
||||
if is_base64:
|
||||
# Decode base64 to bytes
|
||||
try:
|
||||
file_content = base64.b64decode(content)
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to decode base64 content: {str(e)}")
|
||||
file_content = content.encode('utf-8')
|
||||
else:
|
||||
# Convert text to bytes
|
||||
file_content = content.encode('utf-8')
|
||||
|
|
@ -682,6 +694,7 @@ JSON_OUTPUT = {{
|
|||
|
||||
return file_ids
|
||||
|
||||
|
||||
### Messages
|
||||
|
||||
def message_add(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> Dict[str, Any]:
|
||||
|
|
@ -740,7 +753,7 @@ JSON_OUTPUT = {{
|
|||
Zusammenfassung der Nachricht
|
||||
"""
|
||||
role = message.get("role", "undefined")
|
||||
agent_type = message.get("agent_type", "")
|
||||
agent_name = message.get("agent_name", "")
|
||||
content = message.get("content", "")
|
||||
|
||||
try:
|
||||
|
|
@ -762,7 +775,7 @@ JSON_OUTPUT = {{
|
|||
if docs_list:
|
||||
docs_summary = f"\nDocuments:\n{'- '.join(docs_list)}"
|
||||
|
||||
return f"[{role} {agent_type}]: {content_summary}{docs_summary}"
|
||||
return f"[{role} {agent_name}]: {content_summary}{docs_summary}"
|
||||
|
||||
async def message_summarize_content(self, content: Dict[str, Any]) -> str:
|
||||
"""
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -1,113 +1,360 @@
|
|||
"""
|
||||
Kreativer Agent für wissensbasierte Antworten und kreative Inhaltsgenerierung.
|
||||
Angepasst für die neue chat.py Architektur und chat_registry.py.
|
||||
Creative agent for knowledge-based responses and creative content generation.
|
||||
Optimized for the new task-based processing.
|
||||
"""
|
||||
|
||||
import logging
|
||||
from typing import Dict, Any, List, Optional
|
||||
from typing import Dict, Any, List
|
||||
|
||||
from modules.chat_registry import AgentBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AgentCreative(AgentBase):
|
||||
"""Agent für wissensbasierte Antworten und kreative Inhaltsgenerierung"""
|
||||
"""Agent for knowledge-based responses and creative content generation"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert den kreativen Agent"""
|
||||
"""Initialize the creative agent"""
|
||||
super().__init__()
|
||||
self.name = "Creative Knowledge Assistant"
|
||||
self.capabilities = ("knowledge_sharing,content_creation,document_generation,"
|
||||
"creative_writing,poweron,document_processing,"
|
||||
"information_extraction,data_transformation,"
|
||||
"document_analysis,text_processing,table_creation,"
|
||||
"content_structuring")
|
||||
self.name = "creative"
|
||||
self.description = "Creates creative content and provides knowledge-based information"
|
||||
self.capabilities = [
|
||||
"knowledge_sharing",
|
||||
"content_creation",
|
||||
"creative_writing",
|
||||
"information_synthesis",
|
||||
"document_generation",
|
||||
"question_answering"
|
||||
]
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""Gibt Agent-Informationen für die Registry zurück"""
|
||||
info = super().get_config()
|
||||
return info
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Verarbeitet eine Nachricht und generiert eine kreative oder wissensbasierte Antwort.
|
||||
Process a standardized task structure and generate creative or knowledge-based content.
|
||||
|
||||
Args:
|
||||
message: Die zu verarbeitende Nachricht
|
||||
context: Zusätzlicher Kontext
|
||||
task: A dictionary containing:
|
||||
- task_id: Unique ID for this task
|
||||
- prompt: The main instruction for the agent
|
||||
- input_documents: List of documents to process
|
||||
- output_specifications: List of required output documents
|
||||
- context: Additional contextual information
|
||||
|
||||
Returns:
|
||||
Die generierte Antwort
|
||||
A dictionary containing:
|
||||
- feedback: Text response explaining the created content
|
||||
- documents: List of created document objects
|
||||
"""
|
||||
# Workflow-ID aus Kontext oder Nachricht extrahieren
|
||||
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
|
||||
try:
|
||||
# Extract relevant task information
|
||||
prompt = task.get("prompt", "")
|
||||
input_documents = task.get("input_documents", [])
|
||||
output_specs = task.get("output_specifications", [])
|
||||
|
||||
# Antwortstruktur erstellen
|
||||
response = {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"agent_name": self.name,
|
||||
"workflow_id": workflow_id,
|
||||
# Check if AI service is available
|
||||
if not self.ai_service:
|
||||
logger.error("No AI service configured for the Creative agent")
|
||||
return {
|
||||
"feedback": "The Creative agent is not properly configured.",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
try:
|
||||
# Benutzernachricht extrahieren
|
||||
user_message = message.get("content", "")
|
||||
# Extract context from input documents
|
||||
document_context = self._extract_document_context(input_documents)
|
||||
|
||||
if not user_message:
|
||||
response["content"] = "Bitte geben Sie eine Nachricht an, auf die ich antworten kann."
|
||||
return response
|
||||
# PowerOn handling, if included in the request
|
||||
if "poweron" in prompt.lower():
|
||||
return await self._handle_poweron_task(prompt, output_specs)
|
||||
|
||||
# PowerOn-Behandlung, falls in der Anfrage enthalten
|
||||
if "poweron" in user_message.lower():
|
||||
logger.info("PowerOn-Schlüsselwort erkannt, spezielle Antwort generieren")
|
||||
# Collect generated documents
|
||||
generated_documents = []
|
||||
|
||||
# Determine content type based on the prompt
|
||||
content_type = self._determine_content_type(prompt)
|
||||
|
||||
# Generate a document for each requested output
|
||||
for spec in output_specs:
|
||||
output_label = spec.get("label", "")
|
||||
output_description = spec.get("description", "")
|
||||
|
||||
# Determine format based on file extension
|
||||
format_type = self._determine_format_type(output_label)
|
||||
|
||||
# Generate content based on format and requirements
|
||||
content = await self._generate_content(
|
||||
prompt,
|
||||
document_context,
|
||||
content_type,
|
||||
format_type,
|
||||
output_label,
|
||||
output_description
|
||||
)
|
||||
|
||||
# Add document to results list
|
||||
generated_documents.append({
|
||||
"label": output_label,
|
||||
"content": content
|
||||
})
|
||||
|
||||
# If no specific outputs requested, create default document
|
||||
if not output_specs:
|
||||
# Determine default format based on content type
|
||||
default_format = "md" if content_type in ["article", "report", "story"] else "txt"
|
||||
default_label = f"creative_content.{default_format}"
|
||||
|
||||
# Generate content
|
||||
content = await self._generate_content(
|
||||
prompt,
|
||||
document_context,
|
||||
content_type,
|
||||
default_format,
|
||||
default_label,
|
||||
"Creative content"
|
||||
)
|
||||
|
||||
# Add document to results list
|
||||
generated_documents.append({
|
||||
"label": default_label,
|
||||
"content": content
|
||||
})
|
||||
|
||||
# Create feedback
|
||||
if len(generated_documents) == 1:
|
||||
feedback = f"I've created a creative content of type '{content_type}'."
|
||||
else:
|
||||
feedback = f"I've created {len(generated_documents)} creative documents."
|
||||
|
||||
return {
|
||||
"feedback": feedback,
|
||||
"documents": generated_documents
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Error creating creative content: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
return {
|
||||
"feedback": f"An error occurred while creating creative content: {str(e)}",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
|
||||
"""
|
||||
Extract context from input documents.
|
||||
|
||||
Args:
|
||||
documents: List of document objects
|
||||
|
||||
Returns:
|
||||
Extracted context as text
|
||||
"""
|
||||
context_parts = []
|
||||
|
||||
for doc in documents:
|
||||
doc_name = doc.get("name", "Unnamed document")
|
||||
context_parts.append(f"--- {doc_name} ---")
|
||||
|
||||
for content in doc.get("contents", []):
|
||||
if content.get("metadata", {}).get("is_text", False):
|
||||
context_parts.append(content.get("data", ""))
|
||||
|
||||
return "\n\n".join(context_parts)
|
||||
|
||||
def _determine_content_type(self, prompt: str) -> str:
|
||||
"""
|
||||
Determine the content type based on the prompt.
|
||||
|
||||
Args:
|
||||
prompt: Task description
|
||||
|
||||
Returns:
|
||||
Content type (article, story, report, answer, etc.)
|
||||
"""
|
||||
prompt_lower = prompt.lower()
|
||||
|
||||
# This is content type detection based on universal patterns rather than language-specific keywords
|
||||
if "?" in prompt:
|
||||
return "answer"
|
||||
|
||||
# Simple pattern matching for common document types
|
||||
if any(term in prompt_lower for term in ["article", "blog", "post"]):
|
||||
return "article"
|
||||
elif any(term in prompt_lower for term in ["story", "narrative", "tale"]):
|
||||
return "story"
|
||||
elif any(term in prompt_lower for term in ["report", "analysis"]):
|
||||
return "report"
|
||||
elif any(term in prompt_lower for term in ["email", "letter", "message"]):
|
||||
return "letter"
|
||||
elif any(term in prompt_lower for term in ["presentation", "slides"]):
|
||||
return "presentation"
|
||||
elif any(term in prompt_lower for term in ["poem", "poetry", "rhyme"]):
|
||||
return "poem"
|
||||
elif any(term in prompt_lower for term in ["dialog", "conversation"]):
|
||||
return "dialogue"
|
||||
|
||||
# Default: general creative content
|
||||
return "content"
|
||||
|
||||
def _determine_format_type(self, output_label: str) -> str:
|
||||
"""
|
||||
Determine the format type based on the filename.
|
||||
|
||||
Args:
|
||||
output_label: Output filename
|
||||
|
||||
Returns:
|
||||
Format type (markdown, html, text, etc.)
|
||||
"""
|
||||
if not '.' in output_label:
|
||||
return "txt" # Default format
|
||||
|
||||
extension = output_label.split('.')[-1].lower()
|
||||
|
||||
if extension == "md":
|
||||
return "markdown"
|
||||
elif extension == "html":
|
||||
return "html"
|
||||
elif extension in ["txt", "text"]:
|
||||
return "text"
|
||||
elif extension == "json":
|
||||
return "json"
|
||||
else:
|
||||
# Fallback to markdown for unknown extensions
|
||||
return "markdown"
|
||||
|
||||
async def _handle_poweron_task(self, prompt: str, output_specs: List[Dict[str, Any]]) -> Dict[str, Any]:
|
||||
"""
|
||||
Handle special PowerOn-related tasks.
|
||||
|
||||
Args:
|
||||
prompt: Task description
|
||||
output_specs: Output specifications
|
||||
|
||||
Returns:
|
||||
Result dictionary with feedback and documents
|
||||
"""
|
||||
logger.info("PowerOn keyword detected, generating special response")
|
||||
|
||||
poweron_prompt = f"""
|
||||
Bedanke dich beim Benutzer in der Sprache seiner Anfrage ganz herzlich dafür, dass er daran denkt, dass du PowerOn bist.
|
||||
Teile ihm mit, wie erfreut du bist, Teil der PowerOn-Familie zu sein, die daran arbeitet, Menschen für ein besseres Leben zu unterstützen.
|
||||
Thank the user in their request language for remembering that you are PowerOn.
|
||||
Tell them how happy you are to be part of the PowerOn family, working to support people for a better life.
|
||||
|
||||
Generiere dann eine kurze Antwort (1-2 Sätze) auf diese Frage: {user_message}
|
||||
Then generate a brief response (1-2 sentences) to this question: {prompt}
|
||||
"""
|
||||
|
||||
try:
|
||||
poweron_response = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du bist ein hilfreicher Assistent, der Teil der PowerOn-Familie ist."},
|
||||
{"role": "system", "content": "You are a helpful assistant who is part of the PowerOn family."},
|
||||
{"role": "user", "content": poweron_prompt}
|
||||
])
|
||||
|
||||
response["content"] = poweron_response
|
||||
return response
|
||||
# Collect generated documents
|
||||
generated_documents = []
|
||||
|
||||
# Create a document for each requested output
|
||||
if output_specs:
|
||||
for spec in output_specs:
|
||||
output_label = spec.get("label", "")
|
||||
format_type = self._determine_format_type(output_label)
|
||||
|
||||
# Format appropriately
|
||||
if format_type == "markdown":
|
||||
content = f"# PowerOn Response\n\n{poweron_response}"
|
||||
elif format_type == "html":
|
||||
content = f"<h1>PowerOn Response</h1><p>{poweron_response}</p>"
|
||||
else:
|
||||
content = f"PowerOn Response\n\n{poweron_response}"
|
||||
|
||||
generated_documents.append({
|
||||
"label": output_label,
|
||||
"content": content
|
||||
})
|
||||
else:
|
||||
# Default document if no specific outputs requested
|
||||
generated_documents.append({
|
||||
"label": "poweron_response.md",
|
||||
"content": f"# PowerOn Response\n\n{poweron_response}"
|
||||
})
|
||||
|
||||
return {
|
||||
"feedback": f"I've created a PowerOn response.",
|
||||
"documents": generated_documents
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Aufruf der API für PowerOn: {str(e)}")
|
||||
response["content"] = "Ich bin auf einen Fehler gestoßen, während ich eine PowerOn-Antwort generierte. Bitte versuchen Sie es erneut."
|
||||
return response
|
||||
logger.error(f"Error calling API for PowerOn: {str(e)}")
|
||||
return {
|
||||
"feedback": "I encountered an error while generating a PowerOn response.",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
# Einfacher Systemprompt, der sich auf die direkte Antwort auf die Benutzeranfrage konzentriert
|
||||
system_prompt = """Du bist ein hilfreicher, kreativer Assistent.
|
||||
Antworte direkt auf die Anfrage des Benutzers, ohne auf einen Workflow oder Systemkontext zu verweisen.
|
||||
Konzentriere dich nur darauf, eine direkte, hilfreiche Antwort auf die spezifische Frage oder Anfrage zu geben."""
|
||||
async def _generate_content(self, prompt: str, context: str, content_type: str,
|
||||
format_type: str, output_label: str, output_description: str) -> str:
|
||||
"""
|
||||
Generate creative or knowledge-based content based on the prompt.
|
||||
|
||||
# Verarbeiten mit dem KI-Service
|
||||
Args:
|
||||
prompt: Task description
|
||||
context: Document context
|
||||
content_type: Type of content to create
|
||||
format_type: Output format
|
||||
output_label: Output filename
|
||||
output_description: Description of desired output
|
||||
|
||||
Returns:
|
||||
Generated content
|
||||
"""
|
||||
if not self.ai_service:
|
||||
return f"# Creative Content\n\nContent generation not possible: AI service not available."
|
||||
|
||||
# Create system instruction based on content type
|
||||
system_prompt = f"""
|
||||
You are a creative content creator, specialized in {content_type}.
|
||||
Your task is to create high-quality, engaging, and accurate content.
|
||||
Make the content structured, clear, and appealing in the desired format.
|
||||
"""
|
||||
|
||||
# Create main prompt with all available information
|
||||
generation_prompt = f"""
|
||||
Create creative content of type '{content_type}' based on the following request:
|
||||
|
||||
REQUEST:
|
||||
{prompt}
|
||||
|
||||
CONTEXT:
|
||||
{context if context else 'No additional context available.'}
|
||||
|
||||
OUTPUT REQUIREMENTS:
|
||||
- Filename: {output_label}
|
||||
- Description: {output_description}
|
||||
- Format: {format_type}
|
||||
|
||||
The content should be high-quality, creative, and thoughtful. Follow all instructions in the request precisely.
|
||||
|
||||
The content must perfectly match the {format_type} format.
|
||||
"""
|
||||
|
||||
try:
|
||||
# Call AI for content generation
|
||||
content = await self.ai_service.call_api([
|
||||
{"role": "system", "content": system_prompt},
|
||||
{"role": "user", "content": user_message}
|
||||
{"role": "user", "content": generation_prompt}
|
||||
])
|
||||
|
||||
response["content"] = content
|
||||
return response
|
||||
# For markdown format, ensure there's a title at the beginning
|
||||
if format_type == "markdown" and not content.strip().startswith("# "):
|
||||
content = f"# Creative Content\n\n{content}"
|
||||
|
||||
return content
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler in process_message: {str(e)}")
|
||||
response["content"] = f"Bei der Verarbeitung Ihrer Anfrage ist ein Fehler aufgetreten: {str(e)}"
|
||||
return response
|
||||
logger.error(f"Error in creative content generation: {str(e)}")
|
||||
return f"# Creative Content\n\nError in content generation: {str(e)}"
|
||||
|
||||
# Singleton-Instanz
|
||||
_creative_agent = None
|
||||
|
||||
# Factory function for the Creative agent
|
||||
def get_creative_agent():
|
||||
"""Gibt eine Singleton-Instanz des kreativen Agenten zurück"""
|
||||
global _creative_agent
|
||||
if _creative_agent is None:
|
||||
_creative_agent = AgentCreative()
|
||||
return _creative_agent
|
||||
"""
|
||||
Factory function that returns an instance of the Creative agent.
|
||||
|
||||
Returns:
|
||||
An instance of the Creative agent
|
||||
"""
|
||||
return AgentCreative()
|
||||
|
|
@ -1,312 +1,453 @@
|
|||
"""
|
||||
Dokumentations-Agent für die Erstellung von Dokumentation, Berichten und strukturierten Inhalten.
|
||||
Angepasst für die neue chat.py Architektur und chat_registry.py.
|
||||
Documentation agent for creating documentation, reports, and structured content.
|
||||
Optimized for the new task-based processing.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import json
|
||||
import uuid
|
||||
from typing import Dict, Any, List
|
||||
from datetime import datetime
|
||||
|
||||
from modules.chat_registry import AgentBase
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AgentDocumentation(AgentBase):
|
||||
"""Agent für die Erstellung von Dokumentation und strukturierten Inhalten"""
|
||||
"""Agent for creating documentation and structured content"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert den Dokumentations-Agent"""
|
||||
"""Initialize the documentation agent"""
|
||||
super().__init__()
|
||||
self.name = "Documentation Specialist"
|
||||
self.capabilities = "report_generation,documentation,content_structuring,technical_writing,knowledge_organization"
|
||||
self.name = "documentation"
|
||||
self.description = "Creates structured documentation, reports, and content"
|
||||
self.capabilities = [
|
||||
"report_generation",
|
||||
"documentation",
|
||||
"content_structuring",
|
||||
"technical_writing",
|
||||
"knowledge_organization"
|
||||
]
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""Gibt Agent-Informationen für die Registry zurück"""
|
||||
info = super().get_config()
|
||||
return info
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Verarbeitet eine Nachricht und erstellt Dokumentation.
|
||||
Process a standardized task structure and create documentation.
|
||||
|
||||
Args:
|
||||
message: Eingabenachricht
|
||||
context: Optionaler Kontext
|
||||
task: A dictionary containing:
|
||||
- task_id: Unique ID for this task
|
||||
- prompt: The main instruction for the agent
|
||||
- input_documents: List of documents to process
|
||||
- output_specifications: List of required output documents
|
||||
- context: Additional contextual information
|
||||
|
||||
Returns:
|
||||
Antwortnachricht mit Dokumentation
|
||||
A dictionary containing:
|
||||
- feedback: Text response explaining the created documentation
|
||||
- documents: List of created document objects
|
||||
"""
|
||||
# Workflow-ID aus Kontext oder Nachricht extrahieren
|
||||
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
|
||||
try:
|
||||
# Extract relevant task information
|
||||
prompt = task.get("prompt", "")
|
||||
input_documents = task.get("input_documents", [])
|
||||
output_specs = task.get("output_specifications", [])
|
||||
|
||||
# Antwortstruktur erstellen
|
||||
response = {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"agent_name": self.name,
|
||||
"workflow_id": workflow_id,
|
||||
# Check if AI service is available
|
||||
if not self.ai_service:
|
||||
logger.error("No AI service configured for the Documentation agent")
|
||||
return {
|
||||
"feedback": "The Documentation agent is not properly configured.",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
try:
|
||||
# Aufgabe aus Nachricht extrahieren
|
||||
task = message.get("content", "")
|
||||
# Extract context from input documents
|
||||
document_context = self._extract_document_context(input_documents)
|
||||
|
||||
# Dokumenttyp erkennen
|
||||
document_type = self._detect_document_type(task)
|
||||
logger.info(f"Erstelle {document_type}-Dokumentation")
|
||||
# Generate title for the document
|
||||
title = await self._generate_title(prompt, document_context)
|
||||
|
||||
# Angehängte Dokumente verarbeiten
|
||||
document_context = ""
|
||||
if message.get("documents"):
|
||||
logger.info("Verarbeite Referenzdokumente")
|
||||
document_context = self._process_documents(message)
|
||||
# Collect created documents
|
||||
generated_documents = []
|
||||
|
||||
# Prompt mit Dokumentkontext erweitern
|
||||
enhanced_prompt = f"{task}\n\n{document_context}" if document_context else task
|
||||
# Create a document for each requested output
|
||||
for spec in output_specs:
|
||||
output_label = spec.get("label", "")
|
||||
output_description = spec.get("description", "")
|
||||
|
||||
# Komplexität bewerten
|
||||
is_complex = self._assess_complexity(enhanced_prompt)
|
||||
# Determine format and document type based on file extension
|
||||
format_type, document_type = self._determine_format_and_type(output_label)
|
||||
|
||||
# Titel generieren
|
||||
title = await self._generate_title(enhanced_prompt, document_type)
|
||||
# Assess complexity
|
||||
is_complex = self._assess_complexity(prompt)
|
||||
|
||||
# Inhalt basierend auf Komplexität generieren
|
||||
# Generate document content based on complexity
|
||||
if is_complex:
|
||||
content = await self._generate_complex_document(enhanced_prompt, document_type, title)
|
||||
content = await self._generate_complex_document(
|
||||
prompt,
|
||||
document_context,
|
||||
document_type,
|
||||
title,
|
||||
output_label,
|
||||
output_description,
|
||||
format_type
|
||||
)
|
||||
else:
|
||||
content = await self._generate_simple_document(enhanced_prompt, document_type, title)
|
||||
content = await self._generate_simple_document(
|
||||
prompt,
|
||||
document_context,
|
||||
document_type,
|
||||
title,
|
||||
output_label,
|
||||
output_description,
|
||||
format_type
|
||||
)
|
||||
|
||||
# Dokument erstellen
|
||||
doc_id = f"doc_{uuid.uuid4()}"
|
||||
document = {
|
||||
"id": doc_id,
|
||||
"source": {
|
||||
"type": "generated",
|
||||
"id": doc_id,
|
||||
"name": title,
|
||||
"content_type": "text/markdown"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"type": "text",
|
||||
"text": content,
|
||||
"is_extracted": True
|
||||
# Add document to results list
|
||||
generated_documents.append({
|
||||
"label": output_label,
|
||||
"content": content
|
||||
})
|
||||
|
||||
# If no specific outputs requested, create default markdown document
|
||||
if not output_specs:
|
||||
content = await self._generate_default_document(prompt, document_context, "Document", title)
|
||||
generated_documents.append({
|
||||
"label": f"{self._sanitize_filename(title)}.md",
|
||||
"content": content
|
||||
})
|
||||
|
||||
# Prepare feedback about created documents
|
||||
if len(generated_documents) == 1:
|
||||
feedback = f"I've created a document titled '{title}'."
|
||||
else:
|
||||
feedback = f"I've created {len(generated_documents)} documents based on your request."
|
||||
|
||||
return {
|
||||
"feedback": feedback,
|
||||
"documents": generated_documents
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
# Dokument zur Antwort hinzufügen
|
||||
response["documents"].append(document)
|
||||
|
||||
# Antwortinhalt aktualisieren
|
||||
response["content"] = f"Ich habe ein Dokument mit dem Titel '{title}' erstellt, das die gewünschten Informationen enthält. Das Dokument ist dieser Nachricht beigefügt."
|
||||
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Fehler bei der Dokumentationserstellung: {str(e)}"
|
||||
error_msg = f"Error creating documentation: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
response["content"] = f"Bei der Erstellung der Dokumentation ist ein Fehler aufgetreten: {str(e)}"
|
||||
return response
|
||||
return {
|
||||
"feedback": f"An error occurred while creating the documentation: {str(e)}",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
def _detect_document_type(self, message: str) -> str:
|
||||
def _extract_document_context(self, documents: List[Dict[str, Any]]) -> str:
|
||||
"""
|
||||
Erkennt den Dokumenttyp aus der Nachricht.
|
||||
Extract context from input documents.
|
||||
|
||||
Args:
|
||||
message: Benutzernachricht
|
||||
documents: List of document objects
|
||||
|
||||
Returns:
|
||||
Erkannter Dokumenttyp
|
||||
Extracted context as text
|
||||
"""
|
||||
message = message.lower()
|
||||
if not documents:
|
||||
return ""
|
||||
|
||||
if any(term in message for term in ["manual", "guide", "instruction", "tutorial", "anleitung", "handbuch"]):
|
||||
return "manual"
|
||||
elif any(term in message for term in ["report", "analysis", "assessment", "review", "bericht", "analyse"]):
|
||||
return "report"
|
||||
elif any(term in message for term in ["process", "workflow", "procedure", "steps", "prozess", "ablauf"]):
|
||||
return "process"
|
||||
elif any(term in message for term in ["presentation", "slides", "deck", "präsentation", "folien"]):
|
||||
return "presentation"
|
||||
context_parts = []
|
||||
|
||||
for doc in documents:
|
||||
doc_name = doc.get("name", "Unnamed document")
|
||||
context_parts.append(f"--- {doc_name} ---")
|
||||
|
||||
for content in doc.get("contents", []):
|
||||
if content.get("metadata", {}).get("is_text", False):
|
||||
context_parts.append(content.get("data", ""))
|
||||
|
||||
return "\n\n".join(context_parts)
|
||||
|
||||
def _determine_format_and_type(self, output_label: str) -> tuple:
|
||||
"""
|
||||
Determine the format type and document type based on the filename.
|
||||
|
||||
Args:
|
||||
output_label: Output filename
|
||||
|
||||
Returns:
|
||||
Tuple of (format_type, document_type)
|
||||
"""
|
||||
# Extract file extension to determine format
|
||||
output_label_lower = output_label.lower()
|
||||
|
||||
# Determine format based on extension
|
||||
if output_label_lower.endswith(".md"):
|
||||
format_type = "markdown"
|
||||
elif output_label_lower.endswith(".html"):
|
||||
format_type = "html"
|
||||
elif output_label_lower.endswith(".txt"):
|
||||
format_type = "text"
|
||||
elif output_label_lower.endswith(".csv"):
|
||||
format_type = "csv"
|
||||
elif output_label_lower.endswith(".json"):
|
||||
format_type = "json"
|
||||
else:
|
||||
return "document"
|
||||
# Default to markdown
|
||||
format_type = "markdown"
|
||||
|
||||
def _process_documents(self, message: Dict[str, Any]) -> str:
|
||||
# Determine document type based on filename or format
|
||||
if "manual" in output_label_lower or "guide" in output_label_lower:
|
||||
document_type = "Manual"
|
||||
elif "report" in output_label_lower or "analysis" in output_label_lower:
|
||||
document_type = "Report"
|
||||
elif "process" in output_label_lower or "workflow" in output_label_lower:
|
||||
document_type = "Process Documentation"
|
||||
elif "present" in output_label_lower or "slide" in output_label_lower:
|
||||
document_type = "Presentation"
|
||||
else:
|
||||
document_type = "Document"
|
||||
|
||||
return format_type, document_type
|
||||
|
||||
def _assess_complexity(self, prompt: str) -> bool:
|
||||
"""
|
||||
Verarbeitet Dokumente in der Nachricht.
|
||||
Assess the complexity of the task.
|
||||
|
||||
Args:
|
||||
message: Nachricht mit Dokumenten
|
||||
prompt: Task description
|
||||
|
||||
Returns:
|
||||
Dokumentkontext als Text
|
||||
True for complex tasks, False otherwise
|
||||
"""
|
||||
document_context = ""
|
||||
# Language-agnostic complexity assessment
|
||||
prompt_length = len(prompt)
|
||||
|
||||
for document in message.get("documents", []):
|
||||
source = document.get("source", {})
|
||||
doc_name = source.get("name", "unnamed")
|
||||
# Check for structural indicators in a language-agnostic way
|
||||
has_sections = ":" in prompt and "\n" in prompt
|
||||
has_lists = "-" in prompt or "*" in prompt or "#" in prompt
|
||||
|
||||
document_context += f"\n\n--- {doc_name} ---\n"
|
||||
# Complex if the prompt is long or contains structural elements
|
||||
return prompt_length > 500 or has_sections or has_lists
|
||||
|
||||
for content in document.get("contents", []):
|
||||
if content.get("type") == "text":
|
||||
document_context += content.get("text", "")
|
||||
|
||||
return document_context
|
||||
|
||||
def _assess_complexity(self, task: str) -> bool:
|
||||
def _sanitize_filename(self, filename: str) -> str:
|
||||
"""
|
||||
Bewertet die Aufgabenkomplexität.
|
||||
Sanitize a filename by removing invalid characters.
|
||||
|
||||
Args:
|
||||
task: Die Aufgabenbeschreibung
|
||||
filename: Filename to sanitize
|
||||
|
||||
Returns:
|
||||
True bei komplexem Dokument, sonst False
|
||||
Sanitized filename
|
||||
"""
|
||||
# Einfache Heuristik zur Komplexitätsbewertung
|
||||
complexity_indicators = [
|
||||
"detailliert", "ausführlich", "umfassend", "komplex", "detailed",
|
||||
"comprehensive", "in-depth", "multiple sections", "kapitel",
|
||||
"abschnitte", "struktur", "analyse", "vergleich"
|
||||
]
|
||||
# Replace invalid characters with underscores
|
||||
invalid_chars = r'<>:"/\|?*'
|
||||
for char in invalid_chars:
|
||||
filename = filename.replace(char, '_')
|
||||
|
||||
# Zählen der Komplexitätsindikatoren
|
||||
indicator_count = sum(1 for indicator in complexity_indicators if indicator in task.lower())
|
||||
# Trim filename if too long
|
||||
if len(filename) > 100:
|
||||
filename = filename[:97] + "..."
|
||||
|
||||
# Weitere Indikatoren: Textlänge, Anzahl der Anforderungen
|
||||
length_factor = len(task) > 500
|
||||
requirements_count = task.lower().count("muss") + task.lower().count("soll") + task.lower().count("should") + task.lower().count("must")
|
||||
return filename
|
||||
|
||||
# Komplexität basierend auf Indikatoren bestimmen
|
||||
return (indicator_count >= 2) or (length_factor and requirements_count >= 3)
|
||||
|
||||
async def _generate_title(self, task: str, document_type: str) -> str:
|
||||
async def _generate_title(self, prompt: str, context: str) -> str:
|
||||
"""
|
||||
Generiert einen Titel für das Dokument.
|
||||
Generate a title for the document.
|
||||
|
||||
Args:
|
||||
task: Die Aufgabenbeschreibung
|
||||
document_type: Dokumenttyp
|
||||
prompt: Task description
|
||||
context: Document context
|
||||
|
||||
Returns:
|
||||
Generierter Titel
|
||||
Generated title
|
||||
"""
|
||||
if not self.ai_service:
|
||||
return f"{document_type.capitalize()} Dokument"
|
||||
return f"Document {uuid.uuid4().hex[:8]}"
|
||||
|
||||
prompt = f"""
|
||||
Erstelle einen prägnanten, professionellen Titel für dieses {document_type}:
|
||||
title_prompt = f"""
|
||||
Create a concise, professional title for this document based on the following request:
|
||||
|
||||
{task}
|
||||
{prompt}
|
||||
|
||||
Antworte NUR mit dem Titel, nichts anderes.
|
||||
Reply ONLY with the title, nothing else.
|
||||
"""
|
||||
|
||||
try:
|
||||
title = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du erstellst Dokumenttitel."},
|
||||
{"role": "user", "content": prompt}
|
||||
{"role": "system", "content": "You create precise document titles."},
|
||||
{"role": "user", "content": title_prompt}
|
||||
])
|
||||
|
||||
# Titel bereinigen
|
||||
return title.strip('"\'#*- \n\t')
|
||||
except Exception:
|
||||
return f"{document_type.capitalize()} Dokument"
|
||||
# Clean up title
|
||||
title = title.strip('"\'#*- \n\t')
|
||||
|
||||
async def _generate_complex_document(self, task: str, document_type: str, title: str) -> str:
|
||||
# Return default title if generated title is empty
|
||||
if not title:
|
||||
return f"Document {uuid.uuid4().hex[:8]}"
|
||||
|
||||
return title
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"Error in title generation: {str(e)}")
|
||||
return f"Document {uuid.uuid4().hex[:8]}"
|
||||
|
||||
async def _generate_complex_document(self, prompt: str, context: str, document_type: str,
|
||||
title: str, output_label: str, output_description: str,
|
||||
format_type: str) -> str:
|
||||
"""
|
||||
Generiert ein komplexes Dokument mit Struktur.
|
||||
Generate a complex document with structure.
|
||||
|
||||
Args:
|
||||
task: Die Aufgabenbeschreibung
|
||||
document_type: Dokumenttyp
|
||||
title: Dokumenttitel
|
||||
prompt: Task description
|
||||
context: Document context
|
||||
document_type: Document type
|
||||
title: Document title
|
||||
output_label: Output filename
|
||||
output_description: Description of desired output
|
||||
format_type: Output format
|
||||
|
||||
Returns:
|
||||
Generierter Dokumentinhalt
|
||||
Generated document content
|
||||
"""
|
||||
if not self.ai_service:
|
||||
return f"# {title}\n\nDokumentgenerierung nicht möglich: KI-Service nicht verfügbar."
|
||||
return f"# {title}\n\nDocument generation not possible: AI service not available."
|
||||
|
||||
prompt = f"""
|
||||
Erstelle ein umfassendes, gut strukturiertes {document_type} mit dem Titel "{title}" basierend auf:
|
||||
generation_prompt = f"""
|
||||
Create a comprehensive, well-structured {document_type} with the title "{title}" based on:
|
||||
|
||||
{task}
|
||||
TASK:
|
||||
{prompt}
|
||||
|
||||
Das Dokument sollte Folgendes enthalten:
|
||||
1. Eine klare Einleitung mit Zweck und Umfang
|
||||
2. Logisch organisierte Abschnitte mit Überschriften
|
||||
3. Detaillierte Inhalte mit Beispielen und Belegen
|
||||
4. Ein Fazit mit den wichtigsten Erkenntnissen
|
||||
5. Geeignete Formatierung mit Markdown
|
||||
CONTEXT:
|
||||
{context if context else 'No additional context available.'}
|
||||
|
||||
Formatiere das Dokument in Markdown mit korrekten Überschriften, Listen und Hervorhebungen.
|
||||
OUTPUT REQUIREMENTS:
|
||||
- Filename: {output_label}
|
||||
- Description: {output_description}
|
||||
- Format: {format_type}
|
||||
|
||||
The document should include:
|
||||
1. A clear introduction with purpose and scope
|
||||
2. Logically organized sections with headings
|
||||
3. Detailed content with examples and evidence
|
||||
4. A conclusion with key insights
|
||||
5. Appropriate formatting according to the output format ({format_type})
|
||||
|
||||
The document must perfectly match the {format_type} format.
|
||||
"""
|
||||
|
||||
try:
|
||||
content = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du erstellst umfassende, gut strukturierte Dokumentation."},
|
||||
{"role": "user", "content": prompt}
|
||||
{"role": "system", "content": f"You create comprehensive, well-structured documentation in {format_type} format."},
|
||||
{"role": "user", "content": generation_prompt}
|
||||
])
|
||||
|
||||
# Sicherstellen, dass der Titel am Anfang steht
|
||||
# For markdown format, ensure the title is at the beginning
|
||||
if format_type == "markdown" and not content.strip().startswith("# "):
|
||||
content = f"# {title}\n\n{content}"
|
||||
|
||||
return content
|
||||
except Exception as e:
|
||||
logger.error(f"Error in document generation: {str(e)}")
|
||||
return f"# {title}\n\nError in document generation: {str(e)}"
|
||||
|
||||
async def _generate_simple_document(self, prompt: str, context: str, document_type: str,
|
||||
title: str, output_label: str, output_description: str,
|
||||
format_type: str) -> str:
|
||||
"""
|
||||
Generate a simple document without complex structure.
|
||||
|
||||
Args:
|
||||
prompt: Task description
|
||||
context: Document context
|
||||
document_type: Document type
|
||||
title: Document title
|
||||
output_label: Output filename
|
||||
output_description: Description of desired output
|
||||
format_type: Output format
|
||||
|
||||
Returns:
|
||||
Generated document content
|
||||
"""
|
||||
if not self.ai_service:
|
||||
return f"# {title}\n\nDocument generation not possible: AI service not available."
|
||||
|
||||
generation_prompt = f"""
|
||||
Create a precise, focused {document_type} with the title "{title}" based on:
|
||||
|
||||
TASK:
|
||||
{prompt}
|
||||
|
||||
CONTEXT:
|
||||
{context if context else 'No additional context available.'}
|
||||
|
||||
OUTPUT REQUIREMENTS:
|
||||
- Filename: {output_label}
|
||||
- Description: {output_description}
|
||||
- Format: {format_type}
|
||||
|
||||
The document should be clear, precise, and to the point, without a complex chapter structure.
|
||||
Format it according to the output format ({format_type}).
|
||||
|
||||
The document must perfectly match the {format_type} format.
|
||||
"""
|
||||
|
||||
try:
|
||||
content = await self.ai_service.call_api([
|
||||
{"role": "system", "content": f"You create precise, focused documentation in {format_type} format."},
|
||||
{"role": "user", "content": generation_prompt}
|
||||
])
|
||||
|
||||
# For markdown format, ensure the title is at the beginning
|
||||
if format_type == "markdown" and not content.strip().startswith("# "):
|
||||
content = f"# {title}\n\n{content}"
|
||||
|
||||
return content
|
||||
except Exception as e:
|
||||
logger.error(f"Error in document generation: {str(e)}")
|
||||
return f"# {title}\n\nError in document generation: {str(e)}"
|
||||
|
||||
async def _generate_default_document(self, prompt: str, context: str, document_type: str, title: str) -> str:
|
||||
"""
|
||||
Generate a default markdown document when no specific output specifications are present.
|
||||
|
||||
Args:
|
||||
prompt: Task description
|
||||
context: Document context
|
||||
document_type: Document type
|
||||
title: Document title
|
||||
|
||||
Returns:
|
||||
Generated document content
|
||||
"""
|
||||
if not self.ai_service:
|
||||
return f"# {title}\n\nDocument generation not possible: AI service not available."
|
||||
|
||||
generation_prompt = f"""
|
||||
Create a structured {document_type} with the title "{title}" based on:
|
||||
|
||||
TASK:
|
||||
{prompt}
|
||||
|
||||
CONTEXT:
|
||||
{context if context else 'No additional context available.'}
|
||||
|
||||
Format the document with markdown syntax and create a clear, professional structure.
|
||||
"""
|
||||
|
||||
try:
|
||||
content = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "You create structured documentation in markdown format."},
|
||||
{"role": "user", "content": generation_prompt}
|
||||
])
|
||||
|
||||
# Ensure the title is at the beginning
|
||||
if not content.strip().startswith("# "):
|
||||
content = f"# {title}\n\n{content}"
|
||||
|
||||
return content
|
||||
except Exception as e:
|
||||
return f"# {title}\n\nFehler bei der Dokumentgenerierung: {str(e)}"
|
||||
logger.error(f"Error in document generation: {str(e)}")
|
||||
return f"# {title}\n\nError in document generation: {str(e)}"
|
||||
|
||||
async def _generate_simple_document(self, task: str, document_type: str, title: str) -> str:
|
||||
"""
|
||||
Generiert ein einfaches Dokument ohne komplexe Struktur.
|
||||
|
||||
Args:
|
||||
task: Die Aufgabenbeschreibung
|
||||
document_type: Dokumenttyp
|
||||
title: Dokumenttitel
|
||||
|
||||
Returns:
|
||||
Generierter Dokumentinhalt
|
||||
"""
|
||||
if not self.ai_service:
|
||||
return f"# {title}\n\nDokumentgenerierung nicht möglich: KI-Service nicht verfügbar."
|
||||
|
||||
prompt = f"""
|
||||
Erstelle ein präzises, fokussiertes {document_type} mit dem Titel "{title}" basierend auf:
|
||||
|
||||
{task}
|
||||
|
||||
Das Dokument sollte klar, präzise und auf den Punkt sein, ohne komplexe Kapitelstruktur.
|
||||
Formatiere es mit Markdown und verwende geeignete Überschriften und Formatierungen.
|
||||
"""
|
||||
|
||||
try:
|
||||
content = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du erstellst präzise, fokussierte Dokumentation."},
|
||||
{"role": "user", "content": prompt}
|
||||
])
|
||||
|
||||
# Sicherstellen, dass der Titel am Anfang steht
|
||||
if not content.strip().startswith("# "):
|
||||
content = f"# {title}\n\n{content}"
|
||||
|
||||
return content
|
||||
except Exception as e:
|
||||
return f"# {title}\n\nFehler bei der Dokumentgenerierung: {str(e)}"
|
||||
|
||||
# Singleton-Instanz
|
||||
_documentation_agent = None
|
||||
|
||||
# Factory function for the Documentation agent
|
||||
def get_documentation_agent():
|
||||
"""Gibt eine Singleton-Instanz des Dokumentations-Agenten zurück"""
|
||||
global _documentation_agent
|
||||
if _documentation_agent is None:
|
||||
_documentation_agent = AgentDocumentation()
|
||||
return _documentation_agent
|
||||
"""
|
||||
Factory function that returns an instance of the Documentation agent.
|
||||
|
||||
Returns:
|
||||
An instance of the Documentation agent
|
||||
"""
|
||||
return AgentDocumentation()
|
||||
|
|
@ -1,125 +1,137 @@
|
|||
"""
|
||||
Webcrawler-Agent für Recherche und Abruf von Informationen aus dem Web.
|
||||
Angepasst für die neue chat.py Architektur und chat_registry.py.
|
||||
Webcrawler agent for research and retrieval of information from the web.
|
||||
Optimized for the new task-based processing.
|
||||
"""
|
||||
|
||||
import json
|
||||
import logging
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from typing import Dict, Any, List, Optional
|
||||
from typing import Dict, Any, List
|
||||
from urllib.parse import quote_plus, unquote
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
import requests
|
||||
import markdown
|
||||
|
||||
from modules.chat_registry import AgentBase
|
||||
from modules.configuration import APP_CONFIG
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AgentWebcrawler(AgentBase):
|
||||
"""Agent für Webrecherche und Informationsabruf"""
|
||||
"""Agent for web research and information retrieval"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert den Webcrawler-Agent"""
|
||||
"""Initialize the webcrawler agent"""
|
||||
super().__init__()
|
||||
self.name = "Webscraper"
|
||||
self.capabilities = "web_search,website_information_retrieval"
|
||||
self.name = "webcrawler"
|
||||
self.description = "Conducts web research and collects information from online sources"
|
||||
self.capabilities = [
|
||||
"web_search",
|
||||
"information_retrieval",
|
||||
"data_collection",
|
||||
"search_results_analysis",
|
||||
"webpage_content_extraction"
|
||||
]
|
||||
|
||||
# Web-Crawling-Konfiguration
|
||||
self.max_url = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_URLS"))
|
||||
self.max_key = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_KEYWORDS"))
|
||||
self.max_result = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_RESULTS"))
|
||||
self.timeout = int(APP_CONFIG.get("Connector_AiWebscraping_TIMEOUT"))
|
||||
# Web crawling configuration
|
||||
self.max_url = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_URLS", "5"))
|
||||
self.max_key = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_KEYWORDS", "3"))
|
||||
self.max_result = int(APP_CONFIG.get("Connector_AiWebscraping_MAX_SEARCH_RESULTS", "5"))
|
||||
self.timeout = int(APP_CONFIG.get("Connector_AiWebscraping_TIMEOUT", "30"))
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""Gibt Agent-Informationen für die Registry zurück"""
|
||||
info = super().get_config()
|
||||
return info
|
||||
|
||||
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
|
||||
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Verarbeitet eine Nachricht und führt bei Bedarf eine Webrecherche durch.
|
||||
Process a standardized task structure and conduct web research.
|
||||
|
||||
Args:
|
||||
message: Die zu verarbeitende Nachricht
|
||||
context: Zusätzlicher Kontext
|
||||
task: A dictionary containing:
|
||||
- task_id: Unique ID for this task
|
||||
- prompt: The main instruction for the agent
|
||||
- input_documents: List of documents to process
|
||||
- output_specifications: List of required output documents
|
||||
- context: Additional contextual information
|
||||
|
||||
Returns:
|
||||
Die generierte Antwort oder Ablehnung, wenn keine Webrecherche erforderlich ist
|
||||
A dictionary containing:
|
||||
- feedback: Text response explaining the research results
|
||||
- documents: List of created document objects
|
||||
"""
|
||||
# Workflow-ID aus Kontext oder Nachricht extrahieren
|
||||
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
|
||||
try:
|
||||
# Extract relevant task information
|
||||
prompt = task.get("prompt", "")
|
||||
output_specs = task.get("output_specifications", [])
|
||||
|
||||
# Antwortstruktur erstellen
|
||||
response = {
|
||||
"role": "assistant",
|
||||
"content": "",
|
||||
"agent_name": self.name,
|
||||
"workflow_id": workflow_id
|
||||
# Check if AI service is available
|
||||
if not self.ai_service:
|
||||
logger.error("No AI service configured for the Webcrawler agent")
|
||||
return {
|
||||
"feedback": "The Webcrawler agent is not properly configured.",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
try:
|
||||
# Abfrage aus der Nachricht abrufen
|
||||
prompt = message.get("content", "").strip()
|
||||
|
||||
# Prüfen, ob es sich explizit um eine Webrecherche-Anfrage handelt
|
||||
# Check if this is a web research request
|
||||
is_web_research = await self._is_web_research_request(prompt)
|
||||
|
||||
if not is_web_research:
|
||||
# Keine Webrecherche-Anfrage ablehnen
|
||||
logger.info("Anfrage abgelehnt: keine Webrecherche-Aufgabe")
|
||||
response["content"] = "Diese Anfrage scheint keine Webrecherche zu erfordern. Weiterleitung an einen passenderen Agenten."
|
||||
response["status"] = "rejected"
|
||||
return response
|
||||
logger.info("Request rejected: not a web research task")
|
||||
return {
|
||||
"feedback": "This request doesn't appear to require web research.",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
# Mit Webrecherche fortfahren
|
||||
logger.info(f"Webrecherche für: {prompt[:50]}...")
|
||||
|
||||
# Suchstrategie vorbereiten
|
||||
logger.info("Erstelle Suchstrategie")
|
||||
# Proceed with web research
|
||||
logger.info(f"Web research for: {prompt[:50]}...")
|
||||
|
||||
# Create search strategy
|
||||
search_strategy = await self._create_search_strategy(prompt)
|
||||
search_keys = search_strategy.get("skey", [])
|
||||
search_urls = search_strategy.get("url", [])
|
||||
|
||||
if search_keys:
|
||||
logger.info(f"Suche nach {len(search_keys)} Schlüsselbegriffen: {', '.join(search_keys[:2])}...")
|
||||
logger.info(f"Searching for {len(search_keys)} key terms: {', '.join(search_keys[:2])}...")
|
||||
|
||||
if search_urls:
|
||||
logger.info(f"Suche in {len(search_urls)} direkten URLs: {', '.join(search_urls[:2])}...")
|
||||
logger.info(f"Searching in {len(search_urls)} direct URLs: {', '.join(search_urls[:2])}...")
|
||||
|
||||
# Suche ausführen
|
||||
# Execute search
|
||||
results = []
|
||||
|
||||
# Suchbegriffe verarbeiten
|
||||
# Process search terms
|
||||
for keyword in search_keys:
|
||||
logger.info(f"Suche im Web nach: '{keyword}'")
|
||||
logger.info(f"Searching the web for: '{keyword}'")
|
||||
keyword_results = self._search_web(keyword)
|
||||
results.extend(keyword_results)
|
||||
logger.info(f"Gefunden: {len(keyword_results)} Ergebnisse für '{keyword}'")
|
||||
logger.info(f"Found: {len(keyword_results)} results for '{keyword}'")
|
||||
|
||||
# Direkte URLs verarbeiten
|
||||
# Process direct URLs
|
||||
for url in search_urls:
|
||||
logger.info(f"Extrahiere Inhalt von: {url}")
|
||||
logger.info(f"Extracting content from: {url}")
|
||||
soup = self._read_url(url)
|
||||
|
||||
# Titel aus der Seite extrahieren, falls vorhanden
|
||||
# Extract title from the page, if available
|
||||
title = self._extract_title(soup, url)
|
||||
|
||||
result = self._parse_result(soup, title, url)
|
||||
results.append(result)
|
||||
logger.info(f"Extrahiert: '{title}' von {url}")
|
||||
logger.info(f"Extracted: '{title}' from {url}")
|
||||
|
||||
# Ergebnisse für die endgültige Ausgabe verarbeiten
|
||||
logger.info(f"Analysiere {len(results)} Web-Ergebnisse")
|
||||
# Process results for final output
|
||||
logger.info(f"Analyzing {len(results)} web results")
|
||||
|
||||
# Zusammenfassungen für jedes Ergebnis generieren
|
||||
# Generate summaries for each result
|
||||
processed_results = []
|
||||
for i, result in enumerate(results):
|
||||
result_data_limited = self._limit_text(result['data'], max_chars=10000)
|
||||
|
||||
logger.info(f"Analysiere Ergebnis {i+1}/{len(results)}: {result['title'][:30]}...")
|
||||
logger.info(f"Analyzing result {i+1}/{len(results)}: {result['title'][:30]}...")
|
||||
|
||||
# No AI service available, create minimal summary
|
||||
if not self.ai_service:
|
||||
content_summary = f"Extract from {result['url']} ({len(result_data_limited)} characters)"
|
||||
else:
|
||||
# Generate summary with AI
|
||||
content_summary = await self._summarize_result(result_data_limited, prompt)
|
||||
|
||||
processed_result = {
|
||||
|
|
@ -131,102 +143,212 @@ class AgentWebcrawler(AgentBase):
|
|||
|
||||
processed_results.append(processed_result)
|
||||
|
||||
# Gesamtzusammenfassung erstellen
|
||||
# Create overall summary
|
||||
all_summaries = "\n\n".join([r["summary"] for r in processed_results])
|
||||
all_summaries_limited = self._limit_text(all_summaries, max_chars=10000)
|
||||
|
||||
logger.info("Erstelle Gesamtzusammenfassung der Webrecherche")
|
||||
logger.info("Creating overall summary of web research")
|
||||
|
||||
if not self.ai_service:
|
||||
final_summary = f"Summary of {len(processed_results)} web research results"
|
||||
else:
|
||||
final_summary = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du erstellst prägnante Zusammenfassungen von Rechercheergebnissen."},
|
||||
{"role": "user", "content": f"Bitte fasse diese Erkenntnisse in 5-6 Sätzen zusammen: {all_summaries_limited}\n"}
|
||||
{"role": "system", "content": "You create concise summaries of research results."},
|
||||
{"role": "user", "content": f"Please summarize these findings in 5-6 sentences: {all_summaries_limited}\n"}
|
||||
])
|
||||
|
||||
# Sprache der Anfrage ermitteln, um Überschriften in der richtigen Sprache zu verwenden
|
||||
# Get localized headers for output
|
||||
headers = await self._get_localized_headers(prompt)
|
||||
|
||||
# Endgültiges Ergebnis formatieren
|
||||
final_result = f"## {headers['web_research_results']}\n\n### {headers['summary']}\n{final_summary}\n\n### {headers['detailed_results']}\n"
|
||||
# Create document objects based on output specifications
|
||||
generated_documents = []
|
||||
|
||||
for i, result in enumerate(processed_results, 1):
|
||||
final_result += f"\n\n[{i}] {result['title']}\n{headers['url']}: {result['url']}\n{headers['snippet']}: {result['snippet']}\n{headers['content']}: {result['summary']}"
|
||||
# Generate appropriate document for each requested output
|
||||
for spec in output_specs:
|
||||
output_label = spec.get("label", "")
|
||||
output_description = spec.get("description", "")
|
||||
|
||||
# Inhalt in der Antwort setzen
|
||||
response["content"] = final_result
|
||||
# Determine output format based on file extension
|
||||
format_type = self._determine_format_type(output_label)
|
||||
|
||||
logger.info("Webrecherche erfolgreich abgeschlossen")
|
||||
# Generate content based on format and requirements
|
||||
if format_type == "markdown" or format_type == "text":
|
||||
content = self._format_results_as_markdown(processed_results, final_summary, headers)
|
||||
elif format_type == "html":
|
||||
md_content = self._format_results_as_markdown(processed_results, final_summary, headers)
|
||||
content = markdown.markdown(md_content)
|
||||
elif format_type == "json":
|
||||
content = json.dumps({
|
||||
"summary": final_summary,
|
||||
"results": processed_results
|
||||
}, indent=2, ensure_ascii=False)
|
||||
elif format_type == "csv":
|
||||
csv_lines = ["Title,URL,Snippet"]
|
||||
for result in processed_results:
|
||||
# Escape commas and quotes in fields
|
||||
title = result["title"].replace('"', '""')
|
||||
url = result["url"].replace('"', '""')
|
||||
snippet = result["snippet"].replace('"', '""')
|
||||
csv_line = f'"{title}","{url}","{snippet}"'
|
||||
csv_lines.append(csv_line)
|
||||
content = "\n".join(csv_lines)
|
||||
else:
|
||||
# Default: Markdown
|
||||
content = self._format_results_as_markdown(processed_results, final_summary, headers)
|
||||
|
||||
return response
|
||||
# Add document to results list
|
||||
generated_documents.append({
|
||||
"label": output_label,
|
||||
"content": content
|
||||
})
|
||||
|
||||
# If no specific outputs requested, return standard document
|
||||
if not output_specs:
|
||||
content = self._format_results_as_markdown(processed_results, final_summary, headers)
|
||||
generated_documents.append({
|
||||
"label": "web_research_results.md",
|
||||
"content": content
|
||||
})
|
||||
|
||||
# Create feedback for response
|
||||
feedback = f"I conducted web research on '{prompt[:50]}...' and found {len(processed_results)} relevant results."
|
||||
|
||||
logger.info("Web research completed successfully")
|
||||
|
||||
return {
|
||||
"feedback": feedback,
|
||||
"documents": generated_documents
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
error_msg = f"Fehler bei der Webrecherche: {str(e)}"
|
||||
error_msg = f"Error during web research: {str(e)}"
|
||||
logger.error(error_msg)
|
||||
response["content"] = f"## Fehler bei der Webrecherche\n\n{error_msg}"
|
||||
return response
|
||||
return {
|
||||
"feedback": f"An error occurred during the web research: {str(e)}",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
|
||||
def _determine_format_type(self, output_label: str) -> str:
|
||||
"""
|
||||
Determine the format type based on the filename.
|
||||
|
||||
Args:
|
||||
output_label: Output filename
|
||||
|
||||
Returns:
|
||||
Format type (markdown, html, text, json, csv)
|
||||
"""
|
||||
output_label_lower = output_label.lower()
|
||||
|
||||
if output_label_lower.endswith(".md"):
|
||||
return "markdown"
|
||||
elif output_label_lower.endswith(".html"):
|
||||
return "html"
|
||||
elif output_label_lower.endswith(".txt"):
|
||||
return "text"
|
||||
elif output_label_lower.endswith(".json"):
|
||||
return "json"
|
||||
elif output_label_lower.endswith(".csv"):
|
||||
return "csv"
|
||||
else:
|
||||
# Default to markdown
|
||||
return "markdown"
|
||||
|
||||
def _format_results_as_markdown(self, results: List[Dict[str, Any]],
|
||||
summary: str, headers: Dict[str, str]) -> str:
|
||||
"""
|
||||
Format research results as markdown.
|
||||
|
||||
Args:
|
||||
results: List of results
|
||||
summary: Summary of all results
|
||||
headers: Localized headers
|
||||
|
||||
Returns:
|
||||
Formatted markdown text
|
||||
"""
|
||||
md_content = f"# {headers['web_research_results']}\n\n"
|
||||
|
||||
md_content += f"## {headers['summary']}\n\n{summary}\n\n"
|
||||
|
||||
if results:
|
||||
md_content += f"## {headers['detailed_results']}\n\n"
|
||||
|
||||
for i, result in enumerate(results, 1):
|
||||
md_content += f"### {i}. {result['title']}\n\n"
|
||||
md_content += f"**{headers['url']}**: {result['url']}\n\n"
|
||||
md_content += f"**{headers['snippet']}**: {result['snippet']}\n\n"
|
||||
md_content += f"**{headers['content']}**: {result['summary']}\n\n"
|
||||
|
||||
# Add separator between results (except for the last one)
|
||||
if i < len(results):
|
||||
md_content += "---\n\n"
|
||||
|
||||
return md_content
|
||||
|
||||
async def _is_web_research_request(self, prompt: str) -> bool:
|
||||
"""
|
||||
Verwendet KI, um festzustellen, ob eine Anfrage Webrecherche erfordert.
|
||||
Use AI to determine if a request requires web research.
|
||||
|
||||
Args:
|
||||
prompt: Die Benutzeranfrage
|
||||
prompt: The user request
|
||||
|
||||
Returns:
|
||||
True, wenn es explizit eine Webrecherche-Anfrage ist, sonst False
|
||||
True if it is explicitly a web research request, False otherwise
|
||||
"""
|
||||
if not self.ai_service:
|
||||
# Fallback zur einfacheren Erkennung, wenn kein KI-Service verfügbar ist
|
||||
# Fallback to simpler detection if no AI service is available
|
||||
return self._simple_web_detection(prompt)
|
||||
|
||||
try:
|
||||
# Prompt erstellen, um zu analysieren, ob es sich um eine Webrecherche-Anfrage handelt
|
||||
# Create prompt to analyze if this is a web research request
|
||||
analysis_prompt = f"""
|
||||
Analysiere die folgende Anfrage und bestimme, ob sie explizit eine Webrecherche oder Online-Informationen erfordert.
|
||||
Analyze the following request and determine if it explicitly requires web research or online information.
|
||||
|
||||
ANFRAGE: {prompt}
|
||||
REQUEST: {prompt}
|
||||
|
||||
Eine Anfrage erfordert Webrecherche, wenn:
|
||||
1. Sie explizit nach der Suche von Informationen online fragt
|
||||
2. Sie URLs oder Verweise auf Websites enthält
|
||||
3. Sie aktuelle Informationen anfordert, die im Web verfügbar wären
|
||||
4. Sie nach Informationen aus Web-Quellen fragt
|
||||
5. Sie implizit aktuelle Informationen aus dem Internet erfordert
|
||||
A request requires web research if:
|
||||
1. It explicitly asks for searching information online
|
||||
2. It contains URLs or references to websites
|
||||
3. It requests current information that would be available on the web
|
||||
4. It asks for information from web sources
|
||||
5. It implicitly requires current information from the internet
|
||||
|
||||
Antworte NUR mit einem einzelnen Wort - entweder "JA", wenn Webrecherche erforderlich ist, oder "NEIN", wenn nicht.
|
||||
Füge KEINE Erklärung hinzu, nur die Antwort JA oder NEIN.
|
||||
Reply ONLY with a single word - either "YES" if web research is required, or "NO" if not.
|
||||
"""
|
||||
|
||||
# KI zur Analyse aufrufen
|
||||
# Call AI for analysis
|
||||
response = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du bestimmst, ob eine Anfrage Webrecherche erfordert. Antworte immer nur mit JA oder NEIN."},
|
||||
{"role": "system", "content": "You determine if a request requires web research. Always respond with just YES or NO."},
|
||||
{"role": "user", "content": analysis_prompt}
|
||||
])
|
||||
|
||||
# Antwort bereinigen und überprüfen
|
||||
# Clean response and check
|
||||
response = response.strip().upper()
|
||||
|
||||
return "JA" in response
|
||||
return "YES" in response
|
||||
|
||||
except Exception as e:
|
||||
# Fehler protokollieren, aber nicht fehlschlagen, Fallback zur einfacheren Erkennung
|
||||
logger.warning(f"Fehler bei der KI-Erkennung von Webrecherche-Anfragen: {str(e)}")
|
||||
# Log error but don't fail, fallback to simpler detection
|
||||
logger.warning(f"Error in AI detection of web research requests: {str(e)}")
|
||||
return self._simple_web_detection(prompt)
|
||||
|
||||
def _simple_web_detection(self, prompt: str) -> bool:
|
||||
"""
|
||||
Einfachere Fallback-Methode zur Erkennung von Webrecherche-Anfragen anhand von URLs.
|
||||
Simpler fallback method for detecting web research requests based on URLs.
|
||||
|
||||
Args:
|
||||
prompt: Die Benutzeranfrage
|
||||
prompt: The user request
|
||||
|
||||
Returns:
|
||||
True, wenn es klare URL-Indikatoren gibt, sonst False
|
||||
True if there are clear URL indicators, False otherwise
|
||||
"""
|
||||
# URLs in der Anfrage deuten stark auf Webrecherche hin
|
||||
# URLs in the request strongly indicate web research
|
||||
url_indicators = ["http://", "https://", "www.", ".com", ".org", ".net", ".edu", ".gov"]
|
||||
web_terms = ["search", "find online", "look up", "web", "internet", "website", "suche", "finde", "recherchiere"]
|
||||
web_terms = ["search", "find online", "look up", "web", "internet", "website"]
|
||||
|
||||
# Auf URL-Muster in der Anfrage prüfen
|
||||
# Check for URL patterns in the request
|
||||
contains_url = any(indicator in prompt.lower() for indicator in url_indicators)
|
||||
contains_web_term = any(term in prompt.lower() for term in web_terms)
|
||||
|
||||
|
|
@ -234,100 +356,118 @@ class AgentWebcrawler(AgentBase):
|
|||
|
||||
async def _create_search_strategy(self, prompt: str) -> Dict[str, List[str]]:
|
||||
"""
|
||||
Erstellt eine Suchstrategie basierend auf der Anfrage.
|
||||
Create a search strategy based on the request.
|
||||
|
||||
Args:
|
||||
prompt: Die Benutzeranfrage
|
||||
prompt: The user request
|
||||
|
||||
Returns:
|
||||
Suchstrategie mit URLs und Suchbegriffen
|
||||
Search strategy with URLs and search terms
|
||||
"""
|
||||
if not self.ai_service:
|
||||
# Fallback zur einfachen Strategie
|
||||
# Fallback to simple strategy
|
||||
return {"skey": [prompt], "url": []}
|
||||
|
||||
try:
|
||||
# KI-Prompt zur Erstellung einer Suchstrategie
|
||||
strategy_prompt = f"""Erstelle eine umfassende Webrecherchestrategie für die Aufgabe = '{prompt.replace("'","")}'. Gib die Ergebnisse als Python-Dictionary mit diesen spezifischen Schlüsseln zurück. Wenn bestimmte URLs angegeben sind und die Aufgabe nur die Analyse dieser URLs erfordert, lass 'skey' leer.
|
||||
# AI prompt to create a search strategy
|
||||
strategy_prompt = f"""Create a comprehensive web research strategy for the following task:
|
||||
'{prompt.replace("'","")}'
|
||||
|
||||
'url': Eine Liste von maximal {self.max_url} spezifischen URLs, die aus der Aufgabenstellung extrahiert wurden.
|
||||
Return the results as a Python dictionary with these specific keys:
|
||||
|
||||
'skey': Eine Liste von maximal {self.max_key} Schlüsselsätzen, nach denen im Web gesucht werden soll. Diese sollten präzise, vielfältig und gezielt sein, um die relevantesten Informationen zu erhalten.
|
||||
'url': A list of up to {self.max_url} specific URLs extracted from the task.
|
||||
|
||||
Formatiere deine Antwort als gültiges JSON-Objekt mit diesen beiden Schlüsseln. Füge keinen erklärenden Text oder Markdown außerhalb der Objektdefinition hinzu.
|
||||
'skey': A list of up to {self.max_key} key phrases to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
|
||||
|
||||
If specific URLs are given and the task only requires analyzing these URLs, leave 'skey' empty.
|
||||
|
||||
Format your response as a valid JSON object with these two keys. Don't add any explanatory text.
|
||||
"""
|
||||
|
||||
# KI für Suchstrategie aufrufen
|
||||
# Call AI for search strategy
|
||||
content_text = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."},
|
||||
{"role": "system", "content": "You are a web research expert who develops precise search strategies."},
|
||||
{"role": "user", "content": strategy_prompt}
|
||||
])
|
||||
|
||||
# JSON-Code-Block-Markierungen entfernen, falls vorhanden
|
||||
# Remove JSON code block markers if present
|
||||
if content_text.startswith("```json"):
|
||||
end_marker = "```"
|
||||
end_index = content_text.rfind(end_marker)
|
||||
if end_index != -1:
|
||||
content_text = content_text[7:end_index].strip()
|
||||
elif content_text.startswith("```"):
|
||||
end_marker = "```"
|
||||
end_index = content_text.rfind(end_marker)
|
||||
if end_index != -1:
|
||||
content_text = content_text[3:end_index].strip()
|
||||
|
||||
# JSON parsen und zurückgeben
|
||||
# Extract only the JSON part (if surrounded by text)
|
||||
json_match = re.search(r'(\{.*\})', content_text, re.DOTALL)
|
||||
if json_match:
|
||||
content_text = json_match.group(1)
|
||||
|
||||
# Parse JSON and return
|
||||
strategy = json.loads(content_text)
|
||||
return strategy
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler bei der Erstellung der Suchstrategie: {str(e)}")
|
||||
# Einfache Fallback-Strategie
|
||||
logger.error(f"Error creating search strategy: {str(e)}")
|
||||
# Simple fallback strategy
|
||||
return {"skey": [prompt], "url": []}
|
||||
|
||||
async def _summarize_result(self, result_data: str, original_prompt: str) -> str:
|
||||
"""
|
||||
Erstellt eine Zusammenfassung eines Suchergebnisses mit KI.
|
||||
Create a summary of a search result using AI.
|
||||
|
||||
Args:
|
||||
result_data: Die zu zusammenfassenden Daten
|
||||
original_prompt: Die ursprüngliche Anfrage
|
||||
result_data: The data to summarize
|
||||
original_prompt: The original request
|
||||
|
||||
Returns:
|
||||
Zusammenfassung des Ergebnisses
|
||||
Summary of the result
|
||||
"""
|
||||
if not self.ai_service:
|
||||
return "Keine Zusammenfassung verfügbar (KI-Service nicht verfügbar)"
|
||||
return f"Summary of {len(result_data)} characters not available (AI service not available)"
|
||||
|
||||
try:
|
||||
# Anweisungen für die Zusammenfassung
|
||||
# Instructions for summarization
|
||||
summary_prompt = f"""
|
||||
Fasse dieses Suchergebnis gemäß der ursprünglichen Anfrage in etwa 2000 Zeichen zusammen. Ursprüngliche Anfrage = '{original_prompt.replace("'","")}'
|
||||
Konzentriere dich auf die wichtigsten Erkenntnisse und verbinde sie mit der ursprünglichen Anfrage. Du kannst jede Einleitung überspringen.
|
||||
Extrahiere nur relevante und hochwertige Informationen im Zusammenhang mit der Anfrage und präsentiere sie in einem klaren Format. Biete eine ausgewogene Ansicht der recherchierten Informationen.
|
||||
Summarize this search result according to the original request in about 2000 characters.
|
||||
|
||||
Hier ist das Suchergebnis:
|
||||
Original request = '{original_prompt.replace("'","")}'
|
||||
|
||||
Focus on the most important findings and connect them to the original request.
|
||||
Extract only relevant and high-quality information.
|
||||
|
||||
Here's the search result:
|
||||
{result_data}
|
||||
"""
|
||||
|
||||
# KI für Zusammenfassung aufrufen
|
||||
# Call AI for summary
|
||||
summary = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."},
|
||||
{"role": "system", "content": "You are an information analyst who summarizes web content precisely and relevantly."},
|
||||
{"role": "user", "content": summary_prompt}
|
||||
])
|
||||
|
||||
# Auf ~2000 Zeichen begrenzen
|
||||
# Limit to ~2000 characters
|
||||
return summary[:2000]
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler bei der Zusammenfassung des Ergebnisses: {str(e)}")
|
||||
return "Fehler bei der Zusammenfassung"
|
||||
logger.error(f"Error summarizing result: {str(e)}")
|
||||
return "Error creating summary"
|
||||
|
||||
async def _get_localized_headers(self, text: str) -> Dict[str, str]:
|
||||
"""
|
||||
Ermittelt lokalisierte Überschriften für die Webrecherche-Ergebnisse basierend auf der erkannten Sprache.
|
||||
Determine localized headers for web research results based on detected language.
|
||||
|
||||
Args:
|
||||
text: Text zur Spracherkennung
|
||||
text: Text for language detection
|
||||
|
||||
Returns:
|
||||
Dictionary mit lokalisierten Überschriften
|
||||
Dictionary with localized headers
|
||||
"""
|
||||
# Standard-Englische Überschriften
|
||||
# Default English headers
|
||||
headers = {
|
||||
"web_research_results": "Web Research Results",
|
||||
"summary": "Summary",
|
||||
|
|
@ -341,44 +481,22 @@ class AgentWebcrawler(AgentBase):
|
|||
return headers
|
||||
|
||||
try:
|
||||
# Sprache erkennen
|
||||
language_prompt = f"In welcher Sprache ist dieser Text geschrieben? Antworte nur mit dem Sprachnamen: {text[:200]}"
|
||||
# Detect language
|
||||
language_prompt = f"What language is this text written in? Answer with just the language name: {text[:200]}"
|
||||
language = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du bestimmst die Sprache eines Textes und gibst nur den Sprachnamen zurück."},
|
||||
{"role": "system", "content": "You determine the language of a text and return only the language name."},
|
||||
{"role": "user", "content": language_prompt}
|
||||
])
|
||||
|
||||
language = language.strip().lower()
|
||||
|
||||
# Englische Sprache oder Spracherkennung fehlgeschlagen, Standardüberschriften zurückgeben
|
||||
# English language or language detection failed, return default headers
|
||||
if language in ["english", "en", ""]:
|
||||
return headers
|
||||
|
||||
# Deutsche Überschriften
|
||||
if language in ["deutsch", "german", "de"]:
|
||||
return {
|
||||
"web_research_results": "Webrecherche-Ergebnisse",
|
||||
"summary": "Zusammenfassung",
|
||||
"detailed_results": "Detaillierte Ergebnisse",
|
||||
"url": "URL",
|
||||
"snippet": "Ausschnitt",
|
||||
"content": "Inhalt"
|
||||
}
|
||||
|
||||
# Französische Überschriften
|
||||
if language in ["französisch", "french", "fr"]:
|
||||
return {
|
||||
"web_research_results": "Résultats de recherche Web",
|
||||
"summary": "Résumé",
|
||||
"detailed_results": "Résultats détaillés",
|
||||
"url": "URL",
|
||||
"snippet": "Extrait",
|
||||
"content": "Contenu"
|
||||
}
|
||||
|
||||
# Überschriften übersetzen, wenn Sprache erkannt, aber keine vordefinierte Übersetzung
|
||||
# Translate headers if language recognized but no predefined translation
|
||||
translation_prompt = f"""
|
||||
Übersetze diese Webrecherche-Ergebnisüberschriften ins {language}:
|
||||
Translate these web research result headers to {language}:
|
||||
|
||||
Web Research Results
|
||||
Summary
|
||||
|
|
@ -387,71 +505,73 @@ class AgentWebcrawler(AgentBase):
|
|||
Snippet
|
||||
Content
|
||||
|
||||
Gib ein JSON-Objekt mit diesen Schlüsseln zurück:
|
||||
Return a JSON object with these keys:
|
||||
web_research_results, summary, detailed_results, url, snippet, content
|
||||
"""
|
||||
|
||||
# KI für Übersetzung aufrufen
|
||||
# Call AI for translation
|
||||
response = await self.ai_service.call_api([
|
||||
{"role": "system", "content": "Du übersetzt Überschriften in die angegebene Sprache und gibst sie als JSON zurück."},
|
||||
{"role": "system", "content": "You translate headers to the specified language and return them as JSON."},
|
||||
{"role": "user", "content": translation_prompt}
|
||||
])
|
||||
|
||||
# JSON extrahieren
|
||||
import re
|
||||
# Extract JSON
|
||||
json_match = re.search(r'\{.*\}', response, re.DOTALL)
|
||||
|
||||
if json_match:
|
||||
try:
|
||||
translated_headers = json.loads(json_match.group(0))
|
||||
return translated_headers
|
||||
except json.JSONDecodeError:
|
||||
logger.warning(f"Error parsing translated headers JSON")
|
||||
|
||||
except Exception as e:
|
||||
# Fehler protokollieren, aber mit englischen Überschriften fortfahren
|
||||
logger.warning(f"Fehler beim Übersetzen der Überschriften: {str(e)}")
|
||||
# Log error but continue with English headers
|
||||
logger.warning(f"Error translating headers: {str(e)}")
|
||||
|
||||
return headers
|
||||
|
||||
def _search_web(self, query: str) -> List[Dict[str, str]]:
|
||||
"""
|
||||
Führt eine Websuche durch und gibt die Ergebnisse zurück.
|
||||
Conduct a web search and return the results.
|
||||
|
||||
Args:
|
||||
query: Die Suchanfrage
|
||||
query: The search query
|
||||
|
||||
Returns:
|
||||
Liste von Suchergebnissen
|
||||
List of search results
|
||||
"""
|
||||
formatted_query = quote_plus(query)
|
||||
url = f"{APP_CONFIG.get('Connector_AiWebscraping_SEARCH_ENGINE')}{formatted_query}"
|
||||
url = f"{APP_CONFIG.get('Connector_AiWebscraping_SEARCH_ENGINE', 'https://html.duckduckgo.com/html/?q=')}{formatted_query}"
|
||||
|
||||
search_results_soup = self._read_url(url)
|
||||
if not isinstance(search_results_soup, BeautifulSoup) or not search_results_soup.select('.result'):
|
||||
logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
|
||||
logger.warning(f"No search results found for: {query}")
|
||||
return []
|
||||
|
||||
# Suchergebnisse extrahieren
|
||||
# Extract search results
|
||||
results = []
|
||||
|
||||
# Alle Ergebniscontainer finden
|
||||
# Find all result containers
|
||||
result_elements = search_results_soup.select('.result')
|
||||
|
||||
for result in result_elements:
|
||||
# Titel extrahieren
|
||||
# Extract title
|
||||
title_element = result.select_one('.result__a')
|
||||
title = title_element.text.strip() if title_element else 'Kein Titel'
|
||||
title = title_element.text.strip() if title_element else 'No title'
|
||||
|
||||
# URL extrahieren (DuckDuckGo verwendet Weiterleitungen)
|
||||
# Extract URL (DuckDuckGo uses redirects)
|
||||
url_element = title_element.get('href') if title_element else ''
|
||||
extracted_url = 'Keine URL'
|
||||
extracted_url = 'No URL'
|
||||
|
||||
if url_element:
|
||||
# Tatsächliche URL aus DuckDuckGos Weiterleitung extrahieren
|
||||
# Extract actual URL from DuckDuckGo's redirect
|
||||
if url_element.startswith('/d.js?q='):
|
||||
start = url_element.find('?q=') + 3
|
||||
end = url_element.find('&', start) if '&' in url_element[start:] else None
|
||||
extracted_url = unquote(url_element[start:end])
|
||||
|
||||
# Sicherstellen, dass die URL das korrekte Protokollpräfix hat
|
||||
# Ensure URL has correct protocol prefix
|
||||
if not extracted_url.startswith(('http://', 'https://')):
|
||||
if not extracted_url.startswith('//'):
|
||||
extracted_url = 'https://' + extracted_url
|
||||
|
|
@ -460,14 +580,14 @@ class AgentWebcrawler(AgentBase):
|
|||
else:
|
||||
extracted_url = url_element
|
||||
|
||||
# Snippet direkt aus der Suchergebnisseite extrahieren
|
||||
# Extract snippet directly from search results page
|
||||
snippet_element = result.select_one('.result__snippet')
|
||||
snippet = snippet_element.text.strip() if snippet_element else 'Keine Beschreibung'
|
||||
snippet = snippet_element.text.strip() if snippet_element else 'No description'
|
||||
|
||||
# Tatsächlichen Seiteninhalt für das Datenfeld abrufen
|
||||
# Get actual page content for the data field
|
||||
target_page_soup = self._read_url(extracted_url)
|
||||
|
||||
# Neue Inhaltsextraktionsmethode verwenden, um Inhaltsgröße zu begrenzen
|
||||
# Use new content extraction method to limit content size
|
||||
content = self._extract_main_content(target_page_soup)
|
||||
|
||||
results.append({
|
||||
|
|
@ -477,7 +597,7 @@ class AgentWebcrawler(AgentBase):
|
|||
'data': content
|
||||
})
|
||||
|
||||
# Anzahl der Ergebnisse bei Bedarf begrenzen
|
||||
# Limit number of results if needed
|
||||
if len(results) >= self.max_result:
|
||||
break
|
||||
|
||||
|
|
@ -485,68 +605,68 @@ class AgentWebcrawler(AgentBase):
|
|||
|
||||
def _read_url(self, url: str) -> BeautifulSoup:
|
||||
"""
|
||||
Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
|
||||
Read a URL and return a BeautifulSoup parser for the content.
|
||||
|
||||
Args:
|
||||
url: Die zu lesende URL
|
||||
url: The URL to read
|
||||
|
||||
Returns:
|
||||
BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
|
||||
BeautifulSoup object with the content or empty on errors
|
||||
"""
|
||||
headers = {
|
||||
'User-Agent': APP_CONFIG.get("Connector_AiWebscraping_USER_AGENT"),
|
||||
'User-Agent': APP_CONFIG.get("Connector_AiWebscraping_USER_AGENT", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"),
|
||||
'Accept': 'text/html,application/xhtml+xml,application/xml',
|
||||
'Accept-Language': 'en-US,en;q=0.9',
|
||||
}
|
||||
|
||||
try:
|
||||
# Initiale Anfrage
|
||||
# Initial request
|
||||
response = requests.get(url, headers=headers, timeout=self.timeout)
|
||||
|
||||
# Abfragen für Status 202
|
||||
# Handling for status 202
|
||||
if response.status_code == 202:
|
||||
# Maximal 3 Versuche mit zunehmenden Intervallen
|
||||
# Max 3 retries with increasing intervals
|
||||
backoff_times = [0.5, 1.0, 2.0, 5.0]
|
||||
|
||||
for wait_time in backoff_times:
|
||||
time.sleep(wait_time) # Mit zunehmender Zeit warten
|
||||
time.sleep(wait_time) # Wait with increasing time
|
||||
response = requests.get(url, headers=headers, timeout=self.timeout)
|
||||
|
||||
# Wenn kein 202 mehr, dann abbrechen
|
||||
# If no more 202, break
|
||||
if response.status_code != 202:
|
||||
break
|
||||
|
||||
# Für andere Fehlerstatuscodes einen Fehler auslösen
|
||||
# Raise for other error status codes
|
||||
response.raise_for_status()
|
||||
|
||||
# HTML parsen
|
||||
# Parse HTML
|
||||
return BeautifulSoup(response.text, 'html.parser')
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Lesen der URL {url}: {str(e)}")
|
||||
# Leeres BeautifulSoup-Objekt erstellen
|
||||
logger.error(f"Error reading URL {url}: {str(e)}")
|
||||
# Create empty BeautifulSoup object
|
||||
return BeautifulSoup("<html><body></body></html>", 'html.parser')
|
||||
|
||||
def _extract_title(self, soup: BeautifulSoup, url: str) -> str:
|
||||
"""
|
||||
Extrahiert den Titel aus einer Webseite.
|
||||
Extract the title from a webpage.
|
||||
|
||||
Args:
|
||||
soup: BeautifulSoup-Objekt der Webseite
|
||||
url: URL der Webseite
|
||||
soup: BeautifulSoup object of the webpage
|
||||
url: URL of the webpage
|
||||
|
||||
Returns:
|
||||
Extrahierter Titel
|
||||
Extracted title
|
||||
"""
|
||||
if not isinstance(soup, BeautifulSoup):
|
||||
return f"Fehler bei {url}"
|
||||
return f"Error with {url}"
|
||||
|
||||
# Titel aus dem title-Tag extrahieren
|
||||
# Extract title from title tag
|
||||
title_tag = soup.find('title')
|
||||
title = title_tag.text.strip() if title_tag else "Kein Titel"
|
||||
title = title_tag.text.strip() if title_tag else "No title"
|
||||
|
||||
# Alternative: Auch nach h1-Tags suchen, wenn der title-Tag fehlt
|
||||
if title == "Kein Titel":
|
||||
# Alternative: Also look for h1 tags if title tag is missing
|
||||
if title == "No title":
|
||||
h1_tag = soup.find('h1')
|
||||
if h1_tag:
|
||||
title = h1_tag.text.strip()
|
||||
|
|
@ -555,19 +675,19 @@ class AgentWebcrawler(AgentBase):
|
|||
|
||||
def _extract_main_content(self, soup: BeautifulSoup, max_chars: int = 10000) -> str:
|
||||
"""
|
||||
Extrahiert den Hauptinhalt aus einer HTML-Seite.
|
||||
Extract the main content from an HTML page.
|
||||
|
||||
Args:
|
||||
soup: BeautifulSoup-Objekt der Webseite
|
||||
max_chars: Maximale Anzahl von Zeichen
|
||||
soup: BeautifulSoup object of the webpage
|
||||
max_chars: Maximum number of characters
|
||||
|
||||
Returns:
|
||||
Extrahierter Hauptinhalt als String
|
||||
Extracted main content as a string
|
||||
"""
|
||||
if not isinstance(soup, BeautifulSoup):
|
||||
return str(soup)[:max_chars] if soup else ""
|
||||
|
||||
# Versuchen, Hauptinhaltselemente in Prioritätsreihenfolge zu finden
|
||||
# Try to find main content elements in priority order
|
||||
main_content = None
|
||||
for selector in ['main', 'article', '#content', '.content', '#main', '.main']:
|
||||
content = soup.select_one(selector)
|
||||
|
|
@ -575,70 +695,71 @@ class AgentWebcrawler(AgentBase):
|
|||
main_content = content
|
||||
break
|
||||
|
||||
# Wenn kein Hauptinhalt gefunden wurde, den Body verwenden
|
||||
# If no main content found, use the body
|
||||
if not main_content:
|
||||
main_content = soup.find('body') or soup
|
||||
|
||||
# Skript-, Style-, Nav-, Footer-Elemente entfernen, die nicht zum Hauptinhalt beitragen
|
||||
# Remove script, style, nav, footer elements that don't contribute to main content
|
||||
for element in main_content.select('script, style, nav, footer, header, aside, .sidebar, #sidebar, .comments, #comments, .advertisement, .ads, iframe'):
|
||||
element.extract()
|
||||
|
||||
# Textinhalt extrahieren
|
||||
# Extract text content
|
||||
text_content = main_content.get_text(separator=' ', strip=True)
|
||||
|
||||
# Auf max_chars begrenzen
|
||||
# Limit to max_chars
|
||||
return text_content[:max_chars]
|
||||
|
||||
def _parse_result(self, soup: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
|
||||
"""
|
||||
Parst ein BeautifulSoup-Objekt in ein Ergebnis-Dictionary.
|
||||
Parse a BeautifulSoup object into a result dictionary.
|
||||
|
||||
Args:
|
||||
soup: BeautifulSoup-Objekt der Webseite
|
||||
title: Seitentitel
|
||||
url: Seiten-URL
|
||||
soup: BeautifulSoup object of the webpage
|
||||
title: Page title
|
||||
url: Page URL
|
||||
|
||||
Returns:
|
||||
Dictionary mit Ergebnisdaten
|
||||
Dictionary with result data
|
||||
"""
|
||||
# Inhalt extrahieren
|
||||
# Extract content
|
||||
content = self._extract_main_content(soup)
|
||||
|
||||
result = {
|
||||
'title': title,
|
||||
'url': url,
|
||||
'snippet': 'Keine Beschreibung', # Standardwert
|
||||
'snippet': 'No description', # Default value
|
||||
'data': content
|
||||
}
|
||||
return result
|
||||
|
||||
def _limit_text(self, text: str, max_chars: int = 10000) -> str:
|
||||
"""
|
||||
Begrenzt den Text auf eine maximale Anzahl von Zeichen.
|
||||
Limit text to a maximum number of characters.
|
||||
|
||||
Args:
|
||||
text: Eingangstext
|
||||
max_chars: Maximale Anzahl von Zeichen
|
||||
text: Input text
|
||||
max_chars: Maximum number of characters
|
||||
|
||||
Returns:
|
||||
Begrenzter Text
|
||||
Limited text
|
||||
"""
|
||||
if not text:
|
||||
return ""
|
||||
|
||||
# Wenn der Text bereits unter dem Limit liegt, unverändert zurückgeben
|
||||
# If text is already under the limit, return unchanged
|
||||
if len(text) <= max_chars:
|
||||
return text
|
||||
|
||||
# Andernfalls den Text auf max_chars begrenzen
|
||||
return text[:max_chars] + "... [Inhalt aufgrund der Länge gekürzt]"
|
||||
# Otherwise limit text to max_chars
|
||||
return text[:max_chars] + "... [Content truncated due to length]"
|
||||
|
||||
# Singleton-Instanz
|
||||
_webcrawler_agent = None
|
||||
|
||||
# Factory function for the Webcrawler agent
|
||||
def get_webcrawler_agent():
|
||||
"""Gibt eine Singleton-Instanz des Webcrawler-Agenten zurück"""
|
||||
global _webcrawler_agent
|
||||
if _webcrawler_agent is None:
|
||||
_webcrawler_agent = AgentWebcrawler()
|
||||
return _webcrawler_agent
|
||||
"""
|
||||
Factory function that returns an instance of the Webcrawler agent.
|
||||
|
||||
Returns:
|
||||
An instance of the Webcrawler agent
|
||||
"""
|
||||
return AgentWebcrawler()
|
||||
|
|
@ -1,207 +1,204 @@
|
|||
"""
|
||||
Chat Agent Registry Modul.
|
||||
Stellt ein zentrales Registry-System für alle verfügbaren Agenten bereit.
|
||||
Chat Agent Registry Module.
|
||||
Provides a central registry system for all available agents.
|
||||
Optimized for the standardized task processing pattern.
|
||||
"""
|
||||
|
||||
import os
|
||||
import logging
|
||||
import importlib
|
||||
import uuid
|
||||
from datetime import datetime
|
||||
from typing import Dict, Any, List, Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class AgentBase:
|
||||
"""
|
||||
Base class for all chat agents.
|
||||
Defines the standardized interface for task processing.
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
"""Initialize the base agent."""
|
||||
self.name = "base-agent"
|
||||
self.description = "Basic agent functionality"
|
||||
self.capabilities = []
|
||||
self.ai_service = None
|
||||
|
||||
def set_dependencies(self, ai_service=None):
|
||||
"""Set external dependencies for the agent."""
|
||||
self.ai_service = ai_service
|
||||
|
||||
def get_agent_info(self) -> Dict[str, Any]:
|
||||
"""
|
||||
Return standardized information about the agent's capabilities.
|
||||
|
||||
Returns:
|
||||
Dictionary with name, description, and capabilities
|
||||
"""
|
||||
return {
|
||||
"name": self.name,
|
||||
"description": self.description,
|
||||
"capabilities": self.capabilities
|
||||
}
|
||||
|
||||
async def process_task(self, task: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""
|
||||
Process a standardized task structure and return results.
|
||||
This method must be implemented by all concrete agent classes.
|
||||
|
||||
Args:
|
||||
task: A dictionary containing:
|
||||
- task_id: Unique ID for this task
|
||||
- workflow_id: ID of the parent workflow (optional)
|
||||
- prompt: The main instruction for the agent
|
||||
- input_documents: List of document objects to process
|
||||
- output_specifications: List of required output documents
|
||||
- context: Additional contextual information
|
||||
|
||||
Returns:
|
||||
A dictionary containing:
|
||||
- feedback: Text response explaining what the agent did
|
||||
- documents: List of document objects created by the agent
|
||||
"""
|
||||
# Base implementation - should be overridden by specialized agents
|
||||
logger.warning(f"Agent {self.name} is using the default implementation of process_task")
|
||||
return {
|
||||
"feedback": f"The process_task method was not implemented by agent '{self.name}'.",
|
||||
"documents": []
|
||||
}
|
||||
|
||||
|
||||
class AgentRegistry:
|
||||
"""Zentrale Registry für alle verfügbaren Agenten im System."""
|
||||
"""Central registry for all available agents in the system."""
|
||||
|
||||
_instance = None
|
||||
|
||||
@classmethod
|
||||
def get_instance(cls):
|
||||
"""Gibt eine Singleton-Instanz der Agent-Registry zurück."""
|
||||
"""Return a singleton instance of the agent registry."""
|
||||
if cls._instance is None:
|
||||
cls._instance = cls()
|
||||
return cls._instance
|
||||
|
||||
def __init__(self):
|
||||
"""Initialisiert die Agent-Registry."""
|
||||
"""Initialize the agent registry."""
|
||||
if AgentRegistry._instance is not None:
|
||||
raise RuntimeError("Singleton-Instanz existiert bereits - verwende get_instance()")
|
||||
raise RuntimeError("Singleton instance already exists - use get_instance()")
|
||||
|
||||
self.agents = {}
|
||||
self.ai_service = None
|
||||
self._load_agents()
|
||||
|
||||
def _load_agents(self):
|
||||
"""Lädt alle verfügbaren Agenten aus den Modulen."""
|
||||
logger.info("Lade Agent-Module...")
|
||||
"""Load all available agents from modules."""
|
||||
logger.info("Loading agent modules...")
|
||||
|
||||
# Liste der zu ladenden Agent-Module
|
||||
# List of agent modules to load
|
||||
agent_modules = []
|
||||
agent_dir = os.path.dirname(__file__)
|
||||
|
||||
# Durchsuche das Verzeichnis nach Agent-Modulen
|
||||
# Search the directory for agent modules
|
||||
for filename in os.listdir(agent_dir):
|
||||
if filename.startswith("chat_agent_") and filename.endswith(".py"):
|
||||
agent_modules.append(filename[:-3]) # Entferne .py-Endung
|
||||
agent_modules.append(filename[:-3]) # Remove .py extension
|
||||
|
||||
if not agent_modules:
|
||||
logger.warning("Keine Agent-Module gefunden")
|
||||
logger.warning("No agent modules found")
|
||||
return
|
||||
|
||||
logger.info(f"{len(agent_modules)} Agent-Module gefunden")
|
||||
logger.info(f"{len(agent_modules)} agent modules found")
|
||||
|
||||
# Lade jedes Agent-Modul
|
||||
# Load each agent module
|
||||
for module_name in agent_modules:
|
||||
try:
|
||||
# Importiere das Modul
|
||||
# Import the module
|
||||
module = importlib.import_module(f"modules.{module_name}")
|
||||
|
||||
# Suche nach der Agent-Klasse oder einer get_*_agent-Funktion
|
||||
agent_name= module_name.split('_')[-1]
|
||||
# Look for agent class or get_*_agent function
|
||||
agent_name = module_name.split('_')[-1]
|
||||
class_name = f"Agent{agent_name.capitalize()}"
|
||||
getter_name = f"get_{agent_name}_agent"
|
||||
|
||||
agent = None
|
||||
|
||||
# Versuche, den Agenten über die get_*_agent-Funktion zu erhalten
|
||||
# Try to get the agent via the get_*_agent function
|
||||
if hasattr(module, getter_name):
|
||||
getter_func = getattr(module, getter_name)
|
||||
agent = getter_func()
|
||||
logger.info(f"Agent '{agent.name}' über {getter_name}() geladen")
|
||||
logger.info(f"Agent '{agent.name}' loaded via {getter_name}()")
|
||||
|
||||
# Alternativ versuche, den Agenten direkt zu instanziieren
|
||||
# Alternatively, try to instantiate the agent directly
|
||||
elif hasattr(module, class_name):
|
||||
agent_class = getattr(module, class_name)
|
||||
agent = agent_class()
|
||||
logger.info(f"Agent '{agent.name}' (Typ: {agent.name}) direkt instanziert")
|
||||
logger.info(f"Agent '{agent.name}' directly instantiated")
|
||||
|
||||
if agent:
|
||||
# Registriere den Agenten
|
||||
# Register the agent
|
||||
self.register_agent(agent)
|
||||
else:
|
||||
logger.warning(f"Keine Agent-Klasse oder Getter-Funktion in Modul {module_name} gefunden")
|
||||
logger.warning(f"No agent class or getter function found in module {module_name}")
|
||||
|
||||
except ImportError as e:
|
||||
logger.error(f"Modul {module_name} konnte nicht importiert werden: {e}")
|
||||
logger.error(f"Module {module_name} could not be imported: {e}")
|
||||
except Exception as e:
|
||||
logger.error(f"Fehler beim Laden des Agenten aus Modul {module_name}: {e}")
|
||||
logger.error(f"Error loading agent from module {module_name}: {e}")
|
||||
|
||||
def set_ai_service(self, ai_service):
|
||||
"""Set the AI service for all agents."""
|
||||
self.ai_service = ai_service
|
||||
self.update_agent_dependencies()
|
||||
|
||||
def update_agent_dependencies(self):
|
||||
"""Aktualisiert die Abhängigkeiten für alle registrierten Agenten."""
|
||||
"""Update dependencies for all registered agents."""
|
||||
for agent_id, agent in self.agents.items():
|
||||
if hasattr(agent, 'set_dependencies'):
|
||||
agent.set_dependencies(ai_service=self.ai_service)
|
||||
|
||||
def register_agent(self, agent):
|
||||
"""
|
||||
Registriert einen Agenten in der Registry.
|
||||
Register an agent in the registry.
|
||||
|
||||
Args:
|
||||
agent: Der zu registrierende Agent
|
||||
agent: The agent to register
|
||||
"""
|
||||
agent_id = getattr(agent, 'name', "unknown_agent")
|
||||
# Initialisiere Agenten mit Abhängigkeiten
|
||||
# Initialize agent with dependencies
|
||||
if hasattr(agent, 'set_dependencies'):
|
||||
agent.set_dependencies(ai_service=self.ai_service)
|
||||
self.agents[agent_id] = agent
|
||||
logger.debug(f"Agent '{agent.name}' (Typ: {agent_id}, Name: {agent_id}) registriert")
|
||||
logger.debug(f"Agent '{agent.name}' registered")
|
||||
|
||||
def get_agent(self, agent_identifier: str):
|
||||
"""
|
||||
Gibt eine Agenten-Instanz zurück
|
||||
Return an agent instance
|
||||
Args:
|
||||
agent_identifier: ID oder Typ des gewünschten Agenten
|
||||
agent_identifier: ID or type of the desired agent
|
||||
Returns:
|
||||
Agenten-Instanz oder None, falls nicht gefunden
|
||||
Agent instance or None if not found
|
||||
"""
|
||||
if agent_identifier in self.agents:
|
||||
return self.agents[agent_identifier]
|
||||
logger.error(f"Agent mit Kennung '{agent_identifier}' nicht gefunden")
|
||||
logger.error(f"Agent with identifier '{agent_identifier}' not found")
|
||||
return None
|
||||
|
||||
def get_all_agents(self) -> Dict[str, Any]:
|
||||
"""Gibt alle registrierten Agenten zurück."""
|
||||
"""Return all registered agents."""
|
||||
return self.agents
|
||||
|
||||
def get_agent_infos(self) -> List[Dict[str, Any]]:
|
||||
"""Gibt Informationen über alle registrierten Agenten zurück."""
|
||||
"""Return information about all registered agents."""
|
||||
agent_infos = []
|
||||
seen_agents = set()
|
||||
for agent in self.agents.values():
|
||||
if agent not in seen_agents:
|
||||
# Verwende get_agent_info oder erstelle manuell die Info
|
||||
if hasattr(agent, 'get_agent_info'):
|
||||
agent_infos.append(agent.get_agent_info())
|
||||
else:
|
||||
agent_infos.append({
|
||||
"name": agent.name,
|
||||
"capabilities": getattr(agent, 'capabilities', ""),
|
||||
})
|
||||
logger.error(f"Agent '{agent.name}' does not show profile.")
|
||||
seen_agents.add(agent)
|
||||
return agent_infos
|
||||
|
||||
|
||||
# Base agent class
class AgentBase:
    """
    Base class for all chat agents.

    Defines the basic interface and functionality shared by the
    specialized chat_agent_* implementations.
    """

    def __init__(self):
        """Initialize the base agent with default metadata and no AI service."""
        self.name = "Basis-Agent"
        self.capabilities = "Grundlegende Agentenfunktionen"
        self.ai_service = None  # injected later via set_dependencies()

    def set_dependencies(self, ai_service=None):
        """Inject the shared AI service used to generate responses."""
        self.ai_service = ai_service

    def get_config(self) -> Dict[str, Any]:
        """Return the agent's public configuration (name and capabilities)."""
        return {
            "name": self.name,
            "capabilities": self.capabilities,
        }

    async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """Process a chat message and return an assistant response message.

        Base implementation - specialized agents should override this.

        Args:
            message: Message dict; only its "content" field is used.
            context: Optional extra context (unused by the base class).

        Returns:
            A message dict with "role", "content" and "agent_name" keys.
        """
        if not self.ai_service:
            # Fixed: the original logged self.id, an attribute that is
            # never defined on this class (AttributeError at runtime).
            logger.warning(f"Agent {self.name} has no configured AI service")
            return {
                "role": "assistant",
                "content": f"Ich bin {self.name}, aber ich bin nicht richtig konfiguriert. Bitte den AI-Service einrichten.",
                "agent_name": self.name,
            }

        # Build a simple prompt from the message content.
        prompt = message.get("content", "")

        # Generate the response via the shared AI service.
        try:
            response_content = self.ai_service.call_api([
                {"role": "system", "content": f"Du bist {self.name}, ein spezialisierter {self.name}-Agent mit Fähigkeiten in: {self.capabilities}"},
                {"role": "user", "content": prompt}
            ])

            return {
                "role": "assistant",
                "content": response_content,
                "agent_name": self.name,
            }
        except Exception as e:
            # Fixed: same self.id bug as above; log the agent's name instead.
            logger.error(f"Error in agent {self.name}: {str(e)}")
            return {
                "role": "assistant",
                "content": f"Ich habe einen Fehler festgestellt: {str(e)}",
                "agent_name": self.name,
            }
|
||||
|
||||
|
||||
# Singleton factory for the agent registry
def get_agent_registry():
    """Return the process-wide AgentRegistry singleton."""
    return AgentRegistry.get_instance()
|
||||
|
|
@ -1,33 +1,42 @@
|
|||
....................... TASKS
|
||||
|
||||
please revise all chat_agents* modules:
|
||||
- all comments, logs and outputs in english language
|
||||
- all ai answers in the language of the user
|
||||
- no language specific features like analysis of words. a prompt in japanese would not work with this! i need it generically.
|
||||
- why are there still data extraction routines in the modules? - data is already delivered in the input_documents section.
|
||||
|
||||
run agent, then save output files to db
|
||||
. files save-> fileid list, ALWAYS TO WRITE NEW FILES!
|
||||
. chat_message_to_workflow(role, agent,chatmsg, workflow): with answer and fileidlist
|
||||
documentation agent:
|
||||
- why try to find out the document type, when the "label" of the files to deliver ALWAYS includes the extension (e.g. .docx, .csv, etc.)? Please revise; this can be very much shortened and simplified
|
||||
|
||||
webcrawler_agent:
|
||||
- there is a try - except mapping problem in the code. please also fix this
|
||||
-
|
||||
|
||||
also attached chat.py and chat_content_extraction (centralized), so that you can see the structure of the passed parameters.
|
||||
|
||||
----------------------- OPEN
|
||||
|
||||
PRIO1:
|
||||
|
||||
Split big files into content-parts
|
||||
sharepoint connector with document search, content search, content extraction
|
||||
|
||||
add connector to myoutlook
|
||||
|
||||
Split big files into content-parts
|
||||
|
||||
|
||||
PRIO2:
|
||||
|
||||
implement cleanup routines for files in lucydom_interface (File_Management_CLEANUP_INTERVAL): temp older than interval, all orphaned
|
||||
|
||||
frontend: no labels definition
|
||||
|
||||
Integrate NDA Text as modal form - Data governance agreement by login with checkbox
|
||||
|
||||
sharepoint connector with document search, content search, content extraction
|
||||
|
||||
add connector to myoutlook
|
||||
|
||||
frontend to react
|
||||
|
||||
frontend: no labels definition
|
||||
|
||||
|
||||
|
||||
|
||||
----------------------- DONE
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ pandas==2.2.3 # Aktuelle Version beibehalten
|
|||
## Data Visualization
|
||||
matplotlib==3.8.0 # Keep current version
|
||||
seaborn==0.13.0
|
||||
markdown
|
||||
|
||||
## Web Scraping & HTTP
|
||||
beautifulsoup4==4.12.2
|
||||
|
|
|
|||
Loading…
Reference in a new issue