demo base

This commit is contained in:
valueon 2025-04-16 10:49:27 +02:00
parent c75a3b67ce
commit cb7e2357e5
16 changed files with 2978 additions and 436 deletions

View file

@ -1,7 +1,7 @@
{
"prompts": 1,
"files": 1,
"workflows": "4745941d-d058-49f4-a086-45a6206acd4d",
"workflow_logs": "log_994584d7-edce-407d-bfda-a43f1d3ba4b8",
"workflow_messages": "msg_209d65e3-6096-41a4-91a9-c57f1fba4abe"
"workflows": "c9667202-9030-4b92-8a4e-8a7cd20cda8f",
"workflow_logs": "log_bcaf36df-91d2-4e94-8b30-80cd6c1f0f42",
"workflow_messages": "msg_44525779-46da-493b-885d-4e8bd3b933d6"
}

View file

@ -2,39 +2,15 @@
{
"mandate_id": 1,
"user_id": 1,
"name": "LF-Nutshell.png",
"type": "image",
"content_type": "image/png",
"size": 52108,
"path": "./_uploads\\1\\file_f4cd9e8d-158f-450e-91b1-6b1231fb0900_LF-Nutshell.png",
"hash": "30640f00e9e123f6fecbddb9da7f0c399e89caedd186aa96a40f7201c2f9aba5",
"upload_date": "2025-04-04T08:54:37.031189",
"name": "auszug_liste_positionen.pdf",
"type": "document",
"content_type": "application/pdf",
"size": 299729,
"path": "./_uploads\\1\\file_a2c4427f-c778-4c46-98df-fe3c6a69e299_auszug_liste_positionen.pdf",
"hash": "fa9b47b19581a2dc6380b66592245270e648971a6a5671ddfc6e9073377a6612",
"upload_date": "2025-04-16T08:42:49.615206",
"id": 1
},
{
"mandate_id": 1,
"user_id": 1,
"name": "LF-Target.png",
"type": "image",
"content_type": "image/png",
"size": 256760,
"path": "./_uploads\\1\\file_208023f5-1e31-4981-8481-18c317a53c63_LF-Target.png",
"hash": "36feef589c28364729551ed89b1ca70034a557860ac71768b640ff55a8eae160",
"upload_date": "2025-04-04T09:40:03.517248",
"id": 2
},
{
"mandate_id": 1,
"user_id": 1,
"name": "LF-Current.png",
"type": "image",
"content_type": "image/png",
"size": 126277,
"path": "./_uploads\\1\\file_ba127a2b-52fb-4230-aee3-95019deab986_LF-Current.png",
"hash": "bdaec5c6442cb4922dd701432d47cfaaa3e044d0d8b179ff16f56ee01fb501ce",
"upload_date": "2025-04-04T09:58:18.805036",
"id": 3
},
{
"mandate_id": 1,
"user_id": 1,
@ -42,57 +18,9 @@
"type": "image",
"content_type": "image/png",
"size": 253009,
"path": "./_uploads\\1\\file_002dfba0-9984-43b5-8ee9-d240830b56ea_LF-Details.png",
"path": "./_uploads\\1\\file_3b671cb6-1db0-45b5-9584-7778c95ed466_LF-Details.png",
"hash": "b54b3af60771dd373e1ddc0a3682023250fa056a9a72b7ee41dc628489b553b2",
"upload_date": "2025-04-04T10:08:52.784966",
"id": 4
},
{
"mandate_id": 1,
"user_id": 1,
"name": "auszug_liste_positionen.pdf",
"type": "document",
"content_type": "application/pdf",
"size": 299729,
"path": "./_uploads\\1\\file_b7e4d19a-7225-427b-bbaa-08ea1e5abb80_auszug_liste_positionen.pdf",
"hash": "fa9b47b19581a2dc6380b66592245270e648971a6a5671ddfc6e9073377a6612",
"upload_date": "2025-04-04T12:42:55.018650",
"id": 5
},
{
"mandate_id": 1,
"user_id": 1,
"name": "myDELTAgroup.drawio.pdf",
"type": "document",
"content_type": "application/pdf",
"size": 58259,
"path": "./_uploads\\1\\file_8002f210-0b18-4932-8bed-c2b3db4dbbac_myDELTAgroup.drawio.pdf",
"hash": "a4dfee5809db7cf45de6a61fe0b15927fee92444abed0b89d6deccec8842ac5d",
"upload_date": "2025-04-04T18:32:16.037731",
"id": 6
},
{
"mandate_id": 1,
"user_id": 1,
"name": "prompt_a1.txt",
"type": "document",
"content_type": "text/plain",
"size": 498,
"path": "./_uploads\\1\\file_f2102d97-de0f-4df5-afb4-575d884ab76a_prompt_a1.txt",
"hash": "c17eb7cc2ed742ddeada7a9548bc5e7c943ed68456c3612acb3c0a94809e5c65",
"upload_date": "2025-04-04T18:33:15.676504",
"id": 7
},
{
"mandate_id": 1,
"user_id": 1,
"name": "data.csv",
"type": "document",
"content_type": "application/vnd.ms-excel",
"size": 78,
"path": "./_uploads\\1\\file_7980d2d6-53c8-48ed-930c-fa5dd114ce15_data.csv",
"hash": "8ba6e6b67fe69411e4eb3962180591aaa67778fa96b990b6df79efbc398bce31",
"upload_date": "2025-04-11T10:18:31.414800",
"id": 8
"upload_date": "2025-04-16T08:43:21.663565",
"id": 2
}
]

View file

@ -38,7 +38,7 @@
"mandate_id": 1,
"user_id": 1,
"content": "Analysiere den beigefügten Datensatz zu [THEMA] und identifiziere die wichtigsten Trends, Muster und Auffälligkeiten. Führe statistische Berechnungen durch, um deine Erkenntnisse zu untermauern. Stelle die Ergebnisse in einer klar strukturierten Analyse dar und ziehe relevante Schlussfolgerungen.",
"name": "Analyse: Datenanalyse",
"name": "Analyse: Datenanalysen",
"id": 6
},
{

View file

@ -77,6 +77,9 @@ class AnalystAgent(BaseAgent):
"""Set the document handler for file operations"""
self.document_handler = document_handler
"""
Main updates to the process_message method in AnalystAgent to consider all available content.
"""
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Process a message and perform data analysis.
@ -141,14 +144,18 @@ class AnalystAgent(BaseAgent):
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# If we don't have any data frames but expected to analyze data, report this issue
if not data_frames and any(term in task.lower() for term in ["analyze", "data", "csv", "excel", "file"]):
# Check if we have either data frames OR a substantial text task to analyze
# This is the key change - we're considering the task text as analyzable content
have_analyzable_content = len(data_frames) > 0 or (task and len(task.strip()) > 10)
if not have_analyzable_content:
# Only show warning if really no content is available
if message.get("documents"):
logging_utils.warning("No processable data found in the provided documents", "execution")
analysis_content = "## Data Analysis Report\n\nI couldn't find any processable data in the provided documents. Please ensure you've attached CSV, Excel, or other data files in a format I can analyze."
else:
logging_utils.warning("No documents provided for data analysis", "execution")
analysis_content = "## Data Analysis Report\n\nNo data documents were provided for analysis. Please attach CSV, Excel, or other data files for me to analyze."
logging_utils.warning("No documents or analyzable content provided for analysis", "execution")
analysis_content = "## Data Analysis Report\n\nNo data or sufficient text content was provided for analysis. Please provide text for analysis or attach data files for me to analyze."
response["content"] = analysis_content
return response
@ -204,7 +211,9 @@ class AnalystAgent(BaseAgent):
analysis_content += viz_references
else:
# Generate analysis based just on text if no data frames
# Generate analysis based just on text if no data frames but we have text to analyze
# This is the key change - we're analyzing the text content directly
logging_utils.info("No data frames available, analyzing text content", "execution")
analysis_content = await self._generate_analysis(enhanced_prompt, analysis_type)
# Final progress update
@ -239,7 +248,7 @@ class AnalystAgent(BaseAgent):
logging_utils.info(f"Created protocol result message: {result_message.id}", "execution")
return response
except Exception as e:
error_msg = f"Error during data analysis: {str(e)}"
logging_utils.error(error_msg, "error")
@ -258,7 +267,73 @@ class AnalystAgent(BaseAgent):
response["status"] = "error"
return response
"""
Add _create_enhanced_prompt method to better handle text content in analysis.
"""
def _create_enhanced_prompt(self, message: Dict[str, Any], document_context: str, context: Dict[str, Any] = None) -> str:
    """
    Create an enhanced prompt for analysis that integrates all available content.

    Builds one prompt string from three sources: the task text in
    ``message["content"]``, dependency outputs / expected-format hints found in
    ``context``, and the extracted ``document_context``.

    Args:
        message: The original message; only the "content" key is read here.
        document_context: Context extracted from documents (may be empty).
        context: Optional additional context; the "dependency_outputs" and
            "expected_format" keys are honoured when present.

    Returns:
        Enhanced prompt for analysis
    """
    # Get original task/prompt
    task = message.get("content", "")
    # Add context information if available
    context_info = ""
    if context:
        # Add any dependency outputs from previous activities
        if "dependency_outputs" in context:
            dependency_context = context.get("dependency_outputs", {})
            for name, value in dependency_context.items():
                if isinstance(value, dict) and "content" in value:
                    context_info += f"\n\n=== INPUT FROM {name.upper()} ===\n{value['content']}"
                else:
                    # Non-dict dependency values are stringified as-is
                    context_info += f"\n\n=== INPUT FROM {name.upper()} ===\n{str(value)}"
        # Add expected format information
        if "expected_format" in context:
            context_info += f"\n\nExpected output format: {context.get('expected_format')}"
    # Start with task
    enhanced_prompt = f"ANALYSIS TASK:\n{task}"
    # Add any context information
    if context_info:
        # NOTE(review): context_info already begins with "\n\n", so this
        # produces a run of four newlines — harmless in an LLM prompt.
        enhanced_prompt += f"\n\n{context_info}"
    # Add document context if available
    if document_context:
        enhanced_prompt += f"\n\n=== DOCUMENT CONTENT ===\n{document_context}"
    else:
        # If no document content, explicitly note that we're analyzing the text content directly
        enhanced_prompt += "\n\nNo data files were provided. Perform analysis on the text content itself."
    # Add final instructions
    if document_context:
        enhanced_prompt += "\n\nBased on the data and documents provided, please perform a comprehensive analysis."
    else:
        enhanced_prompt += "\n\nBased on the text content provided, please perform a comprehensive analysis."
    # NOTE(review): the source's indentation was lost; this check is restored
    # at the outer level so the task focus is appended in both branches —
    # confirm against the original that it was not nested under the `else`.
    if task:
        enhanced_prompt += f" Focus specifically on addressing: {task}"
    enhanced_prompt += "\n\nProvide insights, patterns, and conclusions in a clear, structured format."
    return enhanced_prompt
async def _process_and_extract_data(self, message: Dict[str, Any]) -> Tuple[str, Dict[str, pd.DataFrame]]:
"""
Process documents and extract structured data.
@ -516,6 +591,7 @@ class AnalystAgent(BaseAgent):
def _determine_analysis_type(self, task: str) -> str:
"""
Determine the type of analysis based on the task.
Enhanced to better handle text-based analysis.
Args:
task: The analysis task
@ -545,9 +621,18 @@ class AnalystAgent(BaseAgent):
elif any(term in task_lower for term in ["cluster", "segment", "categorize", "classify"]):
return "clustering"
# Check for text analysis specific terms
elif any(term in task_lower for term in ["text", "sentiment", "topic", "semantic", "meaning", "interpretation"]):
return "textual"
# Check for summary requests
elif any(term in task_lower for term in ["summarize", "summary", "overview", "digest"]):
return "summary"
# Default to general analysis
else:
return "general"
def _extract_data_insights(self, data_frames: Dict[str, pd.DataFrame]) -> str:
"""
@ -1486,10 +1571,16 @@ class AnalystAgent(BaseAgent):
# Convert to base64
image_base64 = base64.b64encode(image_png).decode('utf-8')
return image_base64
"""
Enhanced _generate_analysis method to better handle text-only analysis.
"""
async def _generate_analysis(self, prompt: str, analysis_type: str) -> str:
"""
Generate analysis based on prompt and analysis type.
Enhanced to handle text-only analysis.
Args:
prompt: The analysis prompt
@ -1505,21 +1596,41 @@ class AnalystAgent(BaseAgent):
# Create specialized prompt based on analysis type
system_prompt = self._get_analysis_system_prompt(analysis_type)
# Determine if this is a data-based or text-based analysis
is_data_analysis = "DATA INSIGHTS" in prompt
# Enhance the prompt with analysis-specific instructions
enhanced_prompt = f"""
Generate a detailed {analysis_type} analysis based on the following:
{prompt}
Your analysis should include:
1. A summary of the data
2. Key findings and insights
3. Supporting evidence and calculations
4. Clear conclusions
5. Recommendations where appropriate
Format the analysis in Markdown with proper headings, lists, and tables.
"""
if is_data_analysis:
enhanced_prompt = f"""
Generate a detailed {analysis_type} analysis based on the following data:
{prompt}
Your analysis should include:
1. A summary of the data
2. Key findings and insights
3. Supporting evidence and calculations
4. Clear conclusions
5. Recommendations where appropriate
Format the analysis in Markdown with proper headings, lists, and tables.
"""
else:
# Text-based analysis instructions
enhanced_prompt = f"""
Generate a detailed {analysis_type} analysis of the following text content:
{prompt}
Your analysis should include:
1. A summary of the main themes and topics
2. Key insights and observations
3. Analysis of structure, patterns, and relationships
4. Clear conclusions and interpretations
5. Recommendations or implications where appropriate
Format the analysis in Markdown with proper headings, lists, and tables.
"""
try:
content = await self.ai_service.call_api([
@ -1534,10 +1645,12 @@ class AnalystAgent(BaseAgent):
return content
except Exception as e:
return f"# {analysis_type.capitalize()} Analysis\n\nError generating analysis: {str(e)}"
def _get_analysis_system_prompt(self, analysis_type: str) -> str:
"""
Get specialized system prompt for specific analysis type.
Enhanced with text analysis capabilities.
Args:
analysis_type: Type of analysis
@ -1563,18 +1676,26 @@ class AnalystAgent(BaseAgent):
elif analysis_type == "clustering":
return f"{base_prompt}\n\nFocus on identifying natural groupings or segments within the data. Describe the characteristics of each cluster and what distinguishes them. Consider similarities within groups and differences between groups."
elif analysis_type == "textual":
return f"{base_prompt}\n\nFocus on analyzing the text content provided. Identify key themes, topics, and concepts. Analyze sentiment, tone, and perspective. Extract important relationships, arguments, or logical structures. Provide insights into the meaning and implications of the text."
elif analysis_type == "summary":
return f"{base_prompt}\n\nFocus on providing a concise overview of the provided content. Identify the main points, key arguments, and essential information. Distill complex information into clear, digestible insights. Maintain objectivity while highlighting the most important elements."
else:
return base_prompt
def _get_system_prompt(self) -> str:
"""
Get specialized system prompt for analyst agent.
Enhanced to handle text analysis better.
Returns:
System prompt
"""
return f"""
You are {self.name}, a specialized {self.type} agent focused on data analysis.
You are {self.name}, a specialized {self.type} agent focused on data and text analysis.
{self.description}
@ -1585,12 +1706,19 @@ class AnalystAgent(BaseAgent):
4. Highlight any important findings clearly
5. Suggest visualizations that would help understand the data
When analyzing text content:
1. Identify key themes, concepts, and topics
2. Extract important patterns and relationships
3. Provide insights into the meaning and implications of the text
4. Identify sentiment, tone, and perspective where relevant
5. Organize findings in a logical, structured way
For CSV data, interpret tables correctly and perform calculations accurately.
For textual data, extract key metrics and relationships.
For textual data, extract key metrics, themes and relationships.
Respond in a clear, analytical style, and format your findings in a structured report.
"""
def send_analysis_result(self, analysis_content: str, sender_id: str, receiver_id: str,
task_id: str, analysis_data: Dict[str, Any] = None,
context_id: str = None) -> AgentMessage:

View file

@ -521,6 +521,17 @@ class CoderAgent(BaseAgent):
"documents": []
}
# Send status update using protocol
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Starting code generation and execution",
sender_id=self.id,
status="in_progress",
progress=0.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
try:
# Extract content and documents
content = message.get("content", "")
@ -532,6 +543,17 @@ class CoderAgent(BaseAgent):
# Generate code based on the message content using AI
logging_utils.info("Generating new code with AI", "agents")
# Log status update - 10% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Analyzing requirements and generating code",
sender_id=self.id,
status="in_progress",
progress=0.1,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Generate code using AI
code_to_execute, requirements = await self._generate_code_from_prompt(content, documents)
if not code_to_execute:
@ -539,10 +561,57 @@ class CoderAgent(BaseAgent):
response["content"] = "I couldn't generate executable code based on your request. Please provide more detailed instructions."
self.message_utils.finalize_message(response)
return response
logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")
logging_utils.info(f"Code generated with AI ({len(code_to_execute)} characters)", "agents")
# Log status update - 30% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Code generated, preparing for execution",
sender_id=self.id,
status="in_progress",
progress=0.3,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Create code file document
code_doc_id = f"code_{uuid.uuid4()}"
code_filename = "generated_code.py"
code_document = {
"id": code_doc_id,
"source": {
"type": "generated",
"id": code_doc_id,
"name": code_filename,
"content_type": "text/x-python",
"size": len(code_to_execute)
},
"contents": [{
"type": "text",
"text": code_to_execute,
"is_extracted": True
}]
}
# Add code document to response
response["documents"].append(code_document)
logging_utils.info(f"Added code file '{code_filename}' to response", "agents")
# Execute the code with auto-correction loop
if code_to_execute:
# Log status update - 40% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Setting up execution environment",
sender_id=self.id,
status="in_progress",
progress=0.4,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Prepare execution context
execution_context = {
"workflow_id": workflow_id,
@ -551,6 +620,17 @@ class CoderAgent(BaseAgent):
"log_func": log_func
}
# Log status update - 50% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Executing code",
sender_id=self.id,
status="in_progress",
progress=0.5,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Enhanced execution with auto-correction
result, attempts_info = await self._execute_with_auto_correction(
code_to_execute,
@ -562,6 +642,17 @@ class CoderAgent(BaseAgent):
# Prepare response based on the final result (success or failure)
if result.get("success", False):
# Log status update - 80% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Code executed successfully, preparing results",
sender_id=self.id,
status="in_progress",
progress=0.8,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Code execution successful
output = result.get("output", "")
execution_result = result.get("result")
@ -600,6 +691,30 @@ class CoderAgent(BaseAgent):
response_content += f"**Attempt {i}:**\n\n"
response_content += f"```python\n{attempt['code']}\n```\n\n"
response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
# Create a correction attempt document for each attempt
attempt_doc_id = f"correction_{uuid.uuid4()}"
attempt_filename = f"correction_attempt_{i}.py"
attempt_document = {
"id": attempt_doc_id,
"source": {
"type": "generated",
"id": attempt_doc_id,
"name": attempt_filename,
"content_type": "text/x-python",
"size": len(attempt['code'])
},
"contents": [{
"type": "text",
"text": attempt['code'],
"is_extracted": True
}]
}
# Add correction document to response
response["documents"].append(attempt_document)
logging_utils.info(f"Added correction attempt file '{attempt_filename}' to response", "agents")
response["content"] = response_content
@ -643,6 +758,30 @@ class CoderAgent(BaseAgent):
response_content += f"**Attempt {i}:**\n\n"
response_content += f"```python\n{attempt['code']}\n```\n\n"
response_content += f"**Error:**\n\n```\n{attempt['error']}\n```\n\n"
# Create a correction attempt document for each attempt
attempt_doc_id = f"correction_{uuid.uuid4()}"
attempt_filename = f"correction_attempt_{i}.py"
attempt_document = {
"id": attempt_doc_id,
"source": {
"type": "generated",
"id": attempt_doc_id,
"name": attempt_filename,
"content_type": "text/x-python",
"size": len(attempt['code'])
},
"contents": [{
"type": "text",
"text": attempt['code'],
"is_extracted": True
}]
}
# Add correction document to response
response["documents"].append(attempt_document)
logging_utils.info(f"Added correction attempt file '{attempt_filename}' to response", "agents")
else:
# Just show the code and error
response_content += f"### Executed Code\n\n```python\n{code_to_execute}\n```\n\n"
@ -659,6 +798,17 @@ class CoderAgent(BaseAgent):
# Finalize response
self.message_utils.finalize_message(response)
# Log completion - 100% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Code execution complete",
sender_id=self.id,
status="completed",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Log success
logging_utils.info("CoderAgent has successfully processed the request", "agents")
@ -672,8 +822,22 @@ class CoderAgent(BaseAgent):
response["content"] = f"## Processing Error\n\n```\n{error_msg}\n\n{traceback.format_exc()}\n```"
self.message_utils.finalize_message(response)
# Log error status
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Error during code execution: {str(e)}",
sender_id=self.id,
status="error",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "error", self.id, self.name)
return response
def _create_document_from_result(self, execution_result, output_format="json"):
"""
Create a document object from execution results

View file

@ -0,0 +1,399 @@
"""
Creative Agent for knowledge-based answers and creative content generation.
Handles open questions, documentation tasks, and special 'poweron' requests.
Based on the refactored Core-Module.
"""
import logging
from typing import List, Dict, Any, Optional
import json
from modules.agentservice_base import BaseAgent
from modules.agentservice_utils import MessageUtils, LoggingUtils
from modules.agentservice_protocol import AgentCommunicationProtocol
logger = logging.getLogger(__name__)
class CreativeAgent(BaseAgent):
    """Agent for knowledge-based answers and creative content generation"""

    def __init__(self):
        """Initialize the Creative Agent"""
        super().__init__()
        # Registry identity of this agent
        self.id = "creative"
        self.name = "Creative Knowledge Assistant"
        self.type = "knowledge"
        self.description = "Provides knowledge-based answers, creates content, handles document processing, and responds to PowerOn requests"
        # Extended capabilities to explicitly cover document processing
        # (comma-separated string consumed by the agent registry)
        self.capabilities = ("knowledge_sharing,content_creation,document_generation,"
                             "creative_writing,poweron,document_processing,"
                             "information_extraction,data_transformation,"
                             "document_analysis,text_processing,table_creation,"
                             "visual_information_processing,content_structuring")
        # Update result format to include tables
        self.result_format = "Text,Document,Table"
        # Add enhanced document capabilities
        self.supports_documents = True
        self.document_capabilities = ["read", "create", "analyze", "extract", "transform"]
        self.required_context = ["workflow_id"]
        # Injected later via set_document_handler(); None until then
        self.document_handler = None
        # Initialize AI service (injected externally; call_api would fail on None)
        self.ai_service = None
        # Initialize protocol
        self.protocol = AgentCommunicationProtocol()
        # Initialize utilities
        self.message_utils = MessageUtils()
def get_agent_info(self) -> Dict[str, Any]:
"""Get agent information for agent registry"""
info = super().get_agent_info()
info.update({
"metadata": {
"specialties": [
"creative_writing",
"documentation",
"knowledge",
"poweron",
"document_processing",
"information_extraction",
"content_transformation",
"table_generation",
"document_analysis"
]
}
})
return info
    def set_document_handler(self, document_handler):
        """Set the document handler for file operations"""
        # Stored for later use when merging attached document contents.
        self.document_handler = document_handler
    async def old_process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
        """
        Process a message and generate a creative or knowledge-based response.
        Enhanced with improved document handling.

        NOTE(review): legacy implementation superseded by process_message();
        kept verbatim. Every step is wrapped defensively so a logging or
        document failure can never prevent a response from being returned.

        Args:
            message: The message to process
            context: Additional context

        Returns:
            The generated response
        """
        # Extract workflow_id from context or message
        workflow_id = context.get("workflow_id") if context and isinstance(context, dict) else None
        if not workflow_id and isinstance(message, dict):
            workflow_id = message.get("workflow_id", "unknown")
        # Create response structure early for fallback
        response = {
            "role": "assistant",
            "content": "",
            "agent_id": self.id,
            "agent_type": self.type,
            "agent_name": self.name,
            "result_format": self.result_format,
            "workflow_id": workflow_id
        }
        # Safely create logging utils
        log_func = None
        logging_utils = None
        try:
            from modules.agentservice_utils import LoggingUtils
            log_func = context.get("log_func") if context and isinstance(context, dict) else None
            logging_utils = LoggingUtils(workflow_id, log_func)
        except Exception as e:
            # If we can't even create logging utils, use basic logging
            logger.error(f"Error creating logging utils: {str(e)}")

        # Log function that works with or without logging_utils
        # NOTE(review): the `message` parameter here shadows the outer
        # `message` dict; inside safe_log it is the log text, not the message.
        def safe_log(message, level="info"):
            try:
                if logging_utils:
                    if level == "info":
                        logging_utils.info(message, "agents")
                    elif level == "warning":
                        logging_utils.warning(message, "agents")
                    elif level == "error":
                        logging_utils.error(message, "agents")
                else:
                    if level == "info":
                        logger.info(message)
                    elif level == "warning":
                        logger.warning(message)
                    elif level == "error":
                        logger.error(message)
            except Exception as log_err:
                logger.error(f"Error in logging: {str(log_err)}")

        try:
            safe_log("Starting to process request", "info")
            # Get the prompt from the message with safety check
            prompt = ""
            if isinstance(message, dict):
                prompt = message.get("content", "")
            safe_log(f"Processing request: {prompt[:50]}...", "info")
            # Power-On handling with safety check
            if prompt and "poweron" in prompt.lower():
                safe_log("Detected PowerOn keyword, generating specialized response", "info")
                poweron_prompt = f"""
                Tell to the user in the language of their prompt a big big thank you, that they think for you being PowerOn. Tell them, how pleased you are, to be part of the PowerOn family, working to support humans for a better life.
                Then generate a short answer (1-2 sentences) to this question: {prompt}
                """
                try:
                    poweron_response = await self.ai_service.call_api([
                        {"role": "system", "content": "You are a helpful assistant that is part of the PowerOn family."},
                        {"role": "user", "content": poweron_prompt}
                    ])
                    response["content"] = poweron_response
                    safe_log("PowerOn response generated", "info")
                    return response
                except Exception as api_err:
                    safe_log(f"Error calling API for PowerOn: {str(api_err)}", "error")
                    response["content"] = "I encountered an error while generating a PowerOn response. Please try again."
                    return response
            # Create system prompt
            system_prompt = "You are a helpful, creative assistant specializing in knowledge sharing, content creation, and document processing."
            # Add conversation summarization capabilities
            system_prompt += """
            When asked to summarize information, always consider:
            1. All provided document content
            2. The entire conversation history in the current workflow
            3. Any structured data that has been shared
            For summarization tasks specifically, make sure to analyze the complete context including previous messages in the conversation, not just the files or the current request.
            """
            if workflow_id and workflow_id != "unknown":
                system_prompt += """
                You are currently operating within a workflow where multiple messages may have been exchanged.
                When generating summaries or overviews, you must incorporate the content from previous messages
                in this workflow as they contain valuable context and information.
                """
            # Safely check for documents
            has_documents = False
            document_count = 0
            try:
                if isinstance(message, dict) and "documents" in message:
                    documents = message.get("documents")
                    if documents is not None:
                        document_count = len(documents)
                        has_documents = document_count > 0
                        safe_log(f"Message contains {document_count} documents", "info")
            except Exception as doc_err:
                safe_log(f"Error checking documents: {str(doc_err)}", "warning")
            # Initialize document variables
            document_content = ""
            document_texts = []
            document_names = []
            # Process documents with extreme caution
            if has_documents:
                safe_log("Processing attached documents", "info")
                # Try document handler first
                try:
                    if self.document_handler:
                        try:
                            document_content = self.document_handler.merge_document_contents(message)
                            if document_content:
                                safe_log("Successfully extracted document content with handler", "info")
                            else:
                                safe_log("Document handler returned empty content", "warning")
                        except Exception as handler_err:
                            safe_log(f"Error using document handler: {str(handler_err)}", "warning")
                except Exception as err:
                    safe_log(f"General error with document handler: {str(err)}", "warning")
                # Fallback: manual extraction (very cautious)
                # NOTE(review): this fallback runs even when the handler
                # succeeded; the handler result still wins later because
                # `document_content` is checked first when combining.
                try:
                    documents = message.get("documents", []) or []
                    for i, doc in enumerate(documents):
                        if doc is None:
                            safe_log(f"Document at index {i} is None", "warning")
                            continue
                        try:
                            # Process source
                            source = None
                            if isinstance(doc, dict):
                                source = doc.get("source")
                            # Get name
                            doc_name = "Document"
                            if isinstance(source, dict):
                                doc_name = source.get("name", f"Document {i+1}")
                            document_names.append(doc_name)
                            safe_log(f"Processing document: {doc_name}", "info")
                            # Get contents
                            contents = []
                            if isinstance(doc, dict):
                                contents = doc.get("contents", []) or []
                            doc_text = ""
                            for content_item in contents:
                                if content_item is None:
                                    continue
                                if isinstance(content_item, dict) and content_item.get("type") == "text":
                                    text = content_item.get("text", "")
                                    if text:
                                        # Use the first non-empty text item only
                                        doc_text = text
                                        document_texts.append(doc_text)
                                        safe_log(f"Found text content in {doc_name}", "info")
                                        break
                            # Handle empty content
                            if not doc_text:
                                safe_log(f"No text content found in {doc_name}", "warning")
                                placeholder = f"[This appears to be a document named '{doc_name}', but I couldn't extract its content]"
                                document_texts.append(placeholder)
                        except Exception as doc_err:
                            safe_log(f"Error processing individual document: {str(doc_err)}", "warning")
                except Exception as docs_err:
                    safe_log(f"Error in document processing loop: {str(docs_err)}", "warning")
            # Combine prompt with documents safely
            full_prompt = prompt
            try:
                if document_content:
                    full_prompt = f"{prompt}\n\n### Reference Documents:\n{document_content}"
                    safe_log("Using document handler content", "info")
                elif document_texts and document_names:
                    # Use only corresponding pairs of names and texts
                    docs_content = ""
                    min_length = min(len(document_names), len(document_texts))
                    for i in range(min_length):
                        name = document_names[i]
                        text = document_texts[i]
                        docs_content += f"\n\n### Document: {name}\n{text}"
                    if docs_content:
                        full_prompt = f"{prompt}\n\n{docs_content}"
                        safe_log("Using manually extracted content", "info")
                    else:
                        safe_log("No document content could be added", "warning")
                else:
                    safe_log("No document content available to add to prompt", "info")
            except Exception as combine_err:
                safe_log(f"Error combining prompt with documents: {str(combine_err)}", "warning")
            # Call AI API
            try:
                safe_log("Calling AI service", "info")
                content = await self.ai_service.call_api([
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": full_prompt}
                ])
                response["content"] = content
                safe_log("Response successfully generated", "info")
            except Exception as api_err:
                safe_log(f"Error calling AI API: {str(api_err)}", "error")
                response["content"] = f"I encountered an error while processing your request. Please try again or rephrase your question."
            return response
        except Exception as e:
            # Ultra-safe error handling
            error_msg = f"Error generating response: {str(e)}"
            try:
                if logging_utils:
                    logging_utils.error(error_msg, "error")
                else:
                    logger.error(error_msg)
            except:
                # Deliberately bare: the error path itself must never raise
                logger.error(f"Critical error in error handling: {error_msg}")
            response["content"] = f"I encountered an error while processing your request: {str(e)}"
            return response
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
    """
    Direct message processing function that focuses on properly handling the user's request.
    """
    # Resolve the workflow id: the context takes precedence over the message.
    workflow_id = "unknown"
    for candidate in (context, message):
        if candidate and isinstance(candidate, dict) and "workflow_id" in candidate:
            workflow_id = candidate["workflow_id"]
            break

    # Response envelope; "content" is filled in below.
    response = {
        "role": "assistant",
        "content": "",
        "agent_id": self.id,
        "agent_type": self.type,
        "agent_name": self.name,
        "result_format": "Text",
        "workflow_id": workflow_id
    }

    try:
        # Pull the raw user text out of the incoming message.
        user_message = message["content"] if isinstance(message, dict) and "content" in message else ""

        # Nothing to answer -> ask for input instead of calling the AI.
        if not user_message:
            response["content"] = "Please provide a message for me to respond to."
            return response

        # Keep the model focused on the request itself, not on workflow internals.
        system_prompt = """You are a helpful, creative assistant.
Respond directly to the user's request without referencing any workflow or system context.
Focus only on providing a direct, helpful response to the specific question or request."""

        response["content"] = await self.ai_service.call_api([
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_message}
        ])
        return response
    except Exception as e:
        logger.error(f"Error in process_message: {str(e)}")
        response["content"] = f"I encountered an error while processing your request: {str(e)}"
        return response
# Module-level cache holding the lazily created Creative Agent singleton.
_creative_agent = None

def get_creative_agent():
    """Returns a singleton instance of the Creative Agent"""
    global _creative_agent
    if _creative_agent is not None:
        return _creative_agent
    _creative_agent = CreativeAgent()
    return _creative_agent

View file

@ -98,7 +98,7 @@ class DocumentationAgent(BaseAgent):
}
try:
# Create status update using protocol
# Initial status update
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Starting document creation",
@ -112,14 +112,35 @@ class DocumentationAgent(BaseAgent):
# Extract task from message
task = message.get("content", "")
# Detect document type
# Detect document type - 10% progress
document_type = self._detect_document_type(task)
logging_utils.info(f"Creating {document_type} documentation", "execution")
# Process any attached documents
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Identified document type: {document_type}",
sender_id=self.id,
status="in_progress",
progress=0.1,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Process any attached documents - 30% progress
document_context = ""
if message.get("documents"):
logging_utils.info("Processing reference documents", "execution")
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Processing reference documents",
sender_id=self.id,
status="in_progress",
progress=0.2,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
document_context = await self._process_documents(message)
# Update progress
@ -136,10 +157,32 @@ class DocumentationAgent(BaseAgent):
# Enhanced prompt with document context
enhanced_prompt = f"{task}\n\n{document_context}"
# Assess complexity of the task
is_complex = await self._assess_complexity(enhanced_prompt)
# Assess complexity - 40% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Assessing document complexity",
sender_id=self.id,
status="in_progress",
progress=0.4,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
is_complex = await self._assess_complexity(enhanced_prompt)
complexity_type = "complex" if is_complex else "simple"
logging_utils.info(f"Document complexity assessment: {complexity_type}", "execution")
# Generate title - 50% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Generating document title",
sender_id=self.id,
status="in_progress",
progress=0.5,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Generate title
title = await self._generate_title(enhanced_prompt, document_type)
logging_utils.info(f"Document title: {title}", "execution")
@ -149,28 +192,48 @@ class DocumentationAgent(BaseAgent):
status_description=f"Generating {document_type}: {title}",
sender_id=self.id,
status="in_progress",
progress=0.5,
progress=0.6,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Generate content based on complexity
# Generate content based on complexity - 70% progress
if is_complex:
# For complex documents, use the AI service with enhanced prompt
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Creating complex {document_type} document: {title}",
sender_id=self.id,
status="in_progress",
progress=0.7,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
content = await self._generate_complex_document(enhanced_prompt, document_type, title)
logging_utils.info("Complex document generated", "execution")
else:
# For simple documents, use direct generation
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Creating simple {document_type} document: {title}",
sender_id=self.id,
status="in_progress",
progress=0.7,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
content = await self._generate_simple_document(enhanced_prompt, document_type, title)
logging_utils.info("Simple document generated", "execution")
# Final progress update
# Finalize document - 90% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Document creation completed",
status_description="Finalizing document",
sender_id=self.id,
status="completed",
progress=1.0,
status="in_progress",
progress=0.9,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
@ -220,6 +283,17 @@ class DocumentationAgent(BaseAgent):
# If no document handler, just put content in response
response["content"] = content
# Final progress update
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Document creation completed",
sender_id=self.id,
status="completed",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
return response
except Exception as e:
@ -235,11 +309,55 @@ class DocumentationAgent(BaseAgent):
context_id=workflow_id
)
# Log error status
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Error creating documentation: {str(e)}",
sender_id=self.id,
status="error",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "error", self.id, self.name)
# Set error in response
response["content"] = f"## Error creating documentation\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
response["status"] = "error"
return response
# Helper: turn attached documents into one merged context string, with logging.
async def _process_documents(self, message: Dict[str, Any]) -> str:
    """Process documents in the message with detailed logging"""
    if not message.get("documents"):
        return ""

    if self.document_handler:
        # Preferred path: delegate merging to the document handler.
        document_context = self.document_handler.merge_document_contents(message)
    else:
        # Fallback: concatenate the plain-text parts of each document by hand.
        pieces = []
        for document in message.get("documents", []):
            doc_name = document.get("source", {}).get("name", "unnamed")
            pieces.append(f"\n\n--- {doc_name} ---\n")
            pieces.extend(
                part.get("text", "")
                for part in document.get("contents", [])
                if part.get("type") == "text"
            )
        document_context = "".join(pieces)

    # Summarize what was extracted, for debugging.
    doc_count = len(message.get("documents", []))
    context_size = len(document_context)
    logger.info(f"Processed {doc_count} documents, extracted {context_size} characters of context")
    return document_context
async def _assess_complexity(self, task: str) -> bool:
"""

View file

@ -1,6 +1,6 @@
"""
WebCrawler-Agent für die Recherche und Beschaffung von Informationen aus dem Web.
Angepasst für das refaktorisierte Core-Modul.
WebCrawler-Agent for research and retrieval of information from the web.
Adapted for the refactored Core-Module with language-agnostic detection.
"""
import json
@ -21,15 +21,15 @@ logger = logging.getLogger(__name__)
class WebcrawlerAgent(BaseAgent):
"""Agent für Web-Recherche und Informationsbeschaffung"""
"""Agent for Web Research and Information Retrieval"""
def __init__(self):
"""Initialisiert den WebCrawler-Agenten"""
"""Initialize the WebCrawler Agent"""
super().__init__()
self.id = "webcrawler"
self.name = "Webscraper"
self.type = "scraper"
self.description = "Recherchiert Informationen im Web"
self.description = "Researches information on the web"
self.capabilities = "web_search,information_retrieval,data_collection,source_verification,content_integration"
self.result_format = "SearchResults"
@ -45,10 +45,10 @@ class WebcrawlerAgent(BaseAgent):
# Initialize protocol
self.protocol = AgentCommunicationProtocol()
# Utility-Klassen initialisieren
# Initialize utility classes
self.message_utils = MessageUtils()
# Web-Crawling-Konfiguration
# Web-Crawling configuration
self.max_url = 3
self.max_key = 3
self.max_result = 3
@ -72,14 +72,14 @@ class WebcrawlerAgent(BaseAgent):
async def process_message(self, message: Dict[str, Any], context: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Verarbeitet eine Nachricht und führt eine Web-Recherche durch.
Process a message and conduct web research if appropriate.
Args:
message: Die zu verarbeitende Nachricht
context: Zusätzlicher Kontext
message: The message to process
context: Additional context
Returns:
Die generierte Antwort mit der Web-Recherche
The generated response or rejection if not a web research request
"""
# Extract workflow_id from context or message
workflow_id = context.get("workflow_id") if context else message.get("workflow_id", "unknown")
@ -88,17 +88,6 @@ class WebcrawlerAgent(BaseAgent):
log_func = context.get("log_func") if context else None
logging_utils = LoggingUtils(workflow_id, log_func)
# Send status update using protocol
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Starte Web-Recherche",
sender_id=self.id,
status="in_progress",
progress=0.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Create response structure
response = {
"role": "assistant",
@ -113,40 +102,368 @@ class WebcrawlerAgent(BaseAgent):
try:
# Get the query from the message
prompt = await self.get_prompt(message)
logging_utils.info(f"Web-Recherche für: {prompt[:50]}...", "agents")
# Update progress using protocol
# Check if this is explicitly a web research request using AI
is_web_research = await self._is_web_research_request_ai(prompt)
if not is_web_research:
# Reject non-web research requests
logging_utils.info("Request rejected: not a web research task", "agents")
response["content"] = "This request doesn't appear to require web research. Redirecting to a more appropriate agent."
response["status"] = "rejected"
response["rejection_reason"] = "not_web_research"
return response
# Continue with web research process
logging_utils.info(f"Web research for: {prompt[:50]}...", "agents")
# Send status update using protocol
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Recherchiere: {prompt[:30]}...",
status_description="Starting web research",
sender_id=self.id,
status="in_progress",
progress=0.3,
progress=0.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Führe die Web-Recherche durch
web_query_result = await self.get_web_query(message)
# Final status update
# Update progress using protocol - 10% for starting the query analysis
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Web-Recherche abgeschlossen",
status_description=f"Analyzing search strategy for: {prompt[:30]}...",
sender_id=self.id,
status="completed",
progress=1.0,
status="in_progress",
progress=0.1,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Set the content in the response
response["content"] = web_query_result
return response
# Prepare the web query strategy
try:
# Log progress - 20% for query strategy preparation
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Creating search strategy",
sender_id=self.id,
status="in_progress",
progress=0.2,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Get the query strategy
content_text = await self.ai_service.call_api(
messages=[
{
"role": "system",
"content": "You are a web research expert who develops precise search strategies."
},
{
"role": "user",
"content": f"""Create a comprehensive web research strategy for the task = '{prompt.replace("'","")}'. Return the results as a Python dictionary with these specific keys. If specific url are provided and the task requires analysis only on the provided url, then leave 'skey' open.
'url': A list of maximum {self.max_url} specific URLs extracted from the task string.
'skey': A list of maximum {self.max_key} key sentences to search for on the web. These should be precise, diverse, and targeted to get the most relevant information.
Format your response as a valid json object with these two keys. Do not include any explanatory text or markdown outside of the object definition.
"""
}
]
)
# Try to parse the JSON result
if content_text.startswith("```json"):
# Find the end of the JSON block
end_marker = "```"
end_index = content_text.rfind(end_marker)
if end_index != -1:
# Extract the JSON content without the markdown markers
content_text = content_text[7:end_index].strip()
try:
logger.info(f"Valid json received: {str(content_text)}")
pjson = json.loads(content_text)
# Log parsed search strategy
search_keys = pjson.get("skey", [])
search_urls = pjson.get("url", [])
if search_keys:
logging_utils.info(f"Searching for {len(search_keys)} key terms: {', '.join(search_keys[:2])}...", "agents")
if search_urls:
logging_utils.info(f"Searching in {len(search_urls)} direct URLs: {', '.join(search_urls[:2])}...", "agents")
# Log progress - 30% for starting the search
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Starting web search",
sender_id=self.id,
status="in_progress",
progress=0.3,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Execute the search
results = []
total_tasks = len(search_keys) + len(search_urls)
tasks_completed = 0
# Process search keywords
for keyword in search_keys:
logging_utils.info(f"Searching web for: '{keyword}'", "agents")
# Log specific keyword search progress
if log_func:
progress_pct = 0.3 + (0.5 * (tasks_completed / total_tasks))
status_message = self.protocol.create_status_update_message(
status_description=f"Searching for: '{keyword}'",
sender_id=self.id,
status="in_progress",
progress=progress_pct,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
keyword_results = self.search_web(keyword)
results.extend(keyword_results)
logging_utils.info(f"Found: {len(keyword_results)} results for '{keyword}'", "agents")
tasks_completed += 1
# Process direct URLs
for url in search_urls:
logging_utils.info(f"Extracting content from: {url}", "agents")
# Log specific URL extraction progress
if log_func:
progress_pct = 0.3 + (0.5 * (tasks_completed / total_tasks))
status_message = self.protocol.create_status_update_message(
status_description=f"Reading URL: {url}",
sender_id=self.id,
status="in_progress",
progress=progress_pct,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
soup = self.read_url(url)
# Extract title from the page if it exists
if isinstance(soup, BeautifulSoup):
title_tag = soup.find('title')
title = title_tag.text.strip() if title_tag else "No title"
# Alternative: You could also look for h1 tags if the title tag is missing
if title == "No title":
h1_tag = soup.find('h1')
if h1_tag:
title = h1_tag.text.strip()
else:
# Handle the case where soup is an error message string
title = "Error fetching page"
result = self.parse_result(soup, title, url)
results.append(result)
logging_utils.info(f"Extracted: '{title}' from {url}", "agents")
tasks_completed += 1
# Log progress - 80% for processing results
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Analyzing {len(results)} search results",
sender_id=self.id,
status="in_progress",
progress=0.8,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Process results for the final output
logging_utils.info(f"Analyzing {len(results)} web results", "agents")
# Generate summaries for each result
processed_results = []
for i, result in enumerate(results):
result_data_limited = self.limit_text_for_api(result['data'], max_tokens=15000)
# Log individual result processing
logging_utils.info(f"Analyzing result {i+1}/{len(results)}: {result['title'][:30]}...", "agents")
web_answer_instructions = f"""
Summarize this search result according to the original request in approximately 2000 characters. Original request = '{prompt.replace("'","")}'
Focus on the most important insights and connect them to the original request. You can skip any introduction.
Extract only relevant and high-quality information related to the request, and present it in a clear format. Provide a balanced view of the researched information.
Here is the search result:
{result_data_limited}
"""
content_summary = await self.ai_service.call_api(
messages=[
{
"role": "system",
"content": "You are an information analyst who precisely and relevantly summarizes web content."
},
{
"role": "user",
"content": web_answer_instructions
}
]
)
# Limit summary to ~2000 characters
content_summary = content_summary[:2000]
processed_result = {
"title": result['title'],
"url": result['url'],
"snippet": result['snippet'],
"summary": content_summary
}
processed_results.append(processed_result)
# Log progress - 90% for creating final summary
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Creating overall summary",
sender_id=self.id,
status="in_progress",
progress=0.9,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
# Create the final combined summary
all_summaries = "\n\n".join([r["summary"] for r in processed_results])
all_summaries_limited = self.limit_text_for_api(all_summaries, max_tokens=10000)
logging_utils.info("Creating overall summary of web research", "agents")
final_summary = await self.ai_service.call_api(
messages=[
{
"role": "system",
"content": "You create concise summaries of research findings."
},
{
"role": "user",
"content": f"Please summarize these findings in 5-6 sentences: {all_summaries_limited}\n"
}
]
)
# Get the language of the request to use for result headers
request_language_analysis = await self.ai_service.call_api(
messages=[
{
"role": "system",
"content": "You determine the language of a text and return only the language name."
},
{
"role": "user",
"content": f"What language is this text in? Only respond with the language name: {prompt}"
}
]
)
# Get headers in the right language
headers = await self._get_localized_headers(request_language_analysis.strip())
# Format the final result
final_result = f"## {headers['web_research_results']}\n\n### {headers['summary']}\n{final_summary}\n\n### {headers['detailed_results']}\n"
for i, result in enumerate(processed_results, 1):
final_result += f"\n\n[{i}] {result['title']}\n{headers['url']}: {result['url']}\n{headers['snippet']}: {result['snippet']}\n{headers['content']}: {result['summary']}"
# Set the content in the response
response["content"] = final_result
# Log completion - 100% progress
if log_func:
status_message = self.protocol.create_status_update_message(
status_description="Web research completed",
sender_id=self.id,
status="completed",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "info", self.id, self.name)
logging_utils.info("Web research successfully completed", "agents")
return response
except json.JSONDecodeError as e:
logging_utils.error(f"Error parsing JSON data: {e}", "error")
# Fallback for JSON parse error
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Error parsing search strategy: {str(e)}",
sender_id=self.id,
status="error",
progress=0.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "error", self.id, self.name)
# Use a simple fallback approach
logging_utils.info("Using fallback search strategy with direct query", "agents")
# Perform a direct search with the original query
results = self.search_web(prompt)
# Process and format results directly
if results:
result_text = "## Web Research Results (Fallback Mode)\n\n"
for i, result in enumerate(results, 1):
result_text += f"### [{i}] {result['title']}\n"
result_text += f"URL: {result['url']}\n"
result_text += f"Snippet: {result['snippet']}\n\n"
response["content"] = result_text
else:
response["content"] = "## Web Research Results\n\nNo relevant results were found."
return response
except Exception as e:
error_msg = f"Error during web research: {str(e)}"
logging_utils.error(error_msg, "error")
# Create error response using protocol
error_message = self.protocol.create_error_message(
error_description=error_msg,
sender_id=self.id,
error_type="web_search",
error_details={"traceback": traceback.format_exc()},
context_id=workflow_id
)
# Log error status
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Error during web research: {str(e)}",
sender_id=self.id,
status="error",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "error", self.id, self.name)
response["content"] = f"## Error during web research\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
return response
except Exception as e:
error_msg = f"Fehler bei der Web-Recherche: {str(e)}"
error_msg = f"Error during web research: {str(e)}"
logging_utils.error(error_msg, "error")
# Create error response using protocol
@ -158,10 +475,167 @@ class WebcrawlerAgent(BaseAgent):
context_id=workflow_id
)
response["content"] = f"## Fehler bei der Web-Recherche\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
# Log error status
if log_func:
status_message = self.protocol.create_status_update_message(
status_description=f"Error during web research: {str(e)}",
sender_id=self.id,
status="error",
progress=1.0,
context_id=workflow_id
)
log_func(workflow_id, status_message.content, "error", self.id, self.name)
response["content"] = f"## Error during web research\n\n{error_msg}\n\n```\n{traceback.format_exc()}\n```"
return response
async def _is_web_research_request_ai(self, prompt: str) -> bool:
"""
Uses AI to determine if a prompt requires web research, making it language-agnostic.
Args:
prompt: The user prompt
Returns:
True if this is explicitly a web research request, False otherwise
"""
if not self.ai_service:
# Fallback to simpler detection if AI service isn't available
return self._simple_web_detection(prompt)
try:
# Create a prompt to analyze whether this is a web research request
analysis_prompt = f"""
Analyze the following request and determine if it explicitly requires web research or online information.
REQUEST: {prompt}
A request requires web research if:
1. It explicitly asks to search for information online
2. It contains URLs or references to websites
3. It requests current information that would be available on the web
4. It asks to find information from web sources
5. It implicitly requires up-to-date information from the internet
ONLY respond with a single word - either "YES" if web research is required, or "NO" if it is not.
DO NOT include any explanation, just the answer YES or NO.
"""
# Call AI to analyze
response = await self.ai_service.call_api(
messages=[
{
"role": "system",
"content": "You determine if a request requires web research. Always answer with only YES or NO."
},
{
"role": "user",
"content": analysis_prompt
}
]
)
# Clean the response
response = response.strip().upper()
# Check if the response indicates it's a web research task
if "YES" in response:
return True
else:
return False
except Exception as e:
# Log error but don't fail, fall back to simpler detection
logger.warning(f"Error using AI to detect web research request: {str(e)}")
return self._simple_web_detection(prompt)
def _simple_web_detection(self, prompt: str) -> bool:
"""
Simpler fallback method to detect web research requests based on URLs.
Args:
prompt: The user prompt
Returns:
True if there are clear URL indicators, False otherwise
"""
# URLs in the prompt strongly indicate web research
url_indicators = ["http://", "https://", "www.", ".com", ".org", ".net", ".edu", ".gov"]
# Check for URL patterns in the prompt
contains_url = any(indicator in prompt.lower() for indicator in url_indicators)
return contains_url
async def _get_localized_headers(self, language: str) -> Dict[str, str]:
"""
Get localized headers for the web research results based on detected language.
Args:
language: The detected language
Returns:
Dictionary with localized headers
"""
# Default English headers
headers = {
"web_research_results": "Web Research Results",
"summary": "Summary",
"detailed_results": "Detailed Results",
"url": "URL",
"snippet": "Snippet",
"content": "Content"
}
# If language detection failed or is English, return defaults
if not language or language.lower() in ["english", "en"]:
return headers
try:
# Use AI to translate headers to the detected language
translation_prompt = f"""
Translate these web research result headers to {language}:
Web Research Results
Summary
Detailed Results
URL
Snippet
Content
Return a JSON object with these keys:
web_research_results, summary, detailed_results, url, snippet, content
"""
# Call AI for translation
response = await self.ai_service.call_api(
messages=[
{
"role": "system",
"content": "You translate headers to the specified language and return them as JSON."
},
{
"role": "user",
"content": translation_prompt
}
]
)
# Extract JSON
import re
json_match = re.search(r'\{.*\}', response, re.DOTALL)
if json_match:
translated_headers = json.loads(json_match.group(0))
return translated_headers
except Exception as e:
# Log error but continue with English headers
logger.warning(f"Error translating headers to {language}: {str(e)}")
return headers
async def get_prompt(self, message_context: Dict[str, Any]) -> str:
    """Return the task text from the message context, stripped of surrounding whitespace."""
    return message_context.get("content", "").strip()
@ -181,11 +655,11 @@ class WebcrawlerAgent(BaseAgent):
result_data_limited = self.limit_text_for_api(result['data'], max_tokens=15000) # Allow ~15000 tokens per result
web_answer_instructions = f"""
Fass das Resultat gemäss dem Auftrag zusammen in maximal rund 2000 Zeichen. Auftrag = '{prompt.replace("'","")}'
Fasse die wichtigsten Erkenntnisse zusammen und setze sie in Bezug zur ursprünglichen Anfrage. Die Einleitung kannst Du weglassen.
Achte darauf, nur relevante und qualitativ hochwertige Informationen zu extrahieren, welche einen Bezug zum Auftrag haben, und übersichtlich zu präsentieren. Vermittle ein ausgewogenes Bild der recherchierten Informationen.
Summarize this search result according to the original request in approximately 2000 characters. Original request = '{prompt.replace("'","")}'
Focus on the most important insights and connect them to the original request. You can skip any introduction.
Extract only relevant and high-quality information related to the request, and present it in a clear format. Provide a balanced view of the researched information.
Dies ist das Resultat:
Here is the search result:
{result_data_limited}
"""
@ -197,12 +671,12 @@ class WebcrawlerAgent(BaseAgent):
total_tokens += instruction_tokens
# Zusätzliche Anweisungen für Web-Recherche
# Additional instructions for web research
content_text = await self.ai_service.call_api(
messages=[
{
"role": "system",
"content": "Du bist ein Informationsanalyst, der Webinhalte präzise und relevant zusammenfasst."
"content": "You are an information analyst who precisely and relevantly summarizes web content."
},
{
"role": "user",
@ -223,7 +697,7 @@ class WebcrawlerAgent(BaseAgent):
logger.info(f"Web analysis result sent {len(result_data)}B")
# Zusätzliche Zusammenfassung
# Additional summary
summary = ""
if len(summary_src) > 1:
# Limit summary source to ensure we don't exceed API limits
@ -233,17 +707,17 @@ class WebcrawlerAgent(BaseAgent):
messages=[
{
"role": "system",
"content": "Du erstellst prägnante Zusammenfassungen von Rechercheergbnissen."
"content": "You create concise summaries of research findings."
},
{
"role": "user",
"content": f"Bitte fasse diese Erkenntnisse in maximal 5-6 Sätzen zusammen: {summary_src_limited}\n"
"content": f"Please summarize these findings in 5-6 sentences: {summary_src_limited}\n"
}
]
)
# Format the final result
result = f"## Web-Recherche Ergebnisse\n\n### Zusammenfassung\n{summary}\n\n### Detaillierte Ergebnisse{result_data}"
result = f"## Web Research Results\n\n### Summary\n{summary}\n\n### Detailed Results{result_data}"
return result
async def run_web_query(self, prompt: str) -> List[Dict]:
@ -263,7 +737,7 @@ class WebcrawlerAgent(BaseAgent):
messages=[
{
"role": "system",
"content": "Du bist ein Webrecherche-Experte, der präzise Suchstrategien entwickelt."
"content": "You are a web research expert who develops precise search strategies."
},
{
"role": "user",
@ -446,7 +920,7 @@ class WebcrawlerAgent(BaseAgent):
search_results_soup = self.read_url(url)
if not search_results_soup or search_results_soup.select('.result') is None or len(search_results_soup.select('.result')) == 0:
logger.warning(f"Keine Suchergebnisse gefunden für: {query}")
logger.warning(f"No search results found for: {query}")
return []
# Extract search results
@ -505,14 +979,14 @@ class WebcrawlerAgent(BaseAgent):
def read_url(self, url: str) -> BeautifulSoup:
"""
Liest eine URL und gibt einen BeautifulSoup-Parser für den Inhalt zurück.
Bei Fehlern wird ein leeres BeautifulSoup-Objekt zurückgegeben.
Reads a URL and returns a BeautifulSoup parser for the content.
Returns an empty BeautifulSoup object for errors.
Args:
url: Die zu lesende URL
url: The URL to read
Returns:
BeautifulSoup-Objekt mit dem Inhalt oder leer bei Fehlern
BeautifulSoup object with the content or empty for errors
"""
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
@ -521,30 +995,30 @@ class WebcrawlerAgent(BaseAgent):
}
try:
# Initialer Request
# Initial request
response = requests.get(url, headers=headers, timeout=10)
# Polling für Status 202
# Polling for status 202
if response.status_code == 202:
# Maximal 3 Versuche mit steigenden Intervallen
backoff_times = [0.5, 1.0, 2.0, 5.0] # 0.5s, dann 1s, dann 2s
# Retry up to four times with increasing back-off intervals
backoff_times = [0.5, 1.0, 2.0, 5.0] # 0.5s, then 1s, then 2s
for wait_time in backoff_times:
time.sleep(wait_time) # Warten mit steigender Zeit
time.sleep(wait_time) # Wait with increasing time
response = requests.get(url, headers=headers, timeout=10)
# Wenn kein 202 mehr, dann abbrechen
# If no 202 anymore, then break
if response.status_code != 202:
break
# Für andere Fehler-Status einen Fehler auslösen
# For other error statuses, raise an error
response.raise_for_status()
# HTML parsen
# Parse HTML
return BeautifulSoup(response.text, 'html.parser')
except Exception as e:
# Leeres BeautifulSoup-Objekt erstellen
# Create empty BeautifulSoup object
return BeautifulSoup("<html><body></body></html>", 'html.parser')
def parse_result(self, data: BeautifulSoup, title: str, url: str) -> Dict[str, str]:
@ -571,11 +1045,11 @@ class WebcrawlerAgent(BaseAgent):
return result
# Singleton-Instanz
# Singleton instance
_webcrawler_agent = None
def get_webcrawler_agent():
"""Gibt eine Singleton-Instanz des WebCrawler-Agenten zurück"""
"""Returns a singleton instance of the WebCrawler Agent"""
global _webcrawler_agent
if _webcrawler_agent is None:
_webcrawler_agent = WebcrawlerAgent()

View file

@ -7,6 +7,7 @@ import json
from typing import List, Dict, Any, Optional, Tuple
import asyncio
from datetime import datetime
import uuid
logger = logging.getLogger(__name__)
@ -18,10 +19,10 @@ async def data_extraction(
lucydom_interface = None,
workflow_id: str = None,
add_log_func = None,
document_handler = None # Add this parameter
document_handler = None # Add document handler parameter
) -> Dict[str, Any]:
"""
Performs AI-driven data extraction with support for the document handler.
Performs AI-driven data extraction with improved document and image handling.
Args:
prompt: Specification of what data to extract
@ -37,11 +38,22 @@ async def data_extraction(
Structured text object with extracted data and context information
"""
try:
# Create extraction plan using AI
# Log extraction start
_log(add_log_func, workflow_id, f"Starting data extraction with {len(files)} files", "info")
# Create enhanced extraction plan using AI
_log(add_log_func, workflow_id, "Creating extraction plan", "info")
extraction_plan = await _create_extraction_plan(prompt, files, messages, ai_service, workflow_id, add_log_func)
# If we have extraction plan, log summary
if extraction_plan:
extract_needed_count = sum(1 for item in extraction_plan if item.get("extract_needed", False))
_log(add_log_func, workflow_id,
f"Extraction plan created: {len(extraction_plan)} files, {extract_needed_count} need extraction", "info")
# Execute extractions, preferring document handler if available
if document_handler:
_log(add_log_func, workflow_id, "Using document handler for extraction", "info")
extracted_data = await _execute_extractions_with_handler(
extraction_plan,
files,
@ -53,6 +65,7 @@ async def data_extraction(
)
else:
# Fall back to original implementation
_log(add_log_func, workflow_id, "Using fallback extraction method", "info")
extracted_data = await _execute_extractions(
extraction_plan,
files,
@ -64,8 +77,68 @@ async def data_extraction(
)
# Structure extracted data
_log(add_log_func, workflow_id, f"Structuring extracted data from {len(extracted_data)} files", "info")
structured_result = _structure_extracted_data(extracted_data, files, prompt)
# Enhance with contextual summaries using AI
if ai_service and structured_result["extracted_content"]:
_log(add_log_func, workflow_id, "Creating contextual summaries for extracted content", "info")
try:
# Create a prompt for contextual summary
summary_prompt = f"""
Create concise, contextual summaries of the following extracted content according to this requirement:
REQUIREMENT: {prompt}
EXTRACTED CONTENT:
"""
for item in structured_result["extracted_content"]:
file_name = item.get("name", "Unnamed file")
content_preview = item.get("content", "")[:500] + "..." if len(item.get("content", "")) > 500 else item.get("content", "")
summary_prompt += f"\n--- {file_name} ---\n{content_preview}\n"
# Call AI for contextual summaries
summaries = await ai_service.call_api([{"role": "user", "content": summary_prompt}])
structured_result["contextual_summary"] = summaries
_log(add_log_func, workflow_id, "Added contextual summaries to extracted data", "info")
except Exception as e:
_log(add_log_func, workflow_id, f"Error creating contextual summaries: {str(e)}", "warning")
# Handle image-specific content separately
image_content = [item for item in structured_result["extracted_content"]
if "Image Analysis" in item.get("content", "") or item.get("type") == "image"]
if image_content and len(image_content) > 0:
_log(add_log_func, workflow_id, f"Processing {len(image_content)} image-related content items", "info")
# Add image analysis summary if we have AI service
if ai_service:
try:
# Create a prompt for image analysis summary
image_summary_prompt = f"""
Summarize the key visual information from these image analyses according to this requirement:
REQUIREMENT: {prompt}
IMAGE ANALYSES:
"""
for item in image_content:
file_name = item.get("name", "Unnamed image")
content = item.get("content", "")
image_summary_prompt += f"\n--- {file_name} ---\n{content}\n"
# Call AI for image analysis summary
image_summaries = await ai_service.call_api([{"role": "user", "content": image_summary_prompt}])
structured_result["image_analysis_summary"] = image_summaries
_log(add_log_func, workflow_id, "Added image analysis summary to extracted data", "info")
except Exception as e:
_log(add_log_func, workflow_id, f"Error creating image analysis summary: {str(e)}", "warning")
return structured_result
except Exception as e:
@ -94,7 +167,7 @@ async def _execute_extractions_with_handler(
add_log_func = None
) -> List[Dict[str, Any]]:
"""
Execute extractions using the document handler.
Execute extractions using the document handler with enhanced image processing.
Args:
extraction_plan: List of extraction instructions
@ -129,77 +202,137 @@ async def _execute_extractions_with_handler(
file_type = file_metadata.get("type", "")
content_type = file_metadata.get("content_type", "")
# Log
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Processing file: {file_name} (Extraction needed: {extract_needed})",
"info"
)
# Log extraction start
_log(add_log_func, workflow_id,
f"Processing file: {file_name} (Extraction needed: {extract_needed})", "info")
# Only perform extraction if needed
if extract_needed:
# Find document in existing messages if available
# Check if file already exists in messages with content
existing_content = _find_document_in_messages(file_id, messages)
# Check if we should use document handler for contextual extraction
if existing_content:
# If document exists but needs contextual extraction
document_id = existing_content.get("document_id")
message_id = existing_content.get("message_id")
if existing_content and existing_content.get("content"):
# Content already exists, check if we need more specialized extraction
current_context = existing_content.get("extraction_context", "")
if document_id and message_id:
# Find the message containing the document
for message in messages:
if message.get("id") == message_id:
# Extract content with context
try:
# Find document reference
doc_reference = None
for doc in message.get("documents", []):
if doc.get("id") == document_id:
doc_reference = doc
# Check if new extraction prompt is different or more specific
if extraction_prompt and extraction_prompt != current_context:
_log(add_log_func, workflow_id,
f"Re-extracting {file_name} with new prompt: {extraction_prompt}", "info")
# Create an empty message to extract into
empty_message = {}
# Use document handler to extract with new context
try:
result_message = await document_handler.add_file_to_message(
empty_message,
file_id,
extraction_prompt
)
# Get the document content from result
if "documents" in result_message and result_message["documents"]:
doc = result_message["documents"][0]
# Get text content
content_text = ""
is_extracted = False
for content in doc.get("contents", []):
if content.get("type") == "text":
content_text = content.get("text", "")
is_extracted = content.get("is_extracted", False)
break
# Create extraction result
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": content_text,
"is_extracted": is_extracted,
"extraction_method": "document_handler_reextract",
"extraction_context": extraction_prompt
})
# Check for additional documents (e.g., extracted images)
for additional_doc in result_message.get("documents", [])[1:]:
source = additional_doc.get("source", {})
# Skip if not an extracted document
if source.get("type") != "extracted":
continue
# Get content
add_content_text = ""
add_is_extracted = False
for content in additional_doc.get("contents", []):
if content.get("type") == "text":
add_content_text = content.get("text", "")
add_is_extracted = content.get("is_extracted", False)
break
if doc_reference:
# Use document handler to perform contextual extraction
extracted_text = await document_handler.extract_document_content(
document_id,
file_id,
extraction_prompt
)
# Add as separate extraction result
if add_content_text:
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": extracted_text,
"is_extracted": True,
"extraction_method": "contextual_extraction"
"file_id": source.get("id", f"extracted_{uuid.uuid4()}"),
"name": source.get("name", f"Extracted from {file_name}"),
"type": source.get("content_type", "image"),
"content": add_content_text,
"is_extracted": add_is_extracted,
"extraction_method": "document_handler_extracted_component",
"extraction_context": content.get("extraction_context", extraction_prompt),
"parent_file_id": file_id
})
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Contextual extraction for {file_name}: {extraction_prompt}",
"info"
)
continue
except Exception as e:
logger.error(f"Error in contextual extraction for {file_name}: {str(e)}")
_log(add_log_func, workflow_id,
f"Extracted embedded content from {file_name}", "info")
_log(add_log_func, workflow_id,
f"Re-extracted {file_name} with new context", "info")
continue
except Exception as e:
logger.error(f"Error re-extracting {file_name}: {str(e)}")
_log(add_log_func, workflow_id,
f"Error re-extracting {file_name}: {str(e)}", "warning")
# Use existing content
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": existing_content.get("content", ""),
"is_extracted": existing_content.get("is_extracted", False),
"extraction_method": "existing_content",
"extraction_context": current_context
})
_log(add_log_func, workflow_id,
f"Using existing content for {file_name}", "info")
continue
# If we reach here, we need to perform a new extraction
# Need to extract content with document handler
try:
file_content = await document_handler.add_file_to_message(
{}, # Empty message to extract just the document
# Create an empty message to extract into
empty_message = {}
# Use document handler to add file and extract content
result_message = await document_handler.add_file_to_message(
empty_message,
file_id,
extraction_prompt
)
# Get the extracted content from the document
if "documents" in file_content and file_content["documents"]:
doc = file_content["documents"][0]
# Get the document content from result
if "documents" in result_message and result_message["documents"]:
# Process main document
doc = result_message["documents"][0] # First document is the main file
# Get text content
content_text = ""
is_extracted = False
@ -209,21 +342,53 @@ async def _execute_extractions_with_handler(
is_extracted = content.get("is_extracted", False)
break
# Create extraction result for main document
extracted_data.append({
"file_id": file_id,
"name": file_name,
"type": file_type,
"content": content_text,
"is_extracted": is_extracted,
"extraction_method": "document_handler"
"extraction_method": "document_handler",
"extraction_context": extraction_prompt
})
if add_log_func and workflow_id:
add_log_func(
workflow_id,
f"Extracted {file_name} using document handler",
"info"
)
_log(add_log_func, workflow_id,
f"Extracted {file_name} using document handler", "info")
# Process additional documents (e.g., extracted images)
for additional_doc in result_message.get("documents", [])[1:]:
source = additional_doc.get("source", {})
# Skip if not an extracted document
if source.get("type") != "extracted":
continue
# Get content
add_content_text = ""
add_is_extracted = False
for content in additional_doc.get("contents", []):
if content.get("type") == "text":
add_content_text = content.get("text", "")
add_is_extracted = content.get("is_extracted", False)
break
# Add as separate extraction result
if add_content_text:
extracted_data.append({
"file_id": source.get("id", f"extracted_{uuid.uuid4()}"),
"name": source.get("name", f"Extracted from {file_name}"),
"type": source.get("content_type", "image"),
"content": add_content_text,
"is_extracted": add_is_extracted,
"extraction_method": "document_handler_extracted_component",
"extraction_context": content.get("extraction_context", extraction_prompt),
"parent_file_id": file_id
})
_log(add_log_func, workflow_id,
f"Extracted embedded content from {file_name}", "info")
else:
# Extraction failed
extracted_data.append({
@ -234,8 +399,15 @@ async def _execute_extractions_with_handler(
"is_extracted": False,
"extraction_method": "failed"
})
_log(add_log_func, workflow_id,
f"Failed to extract content from {file_name}", "warning")
except Exception as e:
logger.error(f"Error extracting {file_name}: {str(e)}")
_log(add_log_func, workflow_id,
f"Error extracting {file_name}: {str(e)}", "warning")
extracted_data.append({
"file_id": file_id,
"name": file_name,
@ -255,8 +427,12 @@ async def _execute_extractions_with_handler(
"type": file_type,
"content": existing_content.get("content", ""),
"is_extracted": existing_content.get("is_extracted", False),
"extraction_method": "existing_content"
"extraction_method": "existing_content",
"extraction_context": existing_content.get("extraction_context", "")
})
_log(add_log_func, workflow_id,
f"Using existing content for {file_name}", "info")
else:
# No existing content found
extracted_data.append({
@ -267,6 +443,9 @@ async def _execute_extractions_with_handler(
"is_extracted": False,
"extraction_method": "none"
})
_log(add_log_func, workflow_id,
f"No content available for {file_name}", "warning")
return extracted_data

View file

@ -37,9 +37,10 @@ class DocumentHandler:
"""Set or update the AI service."""
self.ai_service = ai_service
async def add_file_to_message(self, message: Dict[str, Any], file_id: int, extraction_prompt: str = None) -> Dict[str, Any]:
"""
Add a file to a message with optional contextual extraction.
Add a file to a message with contextual extraction.
Args:
message: The message to add the file to
@ -108,11 +109,16 @@ class DocumentHandler:
# Process based on file type
if file_type == "image" or (content_type and content_type.startswith("image/")):
# Image analysis if prompt provided
if extraction_prompt and self.ai_service and hasattr(self.ai_service, "analyze_image"):
if self.ai_service and hasattr(self.ai_service, "analyze_image"):
try:
# Use provided prompt or default one
image_prompt = extraction_prompt or "Describe this image in detail"
logger.info(f"Analyzing image {file_name} with prompt: {image_prompt}")
image_analysis = await self.ai_service.analyze_image(
image_data=file_content,
prompt=extraction_prompt or "Describe this image in detail",
prompt=image_prompt,
mime_type=content_type
)
@ -139,6 +145,410 @@ class DocumentHandler:
"text": f"Image file: {file_name} (no analysis requested)",
"is_extracted": False
})
# Enhanced PDF processing - extract text and images
elif file_name.lower().endswith('.pdf'):
logger.info(f"Processing PDF file: {file_name}")
# Extract text content first
from modules.agentservice_utils import extract_text_from_file_content
text_content, is_extracted = extract_text_from_file_content(
file_content, file_name, content_type
)
# Add text content
document["contents"].append({
"type": "text",
"text": text_content,
"is_extracted": is_extracted,
"extraction_context": extraction_prompt
})
logger.info(f"Extracted text content from PDF {file_name}")
# Extract and analyze images from PDF if we have AI service
if self.ai_service and hasattr(self.ai_service, "analyze_image"):
try:
# Import necessary modules
import fitz # PyMuPDF
from io import BytesIO
# Add detailed logging
logger.info(f"Starting PDF image extraction for {file_name}")
# Check if extraction prompt is available or use default
image_prompt = extraction_prompt or "Describe this image from the PDF document"
# Open PDF from memory stream with detailed error checking
try:
pdf_document = fitz.open(stream=file_content, filetype="pdf")
logger.info(f"Successfully opened PDF with {len(pdf_document)} pages")
except Exception as pdf_open_error:
logger.error(f"Failed to open PDF: {str(pdf_open_error)}")
raise
# Initialize images list and image count
images_analysis = []
image_count = 0
# Process each page
for page_num, page in enumerate(pdf_document, 1):
# Get list of images on the page
image_list = page.get_images(full=True)
if image_list:
logger.info(f"Found {len(image_list)} images on page {page_num}")
# Process each image
for img_index, img in enumerate(image_list):
try:
xref = img[0] # Get image reference
# Extract image data
base_image = pdf_document.extract_image(xref)
image_bytes = base_image["image"]
image_ext = base_image["ext"]
# Analyze image
image_analysis = await self.ai_service.analyze_image(
image_data=image_bytes,
prompt=f"{image_prompt} (Page {page_num}, Image {img_index+1})",
mime_type=f"image/{image_ext}"
)
# Add to analysis list
images_analysis.append({
"page": page_num,
"index": img_index + 1,
"analysis": image_analysis
})
image_count += 1
logger.info(f"Analyzed image {img_index+1} on page {page_num}")
# Create a separate document for each extracted image if needed
if True: # Set to condition if you want to control this
img_doc_id = f"img_doc_{uuid.uuid4()}"
image_filename = f"page{page_num}_image{img_index+1}.{image_ext}"
image_document = {
"id": img_doc_id,
"source": {
"type": "extracted",
"parent_id": str(file_id),
"id": img_doc_id,
"name": image_filename,
"content_type": f"image/{image_ext}",
"size": len(image_bytes)
},
"contents": [{
"type": "text",
"text": f"Image Analysis (PDF Page {page_num}, Image {img_index+1}):\n{image_analysis}",
"is_extracted": True,
"extraction_context": image_prompt
}]
}
# Add image document to message
message["documents"].append(image_document)
logger.info(f"Added extracted image document {image_filename} to message")
except Exception as img_err:
logger.warning(f"Error processing image {img_index} on page {page_num}: {str(img_err)}")
# Close the PDF
pdf_document.close()
# Add combined image analysis to the main document
if images_analysis:
combined_analysis = "\n\n## Embedded Images Analysis\n\n"
for img in images_analysis:
combined_analysis += f"### Page {img['page']}, Image {img['index']}\n{img['analysis']}\n\n"
document["contents"].append({
"type": "text",
"text": combined_analysis,
"is_extracted": True,
"extraction_context": f"Analysis of {image_count} images embedded in the PDF"
})
logger.info(f"Added combined analysis of {image_count} PDF images to document")
except ImportError:
logger.warning("PyMuPDF (fitz) is not installed, skipping PDF image extraction")
document["contents"].append({
"type": "text",
"text": "\n\nNote: PDF may contain images that were not extracted due to missing libraries.",
"is_extracted": False
})
except Exception as e:
logger.error(f"Error extracting images from PDF {file_name}: {str(e)}")
document["contents"].append({
"type": "text",
"text": f"\n\nError extracting images from PDF: {str(e)}",
"is_extracted": False
})
# Word document processing with image extraction
elif file_name.lower().endswith(('.docx', '.doc')):
logger.info(f"Processing Word document: {file_name}")
# Extract text content first
from modules.agentservice_utils import extract_text_from_file_content
text_content, is_extracted = extract_text_from_file_content(
file_content, file_name, content_type
)
# Add text content
document["contents"].append({
"type": "text",
"text": text_content,
"is_extracted": is_extracted,
"extraction_context": extraction_prompt
})
logger.info(f"Extracted text content from Word document {file_name}")
# Attempt to extract and analyze images from Word document
if self.ai_service and hasattr(self.ai_service, "analyze_image"):
try:
# For .docx documents
if file_name.lower().endswith('.docx'):
import zipfile
from io import BytesIO
# Check if extraction prompt is available or use default
image_prompt = extraction_prompt or "Describe this image from the Word document"
# Create a zipfile object from the .docx content
docx_zip = zipfile.ZipFile(BytesIO(file_content))
# Images in .docx are stored in the "word/media" directory
image_files = [f for f in docx_zip.namelist() if f.startswith('word/media/')]
if image_files:
logger.info(f"Found {len(image_files)} images in Word document {file_name}")
# Process each image
images_analysis = []
for i, img_path in enumerate(image_files):
try:
# Extract image data
image_bytes = docx_zip.read(img_path)
# Determine image type from filename
image_ext = img_path.split('.')[-1] if '.' in img_path else 'png'
# Analyze image
image_analysis = await self.ai_service.analyze_image(
image_data=image_bytes,
prompt=f"{image_prompt} (Image {i+1})",
mime_type=f"image/{image_ext}"
)
# Add to analysis list
images_analysis.append({
"index": i + 1,
"path": img_path,
"analysis": image_analysis
})
logger.info(f"Analyzed image {i+1} ({img_path}) from Word document")
# Create a separate document for each extracted image if needed
img_doc_id = f"img_doc_{uuid.uuid4()}"
image_filename = f"word_image{i+1}.{image_ext}"
image_document = {
"id": img_doc_id,
"source": {
"type": "extracted",
"parent_id": str(file_id),
"id": img_doc_id,
"name": image_filename,
"content_type": f"image/{image_ext}",
"size": len(image_bytes)
},
"contents": [{
"type": "text",
"text": f"Image Analysis (Word Document Image {i+1}):\n{image_analysis}",
"is_extracted": True,
"extraction_context": image_prompt
}]
}
# Add image document to message
message["documents"].append(image_document)
logger.info(f"Added extracted image document {image_filename} to message")
except Exception as img_err:
logger.warning(f"Error processing image {img_path}: {str(img_err)}")
# Add combined image analysis to the main document
if images_analysis:
combined_analysis = "\n\n## Embedded Images Analysis\n\n"
for img in images_analysis:
combined_analysis += f"### Image {img['index']}\n{img['analysis']}\n\n"
document["contents"].append({
"type": "text",
"text": combined_analysis,
"is_extracted": True,
"extraction_context": f"Analysis of {len(images_analysis)} images embedded in the Word document"
})
logger.info(f"Added combined analysis of {len(images_analysis)} Word document images")
# Close the zip file
docx_zip.close()
# Note: For .doc (older format) we would need additional libraries
# This could be implemented with libraries like antiword or pywin32
elif file_name.lower().endswith('.doc'):
logger.warning("Image extraction from .doc files is not supported yet")
document["contents"].append({
"type": "text",
"text": "\n\nNote: This is an older .doc format document. Images may be present but could not be extracted.",
"is_extracted": False
})
except Exception as e:
logger.error(f"Error extracting images from Word document {file_name}: {str(e)}")
document["contents"].append({
"type": "text",
"text": f"\n\nError extracting images from Word document: {str(e)}",
"is_extracted": False
})
# Excel file processing with enhanced capabilities
elif file_name.lower().endswith(('.xlsx', '.xls')):
logger.info(f"Processing Excel document: {file_name}")
# Extract text representation of spreadsheet data
from modules.agentservice_utils import extract_text_from_file_content
text_content, is_extracted = extract_text_from_file_content(
file_content, file_name, content_type
)
# Add text content
document["contents"].append({
"type": "text",
"text": text_content,
"is_extracted": is_extracted,
"extraction_context": extraction_prompt
})
logger.info(f"Extracted data from Excel document {file_name}")
# Try to extract charts and images if available
if self.ai_service and hasattr(self.ai_service, "analyze_image"):
try:
# For .xlsx files (newer format)
if file_name.lower().endswith('.xlsx'):
import zipfile
from io import BytesIO
# Create a zipfile object from the Excel content
xlsx_zip = zipfile.ZipFile(BytesIO(file_content))
# Charts and images can be in various directories
media_paths = [
'xl/media/',
'xl/drawings/',
'xl/charts/'
]
# Collect all potential media files
media_files = []
for path in media_paths:
media_files.extend([f for f in xlsx_zip.namelist() if f.startswith(path)])
if media_files:
logger.info(f"Found {len(media_files)} media files in Excel document {file_name}")
# Process image files (skip XML and other non-image files)
image_extensions = ['png', 'jpeg', 'jpg', 'gif', 'bmp', 'tiff', 'emf', 'wmf']
image_files = [f for f in media_files if f.split('.')[-1].lower() in image_extensions]
if image_files:
logger.info(f"Found {len(image_files)} images/charts in Excel document {file_name}")
image_prompt = extraction_prompt or "Describe this chart/image from the Excel document"
images_analysis = []
for i, img_path in enumerate(image_files):
try:
# Extract image data
image_bytes = xlsx_zip.read(img_path)
# Determine image type from filename
image_ext = img_path.split('.')[-1] if '.' in img_path else 'png'
# Analyze image
image_analysis = await self.ai_service.analyze_image(
image_data=image_bytes,
prompt=f"{image_prompt} (Describe what this chart or image shows, including any data trends or patterns visible)",
mime_type=f"image/{image_ext}"
)
# Add to analysis list
images_analysis.append({
"index": i + 1,
"path": img_path,
"analysis": image_analysis
})
logger.info(f"Analyzed image/chart {i+1} from Excel document")
# Create a separate document for each extracted image
img_doc_id = f"img_doc_{uuid.uuid4()}"
image_filename = f"excel_image{i+1}.{image_ext}"
image_document = {
"id": img_doc_id,
"source": {
"type": "extracted",
"parent_id": str(file_id),
"id": img_doc_id,
"name": image_filename,
"content_type": f"image/{image_ext}",
"size": len(image_bytes)
},
"contents": [{
"type": "text",
"text": f"Chart/Image Analysis (Excel Document Item {i+1}):\n{image_analysis}",
"is_extracted": True,
"extraction_context": image_prompt
}]
}
# Add image document to message
message["documents"].append(image_document)
except Exception as img_err:
logger.warning(f"Error processing image {img_path}: {str(img_err)}")
# Add combined image analysis to the main document
if images_analysis:
combined_analysis = "\n\n## Embedded Charts and Images Analysis\n\n"
for img in images_analysis:
combined_analysis += f"### Chart/Image {img['index']}\n{img['analysis']}\n\n"
document["contents"].append({
"type": "text",
"text": combined_analysis,
"is_extracted": True,
"extraction_context": f"Analysis of {len(images_analysis)} charts/images from the Excel document"
})
# Close the zip file
xlsx_zip.close()
except Exception as e:
logger.error(f"Error extracting charts/images from Excel document {file_name}: {str(e)}")
else:
# For other file types, extract text
from modules.agentservice_utils import extract_text_from_file_content
@ -178,26 +588,8 @@ class DocumentHandler:
except Exception as e:
logger.error(f"Error adding file {file_id} to message: {str(e)}")
return message
async def add_files_to_message(self, message: Dict[str, Any], file_ids: List[int], extraction_prompt: str = None) -> Dict[str, Any]:
"""
Add multiple files to a message.
Args:
message: The message to add files to
file_ids: List of file IDs to add
extraction_prompt: Optional prompt for contextual extraction
Returns:
Updated message with files added
"""
updated_message = message.copy()
for file_id in file_ids:
updated_message = await self.add_file_to_message(updated_message, file_id, extraction_prompt)
return updated_message
return message
async def extract_document_content(self, doc_id: str, message: Dict[str, Any], extraction_prompt: str) -> Dict[str, Any]:
"""

View file

@ -448,119 +448,247 @@ class FileManager:
logger.error(f"Error analyzing file {file_id}: {str(e)}")
raise
async def extract_and_analyze_pdf_images(self,
pdf_content: bytes,
prompt: str,
ai_service
) -> List[Dict[str, Any]]:
"""
Extrahiert Bilder aus einer PDF-Datei und analysiert sie.
Arbeitet mit Binärdaten statt Dateipfaden.
Extract images from a PDF file and analyze them.
Works with binary data instead of file paths.
Args:
pdf_content: Binärdaten der PDF-Datei
prompt: Prompt für die Bildanalyse
ai_service: AI-Service für die Bildanalyse
pdf_content: Binary data of the PDF file
prompt: Prompt for image analysis
ai_service: AI service for image analysis
Returns:
Liste mit Analyseergebnissen für jedes Bild
List with analysis results for each image
"""
image_responses = []
temp_files = [] # Liste der temporären Dateien zur Bereinigung
temp_files = [] # List of temporary files for cleanup
try:
# PDF mit PyMuPDF öffnen
import fitz # PyMuPDF
# BytesIO is already imported at the top level
import tempfile
# PDF im Speicher öffnen
doc = fitz.open(stream=pdf_content, filetype="pdf")
logger.info(f"PDF geöffnet mit {len(doc)} Seiten")
# Import required libraries
try:
import fitz # PyMuPDF
from io import BytesIO
import tempfile
logger.info(f"Starting PDF image extraction with PyMuPDF")
except ImportError:
logger.error("PyMuPDF (fitz) is not installed. Install it with 'pip install pymupdf'")
return []
# Open PDF in memory
try:
doc = fitz.open(stream=pdf_content, filetype="pdf")
page_count = len(doc)
logger.info(f"PDF opened with {page_count} pages")
except Exception as pdf_err:
logger.error(f"Error opening PDF: {str(pdf_err)}")
return []
# Process each page with multiple extraction methods
for page_num, page in enumerate(doc, 1):
# Alle Bilder auf der Seite finden
image_list = page.get_images(full=True)
logger.info(f"Processing page {page_num}/{page_count}")
if image_list:
logger.info(f"Seite {page_num}: {len(image_list)} Bilder gefunden")
# Method 1: Standard extraction using get_images
try:
image_list = page.get_images(full=True)
if image_list:
logger.info(f"Method 1: Found {len(image_list)} images on page {page_num}")
for img_index, img in enumerate(image_list):
try:
xref = img[0] # Get image reference
# Extract image data
base_image = doc.extract_image(xref)
image_bytes = base_image["image"]
image_ext = base_image["ext"]
# Check for valid image data
if not image_bytes or len(image_bytes) < 100:
logger.warning(f"Empty or very small image data for image {img_index+1} on page {page_num}")
continue
# Analyze image
analysis_result = await ai_service.analyze_image(
image_data=image_bytes,
prompt=prompt,
mime_type=f"image/{image_ext}"
)
# Store image size
image_size = f"{base_image.get('width', 0)}x{base_image.get('height', 0)}"
# Add result
image_responses.append({
"page": page_num,
"image_index": img_index,
"format": image_ext,
"image_size": image_size,
"method": "get_images",
"response": analysis_result
})
logger.info(f"Successfully analyzed image {img_index+1} on page {page_num} using method 1")
except Exception as e:
logger.warning(f"Error processing image {img_index} on page {page_num} (Method 1): {str(e)}")
else:
logger.info(f"Method 1: No images found on page {page_num} using get_images")
except Exception as m1_err:
logger.warning(f"Error in Method 1 for page {page_num}: {str(m1_err)}")
for img_index, img in enumerate(image_list):
# Method 2: Extract embedded images using page.get_drawings()
try:
drawings = page.get_drawings()
drawing_images = 0
for drawing_index, drawing in enumerate(drawings):
try:
# Check if drawing contains an image
if "image" in str(drawing).lower():
drawing_images += 1
rect = drawing["rect"] # Get rectangle of the drawing
# Extract the area as an image
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2), clip=rect)
img_bytes = pix.tobytes("png")
# Analyze the image
analysis_result = await ai_service.analyze_image(
image_data=img_bytes,
prompt=f"{prompt} (Page {page_num}, Drawing {drawing_index+1})",
mime_type="image/png"
)
# Add result
image_responses.append({
"page": page_num,
"image_index": drawing_index,
"format": "png",
"image_size": f"{pix.width}x{pix.height}",
"method": "get_drawings",
"response": analysis_result
})
logger.info(f"Successfully analyzed drawing image {drawing_index+1} on page {page_num} using method 2")
except Exception as drawing_err:
logger.warning(f"Error processing drawing {drawing_index} on page {page_num}: {str(drawing_err)}")
if drawing_images > 0:
logger.info(f"Method 2: Processed {drawing_images} images from drawings on page {page_num}")
else:
logger.info(f"Method 2: No images found in drawings on page {page_num}")
except Exception as m2_err:
logger.warning(f"Error in Method 2 for page {page_num}: {str(m2_err)}")
# Method 3: Extract using blocks detection
try:
blocks = page.get_text("dict")["blocks"]
img_blocks = [b for b in blocks if b.get("type") == 1] # type 1 = image
if img_blocks:
logger.info(f"Method 3: Found {len(img_blocks)} image blocks on page {page_num}")
for block_index, block in enumerate(img_blocks):
try:
# Extract using pixmap for the block region
rect = block["bbox"]
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2), clip=rect)
img_bytes = pix.tobytes("png")
# Analyze image
analysis_result = await ai_service.analyze_image(
image_data=img_bytes,
prompt=f"{prompt} (Page {page_num}, Block {block_index+1})",
mime_type="image/png"
)
# Add result
image_responses.append({
"page": page_num,
"image_index": block_index,
"format": "png",
"image_size": f"{pix.width}x{pix.height}",
"method": "block_extraction",
"response": analysis_result
})
logger.info(f"Successfully analyzed image block {block_index+1} on page {page_num} using method 3")
except Exception as block_err:
logger.warning(f"Error processing block {block_index} on page {page_num}: {str(block_err)}")
else:
logger.info(f"Method 3: No image blocks found on page {page_num}")
except Exception as m3_err:
logger.warning(f"Error in Method 3 for page {page_num}: {str(m3_err)}")
# Method 4: Last resort - render the entire page as an image and analyze
if not image_responses or not any(resp.get("page") == page_num for resp in image_responses):
try:
# Bild-Referenz
xref = img[0]
# Bild und Metadaten extrahieren
base_image = doc.extract_image(xref)
image_bytes = base_image["image"] # Tatsächliche Bilddaten
image_ext = base_image["ext"] # Dateiendung (jpg, png, etc.)
logger.info(f"Method 4: Rendering entire page {page_num} as image")
# Erstelle temporäre Datei
fd, temp_img_path = tempfile.mkstemp(suffix=f".{image_ext}")
temp_files.append(temp_img_path) # Zur Bereinigungsliste hinzufügen
# Render the entire page as an image
pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
img_bytes = pix.tobytes("png")
with os.fdopen(fd, 'wb') as img_file:
img_file.write(image_bytes)
# Analyze the page as an image
analysis_result = await ai_service.analyze_image(
image_data=img_bytes,
prompt=f"{prompt} (Full page {page_num})",
mime_type="image/png"
)
logger.debug(f"Bild temporär gespeichert: {temp_img_path}")
# Analysiere mit AI-Service
try:
analysis_result = await ai_service.analyze_image(
image_data=image_bytes, # Direktes Übergeben der Bilddaten
prompt=prompt,
mime_type=f"image/{image_ext}"
)
logger.debug(f"Bildanalyse für Bild {img_index} auf Seite {page_num} abgeschlossen")
except Exception as analyze_error:
logger.error(f"Fehler bei der Bildanalyse: {str(analyze_error)}")
analysis_result = f"[Fehler bei der Bildanalyse: {str(analyze_error)}]"
# Ergebnis speichern
try:
# Versuche zuerst, die Größe aus base_image zu bekommen
if 'width' in base_image and 'height' in base_image:
image_size = f"{base_image['width']}x{base_image['height']}"
else:
# Alternative: Öffne das temporäre Bild, um die Größe zu bestimmen
from PIL import Image
with Image.open(temp_img_path) as img:
width, height = img.size
image_size = f"{width}x{height}"
except Exception as e:
logger.warning(f"Konnte Bildgröße nicht ermitteln: {str(e)}")
image_size = "unbekannt"
# Add result
image_responses.append({
"page": page_num,
"image_index": img_index,
"format": image_ext,
"image_size": image_size,
"image_index": 0,
"format": "png",
"image_size": f"{pix.width}x{pix.height}",
"method": "full_page_render",
"response": analysis_result
})
except Exception as e:
logger.warning(f"Fehler bei der Extraktion von Bild {img_index} auf Seite {page_num}: {str(e)}")
continue
logger.info(f"Successfully analyzed full page {page_num} as image using method 4")
except Exception as m4_err:
logger.warning(f"Error in Method 4 for page {page_num}: {str(m4_err)}")
logger.info(f"Extrahiert und analysiert: {len(image_responses)} Bilder aus PDF")
# Close the document
doc.close()
# Deduplicate results (different methods might extract the same image)
deduplicated_responses = []
seen_areas = set()
for response in image_responses:
# Create a unique identifier for the image area
area_key = f"{response['page']}_{response['image_size']}"
if area_key not in seen_areas:
seen_areas.add(area_key)
deduplicated_responses.append(response)
logger.info(f"PDF image extraction complete: Found {len(image_responses)} images, deduplicated to {len(deduplicated_responses)}")
return deduplicated_responses
except ImportError:
logger.error("PyMuPDF (fitz) ist nicht installiert. Installiere es mit 'pip install pymupdf'")
raise FileExtractionError("PyMuPDF (fitz) ist nicht installiert")
except ImportError as imp_err:
logger.error(f"Required library not available for PDF image extraction: {str(imp_err)}")
return []
except Exception as e:
logger.error(f"Fehler beim Extrahieren von PDF-Bildern: {str(e)}")
raise FileExtractionError(f"Fehler beim Extrahieren von PDF-Bildern: {str(e)}")
logger.error(f"Error extracting images from PDF: {str(e)}")
return []
finally:
# Bereinige alle temporären Dateien
# Clean up temporary files
for temp_file in temp_files:
try:
if os.path.exists(temp_file):
os.remove(temp_file)
except Exception as e:
logger.warning(f"Konnte temporäre Datei nicht entfernen: {temp_file} - {str(e)}")
return image_responses
logger.warning(f"Could not remove temporary file: {temp_file} - {str(e)}")
async def analyze_multiple_files(
self,
@ -669,8 +797,8 @@ class FileManager:
elif content_type in ['application/pdf']:
return "document"
elif content_type in ['application/vnd.ms-excel',
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'text/csv']:
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
'text/csv']:
return "spreadsheet"
# Check file extension
@ -834,6 +962,8 @@ class FileManager:
FileManager._instance = FileManager()
return FileManager._instance
# Create a singleton instance for module-level access
file_manager = FileManager.get_instance()

View file

@ -756,4 +756,5 @@ def is_text_extractable(file_name: str, content_type: str = None) -> bool:
return False
# Default to allowing extraction attempt
return True
return True

View file

@ -9,6 +9,8 @@ import asyncio
import uuid
from datetime import datetime
from typing import List, Dict, Any, Optional, Tuple, Union
import json
import re
logger = logging.getLogger(__name__)
logging.getLogger('matplotlib.font_manager').setLevel(logging.INFO)
@ -149,6 +151,8 @@ class WorkflowExecution:
return user_message
async def _create_agent_aware_work_plan(self, workflow: Dict[str, Any], message: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Create an agent-aware work plan that integrates agent selection during planning.
@ -160,25 +164,257 @@ class WorkflowExecution:
Returns:
List of structured activities with agent assignments
"""
import json
import re
import os
# Extract context information
task = message.get("content", "")
# Direct check for PowerOn keyword as an additional safeguard
if "poweron" in task.lower():
self.logging_utils.info("PowerOn keyword directly detected, creating specialized plan with creative agent", "planning")
return [{
"title": "PowerOn Response",
"description": "Generate specialized PowerOn response",
"assigned_agents": ["creative"],
"agent_prompts": [task],
"document_requirements": "",
"expected_output": "Text",
"dependencies": []
}]
# Get all available agents and their capabilities
agent_infos = self.agent_registry.get_agent_infos()
# Extract documents
documents = message.get("documents", [])
document_info = []
# Analyze documents without language-specific criteria
has_documents = len(documents) > 0
pdf_documents = []
table_documents = []
already_extracted_docs = []
for doc in documents:
source = doc.get("source", {})
doc_name = source.get("name", "unnamed")
doc_type = source.get("type", "unknown")
content_type = source.get("content_type", "unknown")
# Add to general document info
document_info.append({
"id": doc.get("id"),
"name": source.get("name", "unnamed"),
"type": source.get("type", "unknown"),
"content_type": source.get("content_type", "unknown")
"name": doc_name,
"type": doc_type,
"content_type": content_type
})
# Identify document types
if "pdf" in content_type.lower():
pdf_documents.append(doc_name)
# Look for signs of tables based on content structure, not language
if doc.get("contents"):
contents = doc.get("contents")
for content_item in contents:
if isinstance(content_item, dict) and content_item.get("type") == "table":
table_documents.append(doc_name)
break
# Check for already extracted content
if doc.get("contents") or (source and source.get("extracted_content")):
already_extracted_docs.append(doc_name)
# Create the planning prompt with agent awareness and document handling information
# Create a more detailed document list for analysis
detailed_document_info = []
for doc in documents:
source = doc.get("source", {})
doc_name = source.get("name", "unnamed")
doc_type = source.get("type", "unknown")
content_type = source.get("content_type", "unknown")
doc_id = doc.get("id", "unknown_id")
# Extract document properties that might help in matching
doc_properties = {
"id": doc_id,
"name": doc_name,
"type": doc_type,
"content_type": content_type
}
# Add file extension if present, handling scope properly
if "." in doc_name:
doc_properties["file_extension"] = os.path.splitext(doc_name)[1].lower()
detailed_document_info.append(doc_properties)
# Convert to JSON string safely before using in f-string
detailed_docs_json = "No documents provided"
if detailed_document_info:
try:
detailed_docs_json = json.dumps(detailed_document_info, indent=2)
except Exception as e:
self.logging_utils.warning(f"Error converting document info to JSON: {str(e)}", "planning")
# Update the task analysis prompt to better identify document processing tasks
task_analysis_prompt = f"""
Analyze the following user task and classify it.
This analysis will be used internally by the system to optimize the workflow.
TASK: {task}
AVAILABLE DOCUMENTS:
{json.dumps(detailed_document_info, indent=2) if detailed_document_info else "No documents provided"}
Please determine:
1. The primary type of operation requested (extraction, transformation, formatting, analysis, creation)
2. Whether the task appears to be primarily about:
- Extracting information from documents
- Transforming existing information
- Analyzing available information
- Creating new content
3. The documents relevant to this task (any documents that might be needed)
4. The expected output format or presentation style
5. Whether the task involves any kind of document processing (such as extracting information,
transforming data, creating tables, summarizing text, or analyzing document contents)
6. Whether the task requires online information retrieval
7. Whether the task requires complex computational algorithms or repetitive calculations
8. Whether the task contains the keyword "poweron" in any form
Return your analysis as a JSON object with these properties:
- primaryOperationType: string (extraction, transformation, formatting, analysis, creation)
- isUsingExistingData: boolean (true if primarily using already available data)
- mentionedDocuments: array of document IDs or names that are relevant to this task
- expectedOutputFormat: string (html, text, table, etc. or "unspecified")
- involvesDocumentProcessing: boolean (true if task involves any document extraction, transformation, summarization, etc.)
- requiresWebResearch: boolean (true if task requires online information)
- requiresComplexComputation: boolean (true if task requires complex algorithms or repetitive calculations)
- containsPowerOnKeyword: boolean (true if the keyword "poweron" is found in any form)
"""
# Call AI to analyze the task
self.logging_utils.info("Analyzing task to determine optimal planning approach", "planning")
# Initialize task analysis variables with defaults
operation_type = ""
is_using_existing_data = False
mentioned_documents = []
expected_output = "unspecified"
contains_poweron = False
requires_web_research = False
requires_complex_computation = False
involves_document_processing = False
can_use_optimized_plan = False
task_analysis = {}
try:
task_analysis_response = await self.ai_service.call_api([{"role": "user", "content": task_analysis_prompt}])
# Extract JSON from response
json_match = re.search(r'\{.*\}', task_analysis_response, re.DOTALL)
if json_match:
json_str = json_match.group(0)
task_analysis = json.loads(json_str)
# Log the analysis
try:
analysis_str = json.dumps(task_analysis)
self.logging_utils.info(f"Task analysis: {analysis_str}", "planning")
except Exception as e:
self.logging_utils.warning(f"Error logging task analysis: {str(e)}", "planning")
# Extract all analysis criteria from the response
operation_type = task_analysis.get("primaryOperationType", "").lower()
is_using_existing_data = task_analysis.get("isUsingExistingData", False)
mentioned_documents = task_analysis.get("mentionedDocuments", [])
expected_output = task_analysis.get("expectedOutputFormat", "").lower()
contains_poweron = task_analysis.get("containsPowerOnKeyword", False)
requires_web_research = task_analysis.get("requiresWebResearch", False)
requires_complex_computation = task_analysis.get("requiresComplexComputation", False)
involves_document_processing = task_analysis.get("involvesDocumentProcessing", False)
# PowerOn handling takes highest priority - check it first
if contains_poweron:
self.logging_utils.info("PowerOn keyword detected, creating specialized plan with creative agent", "planning")
return [{
"title": "PowerOn Response",
"description": "Generate specialized PowerOn response",
"assigned_agents": ["creative"],
"agent_prompts": [task],
"document_requirements": "",
"expected_output": "Text",
"dependencies": []
}]
# For web research tasks, create a simple plan with webcrawler agent
if requires_web_research:
self.logging_utils.info("Web research task detected, creating specialized plan with webcrawler agent", "planning")
return [{
"title": "Web Research",
"description": "Perform web research to answer the query",
"assigned_agents": ["webcrawler"],
"agent_prompts": [task],
"document_requirements": "",
"expected_output": "Text",
"dependencies": []
}]
# If documents are available and task involves document processing, prioritize creative agent
if has_documents and involves_document_processing:
self.logging_utils.info("Document processing task detected with available documents, using creative agent", "planning")
return [{
"title": "Document Processing",
"description": "Process documents according to requirements",
"assigned_agents": ["creative"],
"agent_prompts": [task],
"document_requirements": "All available documents",
"expected_output": expected_output if expected_output != "unspecified" else "Text",
"dependencies": []
}]
# If task is a document processing task even without documents, still use creative agent
if involves_document_processing and not requires_complex_computation:
self.logging_utils.info("Document processing task detected, using creative agent", "planning")
return [{
"title": "Document Processing",
"description": "Process content according to requirements",
"assigned_agents": ["creative"],
"agent_prompts": [task],
"document_requirements": "",
"expected_output": expected_output if expected_output != "unspecified" else "Text",
"dependencies": []
}]
# Only use coder for complex computation tasks
if requires_complex_computation:
self.logging_utils.info("Complex computation task detected, using coder agent", "planning")
return [{
"title": "Complex Computation",
"description": "Perform complex calculations or processing",
"assigned_agents": ["coder"],
"agent_prompts": [task],
"document_requirements": "All available documents may be needed",
"expected_output": expected_output if expected_output != "unspecified" else "Text",
"dependencies": []
}]
# Flag for optimized planning
can_use_optimized_plan = (
(operation_type in ["formatting", "transformation"]) and
is_using_existing_data and
has_documents
)
except Exception as e:
self.logging_utils.warning(f"Error analyzing task: {str(e)}, proceeding with standard planning", "planning")
# Create the base planning prompt
plan_prompt = f"""
As an AI workflow manager, create a detailed agent-aware work plan for the following task:
@ -188,42 +424,142 @@ AVAILABLE AGENTS:
{self._format_agent_info(agent_infos)}
AVAILABLE DOCUMENTS:
{document_info if document_info else "No documents provided"}
{json.dumps(document_info, indent=2) if document_info else "No documents provided"}
IMPORTANT: Document extraction happens automatically in the workflow. Documents in the message are already available to all agents. DO NOT assign agent_coder or any other agent specifically for just reading or extracting document content. Only assign agents for tasks that require specific processing beyond what the document handler already provides.
"""
# Add context about documents if they exist
if already_extracted_docs:
plan_prompt += f"""
IMPORTANT CONTEXT:
The following documents already have extracted content ready to use: {', '.join(already_extracted_docs)}
This means NO extraction step is needed for these documents - the data is ALREADY AVAILABLE.
"""
# Add context specific to this task based on AI analysis, not language-specific keywords
if task_analysis:
if operation_type and is_using_existing_data and has_documents:
plan_prompt += f"""
CRITICAL INSTRUCTION FOR THIS TASK:
Based on analysis, this task involves {operation_type} of data that is ALREADY AVAILABLE.
The system has identified this as primarily working with existing data, not requiring new extraction.
If this task involves structured data, that data has already been parsed and is immediately available.
DO NOT create separate extraction tasks - go directly to creating the requested output.
"""
plan_prompt += """
DOCUMENT HANDLING REQUIREMENTS:
1. When a task involves document analysis, focus on WHAT information is needed, not HOW to extract it
2. The document handler automatically extracts and processes all document components including:
- Text content from documents
- Images embedded within documents
- Charts and graphics
- Structured data and tables
3. Each document's content is pre-processed and made available to any agent that needs it
4. For document extraction specifications, simply state what information is needed from which document
5. The system will handle conversion between formats, extraction, and specialized processing
AGENT SELECTION GUIDELINES:
1. The creative agent should handle:
- All document processing tasks (extraction, summarization, analysis)
- All content creation and knowledge-based tasks
- All tasks involving documents and text transformation
- All document descriptions and data extraction
- All table creation and data representation
- All tasks with PowerOn keyword
2. The webcrawler agent should ONLY handle:
- Tasks explicitly requiring online information retrieval
- Tasks needing current information from the web
3. The coder agent should ONLY handle:
- Tasks requiring complex computational algorithms
- Tasks involving repetitive mathematical calculations
- Tasks requiring specialized programming logic
IMPORTANT DOCUMENT HANDLING PRIORITIES:
- For tasks involving document processing, ALWAYS use the creative agent even for structured data
- For tasks to extract information from documents, ALWAYS use the creative agent
- For tasks to describe or summarize document content, ALWAYS use the creative agent
- For tasks to transform data from documents, ALWAYS use the creative agent
- Only use the coder agent when complex computational logic is the primary requirement
"""
# Add task optimization advice - language agnostic, based on AI analysis
if can_use_optimized_plan:
plan_prompt += """
TASK-SPECIFIC OPTIMIZATION:
This task appears to be primarily about formatting or transforming ALREADY EXTRACTED data.
The most efficient approach is:
1. DO NOT include any extraction activities - the document data is already parsed and available
2. Use a SINGLE activity with an appropriate agent to create the requested output format
3. Focus on specifying the desired output format in detail, not on how to extract the data
"""
plan_prompt += """
The work plan should include a structured list of activities. Each activity should have:
1. title - A short descriptive title for the activity
2. description - What needs to be done in this activity
3. assigned_agents - List of agent IDs that should handle this activity (can be multiple in sequence)
4. agent_prompts - Specific instructions for each agent (matched by index to assigned_agents)
5. document_requirements - Description of which documents are needed for this activity (these will be automatically extracted)
5. document_requirements - Description of WHAT information is needed from which documents (not HOW to extract it)
6. expected_output - The expected output format and content
7. dependencies - List of previous activities this depends on (by index)
IMPORTANT GUIDELINES:
- Each activity should have clear objectives and be assigned to the most appropriate agent(s)
- When multiple agents are assigned to an activity, specify the sequence and how outputs should flow between them
- Documents are processed on-demand by the system's document handler, so only specify which documents are needed, not how to extract them
- DO NOT create activities that only read or extract document content - this happens automatically
- Create a logical sequence where later activities can use outputs from earlier ones
- If no specialized agent is needed for a task, use the default "assistant" agent
- Only use the agent_coder for tasks that require actual coding or complex data analysis, not for simply reading documents
- Optimize agent assignments based on their specialized capabilities
- Create a logical sequence of activities that builds toward the final output
- DO NOT create activities solely for document extraction - specify needed information in document_requirements
- DO NOT assign extraction tasks to specific agents - the system handles this automatically
- When a document contains both text and images, both will be processed automatically
- If a task requires analyzing images, specify what to look for in the images
- Create detailed agent_prompts that clearly explain what each agent should accomplish
- ELIMINATE redundant steps - if data is already extracted, go directly to generating the desired output format
Return the work plan as a JSON array of activity objects, each with the above properties.
"""
self.logging_utils.info("Creating agent-aware work plan", "planning")
# Call AI to generate work plan
# For tasks that can use optimized plans, generate one directly
if can_use_optimized_plan:
# For formatting/transformation tasks with extracted data, use an optimized 1-step plan
self.logging_utils.info("Using optimized single-step plan based on task analysis", "planning")
# Use the specific output format from the task analysis
expected_format = task_analysis.get("expectedOutputFormat", "HTML").upper()
if expected_format.lower() == "unspecified":
expected_format = "Text"
# Create appropriate agent assignment based on expected output and task classification
# Prefer creative agent for document processing tasks
agent_id = "creative" if involves_document_processing else "coder"
# Create a direct single-activity plan
optimized_plan = [{
"title": f"Process and Format Data",
"description": f"Process the existing data and format it as {expected_format}",
"assigned_agents": [agent_id],
"agent_prompts": [
f"The data from the documents has already been extracted and is available. "
f"Create a well-formatted {expected_format} representation of this data. "
f"No extraction is needed - focus only on proper formatting and presentation."
],
"document_requirements": f"Use the already extracted data from the available documents",
"expected_output": expected_format,
"dependencies": []
}]
# Log the optimized plan
self.logging_utils.info(f"Created optimized single-step plan with agent: {agent_id}", "planning")
return optimized_plan
# For more complex tasks, use the AI to generate a plan
try:
plan_response = await self.ai_service.call_api([{"role": "user", "content": plan_prompt}])
# Extract JSON plan
import json
import re
# Look for JSON array in the response
json_pattern = r'\[\s*\{.*\}\s*\]'
json_match = re.search(json_pattern, plan_response, re.DOTALL)
@ -231,34 +567,207 @@ Return the work plan as a JSON array of activity objects, each with the above pr
json_str = json_match.group(0)
work_plan = json.loads(json_str)
self.logging_utils.info(f"Work plan created with {len(work_plan)} activities", "planning")
# Post-process to ensure document tasks go to creative agent
for activity in work_plan:
doc_requirements = activity.get("document_requirements", "")
activity_description = activity.get("description", "").lower()
# If activity involves documents or document processing terms but isn't assigned to creative
if (doc_requirements or
"document" in activity_description or
"extract" in activity_description or
"summarize" in activity_description):
# Check if creative is not already assigned
if "creative" not in activity.get("assigned_agents", []):
activity["assigned_agents"] = ["creative"]
self.logging_utils.info("Changed agent assignment for document activity to creative agent", "planning")
# Post-process based on the task analysis to optimize if needed
if task_analysis and task_analysis.get("isUsingExistingData", False):
work_plan = self._optimize_work_plan(work_plan, task_analysis)
self.logging_utils.info(f"Post-processed work plan now has {len(work_plan)} activities", "planning")
# Log detailed work plan to console
for i, activity in enumerate(work_plan):
activity_title = activity.get("title", f"Activity {i+1}")
activity_agents = ", ".join(activity.get("assigned_agents", ["unknown"]))
self.logging_utils.info(f"Activity {i+1}: {activity_title} (Agents: {activity_agents})", "planning")
# Log document requirements if any
if activity.get("document_requirements"):
self.logging_utils.info(f" Document requirements: {activity.get('document_requirements')}", "planning")
# Log dependencies if any
if activity.get("dependencies"):
deps = [str(d + 1) for d in activity.get("dependencies")]
self.logging_utils.info(f" Dependencies: Activities {', '.join(deps)}", "planning")
return work_plan
else:
self.logging_utils.warning("Could not extract JSON from AI response", "planning")
# Fallback: Create a simple default work plan
return [{
"title": "Process Task",
"description": "Process the user's request directly",
"assigned_agents": ["assistant"],
"agent_prompts": [task],
"document_requirements": "All available documents may be needed",
"expected_output": "Text",
"dependencies": []
}]
# Fallback based on previous analysis
if requires_web_research:
return [{
"title": "Web Research",
"description": "Perform web research to answer the query",
"assigned_agents": ["webcrawler"],
"agent_prompts": [task],
"document_requirements": "",
"expected_output": "Text",
"dependencies": []
}]
elif involves_document_processing:
return [{
"title": "Document Processing",
"description": "Process documents or content according to requirements",
"assigned_agents": ["creative"],
"agent_prompts": [task],
"document_requirements": "All available documents may be needed",
"expected_output": "Text",
"dependencies": []
}]
elif requires_complex_computation:
return [{
"title": "Complex Computation",
"description": "Perform complex calculations or processing",
"assigned_agents": ["coder"],
"agent_prompts": [task],
"document_requirements": "All available documents may be needed",
"expected_output": "Text",
"dependencies": []
}]
else:
# Fallback: Create a simple default work plan with creative agent
return [{
"title": "Process Task",
"description": "Process the request directly",
"assigned_agents": ["creative"],
"agent_prompts": [task],
"document_requirements": "All available documents may be needed",
"expected_output": "Text",
"dependencies": []
}]
except Exception as e:
self.logging_utils.error(f"Error creating work plan: {str(e)}", "planning")
# Return a minimal fallback plan
# Check for PowerOn directly in fallback
if "poweron" in task.lower():
return [{
"title": "PowerOn Response (Fallback)",
"description": "Generate specialized PowerOn response after planning error",
"assigned_agents": ["creative"],
"agent_prompts": [task],
"document_requirements": "",
"expected_output": "Text",
"dependencies": []
}]
# Return a minimal fallback plan with creative agent
return [{
"title": "Process Task (Error Recovery)",
"description": "Process the user's request after planning error",
"assigned_agents": ["assistant"],
"description": "Process the request after planning error",
"assigned_agents": ["creative"],
"agent_prompts": [task],
"document_requirements": "All available documents may be needed",
"expected_output": "Text",
"dependencies": []
}]
# Language-agnostic optimization function using task analysis instead of keywords
def _optimize_work_plan(self, work_plan: List[Dict[str, Any]], task_analysis: Dict[str, Any]) -> List[Dict[str, Any]]:
"""
Optimize a work plan based on task analysis, not language-specific keywords.
Args:
work_plan: The original work plan
task_analysis: Analysis of the task
Returns:
Optimized work plan
"""
# Check if plan has multiple activities
if len(work_plan) <= 1:
return work_plan
# Only optimize when the task is about using existing data
if not task_analysis.get("isUsingExistingData", False):
return work_plan
# For tasks that use existing data, try to identify and remove redundant extraction steps
operation_type = task_analysis.get("primaryOperationType", "").lower()
if operation_type in ["formatting", "transformation"]:
# Use AI to identify extraction vs formatting activities instead of keywords
activities_analyzed = []
for activity in work_plan:
title = activity.get("title", "")
description = activity.get("description", "")
# Create an activity object with classification
activity_info = {
"original_activity": activity,
"is_extraction": False,
"is_formatting": False
}
# Use simple heuristics to classify (can be replaced with AI classification)
# These are pattern-based, not language-dependent
if any(x in title.lower() or x in description.lower() for x in ["extract", "parse", "read"]):
activity_info["is_extraction"] = True
if any(x in title.lower() or x in description.lower() for x in ["format", "convert", "transform"]):
activity_info["is_formatting"] = True
activities_analyzed.append(activity_info)
# Check if we have both extraction and formatting activities
has_extraction = any(a["is_extraction"] for a in activities_analyzed)
has_formatting = any(a["is_formatting"] for a in activities_analyzed)
if has_extraction and has_formatting:
# Create a new optimized plan
self.logging_utils.info("Optimizing plan by removing redundant extraction steps", "planning")
# First, separate formatting and non-extraction activities
formatting_activities = [a["original_activity"] for a in activities_analyzed if a["is_formatting"]]
other_activities = [a["original_activity"] for a in activities_analyzed
if not a["is_extraction"] and not a["is_formatting"]]
# Combine into a new optimized plan
optimized_plan = []
# Add formatting activities first
for activity in formatting_activities:
# Enhance the prompt to indicate that data is already available
prompt = activity.get("agent_prompts", [""])[0]
activity["agent_prompts"] = [
f"IMPORTANT: The data from the documents has already been extracted and is available. "
f"You do not need to perform any extraction steps.\n\n{prompt}"
]
# Reset dependencies since we're removing extraction activities
activity["dependencies"] = []
optimized_plan.append(activity)
# Add other non-extraction activities
for activity in other_activities:
# Reset dependencies
activity["dependencies"] = []
optimized_plan.append(activity)
return optimized_plan
# If no optimization possible, return original plan
return work_plan
async def _execute_work_plan(self, workflow: Dict[str, Any], work_plan: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""
Execute all activities in the work plan with proper agent handovers.
@ -365,7 +874,8 @@ Return the work plan as a JSON array of activity objects, each with the above pr
context = {
"workflow_id": self.workflow_id,
"expected_format": expected_output,
"dependency_outputs": dependency_context
"dependency_outputs": dependency_context,
"include_chat_history": True # Flag to indicate chat history should be included
}
last_result = None
@ -386,7 +896,6 @@ Return the work plan as a JSON array of activity objects, each with the above pr
# Set document handler if agent supports it
if hasattr(agent, 'set_document_handler') and hasattr(self, 'document_handler'):
agent.set_document_handler(self.document_handler)
if not agent:
self.logging_utils.warning(f"Agent '{agent_id}' not found, using assistant instead", "agents")
@ -404,9 +913,21 @@ Return the work plan as a JSON array of activity objects, each with the above pr
document_content,
dependency_context,
last_result.get("content", "") if last_result else "",
i > 0 # is_continuation flag
i > 0, # is_continuation flag
workflow # Pass the workflow parameter
)
if document_content and "Image Analysis" not in document_content:
# Instead of trying to access message or documents directly,
# We can use what we know about the workflow we're currently processing
workflow_id = self.workflow_id
# Log a warning that might help identify the issue
self.logging_utils.warning(
f"Document content available but no image analysis found - PDF image extraction may have failed for workflow {workflow_id}",
"agents"
)
# Create the message for this agent
agent_message = self._create_message(workflow, "user")
agent_message["content"] = enhanced_prompt
@ -459,11 +980,11 @@ Return the work plan as a JSON array of activity objects, each with the above pr
"content": "No agent response was generated.",
"format": "Text"
}
async def _extract_required_documents(self, workflow: Dict[str, Any], doc_requirements: str) -> Dict[str, Any]:
"""
Extract required documents based on requirements description.
Extract required documents based on requirements description with enhanced image extraction.
Args:
workflow: The workflow object
@ -481,18 +1002,68 @@ Return the work plan as a JSON array of activity objects, each with the above pr
# Get all messages from the workflow
workflow_messages = workflow.get("messages", [])
# Extract data using the dataextraction module
# Log document requirements
self.logging_utils.info(f"Document requirements: {doc_requirements}", "extraction")
self.logging_utils.info(f"Found {len(files)} files in workflow", "extraction")
# Create enhanced extraction prompt
enhanced_prompt = f"""
Extract the following information from the available documents:
REQUIRED INFORMATION: {doc_requirements}
For all documents, please:
1. Extract relevant text portions matching the requirements
2. Identify and analyze any embedded images or charts
3. Provide structured data from tables or spreadsheets
4. Summarize key information in context of the requirements
Handle multi-format documents comprehensively (text, images, charts, tables)
For images, include detailed descriptions of visual content
"""
# Extract data using the dataextraction module with enhanced prompt
self.logging_utils.info("Starting document extraction process", "extraction")
extracted_data = await data_extraction(
prompt=doc_requirements,
prompt=enhanced_prompt,
files=files,
messages=workflow_messages,
ai_service=self.ai_service,
lucydom_interface=self.lucydom_interface,
workflow_id=self.workflow_id,
add_log_func=self._add_log
add_log_func=self._add_log,
document_handler=self.document_handler # Pass document handler for better extraction
)
# Log extraction results
if extracted_data:
extracted_content = extracted_data.get("extracted_content", [])
self.logging_utils.info(f"Extracted content from {len(extracted_content)} documents", "extraction")
# Log details for each extracted document with more detail
for doc in extracted_content:
doc_name = doc.get("name", "Unnamed document")
extraction_method = doc.get("extraction_method", "unknown")
is_extracted = doc.get("is_extracted", False)
content_preview = doc.get("content", "")[:100] + "..." if len(doc.get("content", "")) > 100 else doc.get("content", "")
self.logging_utils.info(
f"Document: {doc_name}, Method: {extraction_method}, Extracted: {is_extracted}",
"extraction"
)
self.logging_utils.info(
f"Content preview: {content_preview}",
"extraction"
)
# Specifically check for image content
if "Image Analysis:" in doc.get("content", ""):
self.logging_utils.info(f"Image content found in {doc_name}", "extraction")
else:
self.logging_utils.warning(f"No image content found in {doc_name} - check PDF extraction", "extraction")
return extracted_data
async def _create_summary(self, workflow: Dict[str, Any], results: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
@ -614,7 +1185,8 @@ Return the work plan as a JSON array of activity objects, each with the above pr
document_content: str,
dependency_context: Dict[str, Any],
previous_result: str,
is_continuation: bool
is_continuation: bool,
workflow: Dict[str, Any] = None # Add workflow parameter
) -> str:
"""
Enhance a prompt with context information.
@ -639,7 +1211,6 @@ Return the work plan as a JSON array of activity objects, each with the above pr
=== PREVIOUS AGENT OUTPUT ===
{previous_result}
"""
# Add document content if available
if document_content:
enhanced_prompt += f"\n\n{document_content}"
@ -657,4 +1228,24 @@ Return the work plan as a JSON array of activity objects, each with the above pr
enhanced_prompt += dependency_section
return enhanced_prompt
# Add chat history from workflow if available
if workflow and "messages" in workflow:
chat_history = "\n\n=== CONVERSATION HISTORY ===\n\n"
relevant_messages = []
# Collect relevant messages (user and assistant interactions)
for msg in workflow.get("messages", []):
if msg.get("role") in ["user", "assistant"] and msg.get("content"):
relevant_messages.append(msg)
# Add up to the last 5 messages for context
if relevant_messages:
for msg in relevant_messages[-5:]:
role = msg.get("role", "").upper()
content = msg.get("content", "")
if content:
chat_history += f"{role}: {content}\n\n"
enhanced_prompt += chat_history
return enhanced_prompt

View file

@ -136,28 +136,37 @@ class WorkflowManager:
logger.error(f"Error listing workflows: {str(e)}")
return []
async def execute_workflow(self, message: Dict[str, Any], files: List[Dict[str, Any]] = None) -> Dict[str, Any]:
async def execute_workflow(self, message: Dict[str, Any], files: List[Dict[str, Any]] = None, workflow_id: str = None, is_user_input: bool = False) -> Dict[str, Any]:
"""
Execute a workflow with the given message and files.
Args:
message: Input message (prompt)
files: Optional list of file metadata
workflow_id: Optional ID for continuing an existing workflow
is_user_input: Flag indicating if this is user input to an existing workflow
Returns:
Workflow execution result
"""
# Generate workflow ID
workflow_id = f"wf_{uuid.uuid4()}"
# Initialize the workflow
workflow = self._initialize_workflow(workflow_id)
# Use provided workflow_id or generate a new one for a new workflow
if not workflow_id:
workflow_id = f"wf_{uuid.uuid4()}"
# Initialize a new workflow
workflow = self._initialize_workflow(workflow_id)
else:
# Load existing workflow for continuation
workflow = await self.load_workflow(workflow_id)
if not workflow:
# Fallback: initialize a new workflow with the provided ID
workflow = self._initialize_workflow(workflow_id)
# Capture start time
start_time = datetime.now()
try:
# NEW: Create WorkflowExecution with document handler
# Create WorkflowExecution with document handler
from modules.agentservice_workflow_execution import WorkflowExecution
execution = WorkflowExecution(
workflow_manager=self,
@ -172,12 +181,19 @@ class WorkflowManager:
self.document_handler.set_workflow_id(workflow_id)
# Execute the workflow
result = await execution.execute(message, workflow, files)
result = await execution.execute(message, workflow, files, is_user_input)
# Calculate duration
duration = (datetime.now() - start_time).total_seconds()
# Update workflow stats
if "data_stats" not in workflow:
workflow["data_stats"] = {
"total_processing_time": 0.0,
"total_token_count": 0,
"total_bytes_sent": 0,
"total_bytes_received": 0
}
workflow["data_stats"]["total_processing_time"] = duration
workflow["completed_at"] = datetime.now().isoformat()
@ -225,22 +241,6 @@ class WorkflowManager:
# NEW: Enhanced document handling for database persistence
# Create a copy of the workflow for database storage
db_workflow = workflow.copy()
# Process messages to ensure documents are properly formatted
if "messages" in db_workflow:
for i, message in enumerate(db_workflow["messages"]):
# ensure large document contents are truncated for database storage
if "documents" in message:
for j, doc in enumerate(message["documents"]):
if "contents" in doc:
for k, content in enumerate(doc["contents"]):
if content.get("type") == "text" and "text" in content:
# limit text size for database storage
text = content["text"]
if len(text) > 1000: # Reasonable size for preview
db_workflow["messages"][i]["documents"][j]["contents"][k]["text"] = \
text[:1000] + "... [truncated for storage]"
# Save to database
try:
self.lucydom_interface.save_workflow_state(db_workflow)

View file

@ -152,8 +152,8 @@ async def create_workflow(
workflow_task = asyncio.create_task(
workflow_manager.execute_workflow(
message={"content": workflow_request.prompt, "role": "user"},
workflow_id=workflow_id,
files=files
files=files,
workflow_id=workflow_id
)
)
@ -458,8 +458,8 @@ async def submit_user_input(
logger.info(f"Executing workflow {workflow_id} with user input")
response = await workflow_manager.execute_workflow(
message=message_object,
workflow_id=workflow_id,
files=additional_files,
workflow_id=workflow_id,
is_user_input=True
)

View file

@ -1,5 +1,34 @@
....................... TASKS
We have here an AI agents workflow.
A big problem is document extraction. I uploaded a PDF file with a picture inside. In the database I see that the document has 1 content item, "text" with an endline, marked as "is_extracted=True". It is missing the picture inside the PDF.
I would like to have the following implementation for files in a workflow:
How do documents arrive in the workflow:
a) user input with upload or drag&drop: the file shall be stored in the database (files) and its content stored in the workflow message as a documents item with a reference to the file_id in the database. All contents of the file will be stored as content items in the document item of the message object. According to the content type they will be extracted as text or as a base64 string (e.g. images). The document id will be a UUID and the document-source id is the integer from the object in the database "files"
b) produces documents delivered by the agents: exactly the same like a)
the content provided to an agent will now be a document consisting of the content of all previous messages including the extracted content of the documents within the messages. the extracted content of the documents is produced for each content of the document:
- for text: An ai call with the extraction prompt delivers the text to be integrated
- for an image (it is available as base64 content) an ai call with the extraction prompt delivers the text to be integrated
Like this we have not anymore the problem, that file content is not found by the agents.
For code implementation I see a big opportunity to massively reduce code. To build basic methods to be used everywhere:
1. function "document_store_upload(message_id,filename,filepath...) --> function to store an uploaded or drag&drop document from the user and return the document object. This function does the steps for a) respectively b) as described above and identifies the filetype
2. function "document_store_agent(message_id,filename,document_content,document_type...) --> function to store the produced document from the agent and return the document object. This function does the steps like described in section a) above
3. function "document_get_from_message()
Based on these 3 functions all operations can be done much more comfortable in the workflow, but also in connection with the ui (download file, copy file, preview file), because all references to the files are always ensured.
Can you analyze this idea?
What did I not yet consider, that would be relevant for the current code to adapt?
how big is the effort to have this logic implemented?
----------------------- OPEN
@ -32,6 +61,15 @@ add connector to myoutlook
----------------------- DONE
Currently the webcrawler is always called for unclear prompts. Can you please add an agent for "Creative" or "knowledge" answers and select him rather than the webcrawler (meaning to adapt criteria for webcrawler, that he is only called for explicit web research or internet search).
The Creative Agent shall be selected for open questions or simple documentation topics, e.g. writing an email, writing a birthday card, what to consider if going 1 year to the USA, etc. He can also deliver documents. So specify in his prompt that it is clear what he delivers and how it is taken out for the next agent.
The exception for "poweron" keyword shall also be routed to this agent. This means, he is the one to answer the keyword "poweron". Like this you can please remove all "poweron"-specific code in the modules and integrate the answer for poweron in this "Creative" agent.
Please use the agentservice_base.py to create this agent (same template as for all other agents).
Modul "agentservice_agent_documentation.py": Bitte die Berichterstellung adaptiv zum Prompt machen. Bei einfachen Berichten eher eine Zusammenfassung, bei komplexen Berichten mit Kapiteln arbeiten.